core_arch-0.1.5/build.rs

fn main() {
    println!("cargo:rustc-cfg=core_arch_docs");
}

core_arch-0.1.5/Cargo.toml.orig

[package]
name = "core_arch"
version = "0.1.5"
authors = [
    "Alex Crichton ",
    "Andrew Gallant ",
    "Gonzalo Brito Gadeschi ",
]
description = "`core::arch` - Rust's core library architecture-specific intrinsics."
documentation = "https://docs.rs/core_arch"
homepage = "https://github.com/rust-lang-nursery/stdsimd"
repository = "https://github.com/rust-lang-nursery/stdsimd"
readme = "README.md"
keywords = ["core", "simd", "arch", "intrinsics"]
categories = ["hardware-support", "no-std"]
license = "MIT/Apache-2.0"
build = "build.rs"
edition = "2015"

[badges]
travis-ci = { repository = "rust-lang-nursery/stdsimd" }
appveyor = { repository = "rust-lang-nursery/stdsimd" }
is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/stdsimd" }
is-it-maintained-open-issues = { repository = "rust-lang-nursery/stdsimd" }
maintenance = { status = "experimental" }

#[dev-dependencies]
#stdsimd-test = { version = "0.*", path = "../stdsimd-test" }
#std_detect = { version = "0.*", path = "../std_detect" }

[target.wasm32-unknown-unknown.dev-dependencies]
wasm-bindgen-test = "=0.2.19"

[package.metadata.docs.rs]
rustdoc-args = [ "--cfg", "dox" ]

core_arch-0.1.5/Cargo.toml

# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g. crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)

[package]
edition = "2015"
name = "core_arch"
version = "0.1.5"
authors = ["Alex Crichton ", "Andrew Gallant ", "Gonzalo Brito Gadeschi "]
build = "build.rs"
description = "`core::arch` - Rust's core library architecture-specific intrinsics."
homepage = "https://github.com/rust-lang-nursery/stdsimd"
documentation = "https://docs.rs/core_arch"
readme = "README.md"
keywords = ["core", "simd", "arch", "intrinsics"]
categories = ["hardware-support", "no-std"]
license = "MIT/Apache-2.0"
repository = "https://github.com/rust-lang-nursery/stdsimd"

[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "dox"]

[target.wasm32-unknown-unknown.dev-dependencies.wasm-bindgen-test]
version = "=0.2.19"

[badges.appveyor]
repository = "rust-lang-nursery/stdsimd"

[badges.is-it-maintained-issue-resolution]
repository = "rust-lang-nursery/stdsimd"

[badges.is-it-maintained-open-issues]
repository = "rust-lang-nursery/stdsimd"

[badges.maintenance]
status = "experimental"

[badges.travis-ci]
repository = "rust-lang-nursery/stdsimd"
core_arch-0.1.5/foo.wasm

(binary WebAssembly test module; contents omitted)

core_arch-0.1.5/LICENSE-APACHE

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.

"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. core_arch-0.1.5/LICENSE-MIT010064400007650000024000000020571342165767500133410ustar0000000000000000Copyright (c) 2017 The Rust Project Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

core_arch-0.1.5/README.md

`core::arch` - Rust's core library architecture-specific intrinsics
=======

[![Travis-CI Status]][travis] [![Appveyor Status]][appveyor] [![core_arch_crate_badge]][core_arch_crate_link] [![core_arch_docs_badge]][core_arch_docs_link]

The `core::arch` module implements architecture-dependent intrinsics (e.g. SIMD).

# Usage

`core::arch` is available as part of `libcore` and is re-exported by `libstd`. Prefer using it via `core::arch` or `std::arch` rather than via this crate. Unstable features are often available in nightly Rust behind the `feature(stdsimd)` feature gate.

Using `core::arch` via this crate requires nightly Rust, and it can (and does) break often. The only cases in which you should consider using it via this crate are:

* if you need to re-compile `core::arch` yourself, e.g., with particular target-features enabled that are not enabled for `libcore`/`libstd`. Note: if you need to re-compile it for a non-standard target, please prefer using `xargo` and re-compiling `libcore`/`libstd` as appropriate instead of using this crate.

* if you need some features that might not be available even behind unstable Rust features. We try to keep these to a minimum. If you need to use some of these features, please open an issue so that we can expose them in nightly Rust and you can use them from there.

# Documentation

* [Documentation - i686][i686]
* [Documentation - x86\_64][x86_64]
* [Documentation - arm][arm]
* [Documentation - aarch64][aarch64]
* [Documentation - powerpc][powerpc]
* [Documentation - powerpc64][powerpc64]
* [How to get started][contrib]
* [How to help implement intrinsics][help-implement]

[contrib]: https://github.com/rust-lang-nursery/stdsimd/blob/master/CONTRIBUTING.md
[help-implement]: https://github.com/rust-lang-nursery/stdsimd/issues/40
[i686]: https://rust-lang-nursery.github.io/stdsimd/i686/core_arch/
[x86_64]: https://rust-lang-nursery.github.io/stdsimd/x86_64/core_arch/
[arm]: https://rust-lang-nursery.github.io/stdsimd/arm/core_arch/
[aarch64]: https://rust-lang-nursery.github.io/stdsimd/aarch64/core_arch/
[powerpc]: https://rust-lang-nursery.github.io/stdsimd/powerpc/core_arch/
[powerpc64]: https://rust-lang-nursery.github.io/stdsimd/powerpc64/core_arch/

# License

`core_arch` is primarily distributed under the terms of both the MIT license and the Apache License (Version 2.0), with portions covered by various BSD-like licenses.

See LICENSE-APACHE and LICENSE-MIT for details.

# Contribution

Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in `core_arch` by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
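As a quick illustration of the `std::arch` route recommended in the Usage section above (rather than depending on this crate directly), the sketch below pairs an intrinsic with runtime feature detection. The specific intrinsic (`_popcnt64`) and the software fallback are just illustrative choices, not part of this crate.

```rust
// Minimal sketch: call an architecture intrinsic through `std::arch`,
// guarded by a runtime feature check, with a portable fallback.
#[cfg(target_arch = "x86_64")]
fn popcount(x: u64) -> u32 {
    if is_x86_feature_detected!("popcnt") {
        // Safe to call only because the feature check above succeeded.
        unsafe { std::arch::x86_64::_popcnt64(x as i64) as u32 }
    } else {
        x.count_ones()
    }
}
```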
[travis]: https://travis-ci.com/rust-lang-nursery/stdsimd [Travis-CI Status]: https://travis-ci.com/rust-lang-nursery/stdsimd.svg?branch=master [appveyor]: https://ci.appveyor.com/project/rust-lang-libs/stdsimd/branch/master [Appveyor Status]: https://ci.appveyor.com/api/projects/status/ix74qhmilpibn00x/branch/master?svg=true [core_arch_crate_badge]: https://img.shields.io/crates/v/core_arch.svg [core_arch_crate_link]: https://crates.io/crates/core_arch [core_arch_docs_badge]: https://docs.rs/core_arch/badge.svg [core_arch_docs_link]: https://docs.rs/core_arch/ core_arch-0.1.5/src/aarch64/crc.rs010064400007650000024000000076461343447103600150370ustar0000000000000000extern "C" { #[link_name = "llvm.aarch64.crc32b"] fn crc32b_(crc: u32, data: u32) -> u32; #[link_name = "llvm.aarch64.crc32h"] fn crc32h_(crc: u32, data: u32) -> u32; #[link_name = "llvm.aarch64.crc32w"] fn crc32w_(crc: u32, data: u32) -> u32; #[link_name = "llvm.aarch64.crc32x"] fn crc32x_(crc: u32, data: u64) -> u32; #[link_name = "llvm.aarch64.crc32cb"] fn crc32cb_(crc: u32, data: u32) -> u32; #[link_name = "llvm.aarch64.crc32ch"] fn crc32ch_(crc: u32, data: u32) -> u32; #[link_name = "llvm.aarch64.crc32cw"] fn crc32cw_(crc: u32, data: u32) -> u32; #[link_name = "llvm.aarch64.crc32cx"] fn crc32cx_(crc: u32, data: u64) -> u32; } #[cfg(test)] use stdsimd_test::assert_instr; /// CRC32 single round checksum for bytes (8 bits). #[inline] #[target_feature(enable = "crc")] #[cfg_attr(test, assert_instr(crc32b))] pub unsafe fn __crc32b(crc: u32, data: u8) -> u32 { crc32b_(crc, data as u32) } /// CRC32 single round checksum for half words (16 bits). #[inline] #[target_feature(enable = "crc")] #[cfg_attr(test, assert_instr(crc32h))] pub unsafe fn __crc32h(crc: u32, data: u16) -> u32 { crc32h_(crc, data as u32) } /// CRC32 single round checksum for words (32 bits). #[inline] #[target_feature(enable = "crc")] #[cfg_attr(test, assert_instr(crc32w))] pub unsafe fn __crc32w(crc: u32, data: u32) -> u32 { crc32w_(crc, data) } /// CRC32 single round checksum for quad words (64 bits). #[inline] #[target_feature(enable = "crc")] #[cfg_attr(test, assert_instr(crc32x))] pub unsafe fn __crc32d(crc: u32, data: u64) -> u32 { crc32x_(crc, data) } /// CRC32-C single round checksum for bytes (8 bits). #[inline] #[target_feature(enable = "crc")] #[cfg_attr(test, assert_instr(crc32cb))] pub unsafe fn __crc32cb(crc: u32, data: u8) -> u32 { crc32cb_(crc, data as u32) } /// CRC32-C single round checksum for half words (16 bits). #[inline] #[target_feature(enable = "crc")] #[cfg_attr(test, assert_instr(crc32ch))] pub unsafe fn __crc32ch(crc: u32, data: u16) -> u32 { crc32ch_(crc, data as u32) } /// CRC32-C single round checksum for words (32 bits). #[inline] #[target_feature(enable = "crc")] #[cfg_attr(test, assert_instr(crc32cw))] pub unsafe fn __crc32cw(crc: u32, data: u32) -> u32 { crc32cw_(crc, data) } /// CRC32-C single round checksum for quad words (64 bits). 
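///
/// # Examples
///
/// A minimal sketch (not part of the original docs): the inputs and expected
/// checksums mirror the `test_crc32cd` test at the bottom of this module, and
/// the call assumes an AArch64 CPU where the `crc` feature is available.
///
/// ```ignore
/// // Accumulate a CRC32-C checksum over 64-bit words, starting from 0.
/// unsafe {
///     assert_eq!(__crc32cd(0, 0), 0);
///     assert_eq!(__crc32cd(0, 18446744073709551615), 3293575501);
/// }
/// ```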
#[inline] #[target_feature(enable = "crc")] #[cfg_attr(test, assert_instr(crc32cx))] pub unsafe fn __crc32cd(crc: u32, data: u64) -> u32 { crc32cx_(crc, data) } #[cfg(test)] mod tests { use crate::core_arch::{aarch64::*, simd::*}; use std::mem; use stdsimd_test::simd_test; #[simd_test(enable = "crc")] unsafe fn test_crc32b() { assert_eq!(__crc32b(0, 0), 0); assert_eq!(__crc32b(0, 255), 755167117); } #[simd_test(enable = "crc")] unsafe fn test_crc32h() { assert_eq!(__crc32h(0, 0), 0); assert_eq!(__crc32h(0, 16384), 1994146192); } #[simd_test(enable = "crc")] unsafe fn test_crc32w() { assert_eq!(__crc32w(0, 0), 0); assert_eq!(__crc32w(0, 4294967295), 3736805603); } #[simd_test(enable = "crc")] unsafe fn test_crc32d() { assert_eq!(__crc32d(0, 0), 0); assert_eq!(__crc32d(0, 18446744073709551615), 1147535477); } #[simd_test(enable = "crc")] unsafe fn test_crc32cb() { assert_eq!(__crc32cb(0, 0), 0); assert_eq!(__crc32cb(0, 255), 2910671697); } #[simd_test(enable = "crc")] unsafe fn test_crc32ch() { assert_eq!(__crc32ch(0, 0), 0); assert_eq!(__crc32ch(0, 16384), 1098587580); } #[simd_test(enable = "crc")] unsafe fn test_crc32cw() { assert_eq!(__crc32cw(0, 0), 0); assert_eq!(__crc32cw(0, 4294967295), 3080238136); } #[simd_test(enable = "crc")] unsafe fn test_crc32cd() { assert_eq!(__crc32cd(0, 0), 0); assert_eq!(__crc32cd(0, 18446744073709551615), 3293575501); } } core_arch-0.1.5/src/aarch64/crypto.rs010064400007650000024000000263361343447103600156050ustar0000000000000000use crate::core_arch::arm::{uint32x4_t, uint8x16_t}; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.aarch64.crypto.aese"] fn vaeseq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t; #[link_name = "llvm.aarch64.crypto.aesd"] fn vaesdq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t; #[link_name = "llvm.aarch64.crypto.aesmc"] fn vaesmcq_u8_(data: uint8x16_t) -> uint8x16_t; #[link_name = "llvm.aarch64.crypto.aesimc"] fn vaesimcq_u8_(data: uint8x16_t) -> uint8x16_t; #[link_name = "llvm.aarch64.crypto.sha1h"] fn vsha1h_u32_(hash_e: u32) -> u32; #[link_name = "llvm.aarch64.crypto.sha1su0"] fn vsha1su0q_u32_(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t; #[link_name = "llvm.aarch64.crypto.sha1su1"] fn vsha1su1q_u32_(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t; #[link_name = "llvm.aarch64.crypto.sha1c"] fn vsha1cq_u32_(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; #[link_name = "llvm.aarch64.crypto.sha1p"] fn vsha1pq_u32_(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; #[link_name = "llvm.aarch64.crypto.sha1m"] fn vsha1mq_u32_(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t; #[link_name = "llvm.aarch64.crypto.sha256h"] fn vsha256hq_u32_(hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t) -> uint32x4_t; #[link_name = "llvm.aarch64.crypto.sha256h2"] fn vsha256h2q_u32_(hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t) -> uint32x4_t; #[link_name = "llvm.aarch64.crypto.sha256su0"] fn vsha256su0q_u32_(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t; #[link_name = "llvm.aarch64.crypto.sha256su1"] fn vsha256su1q_u32_(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t; } #[cfg(test)] use stdsimd_test::assert_instr; /// AES single round encryption. #[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(aese))] pub unsafe fn vaeseq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { vaeseq_u8_(data, key) } /// AES single round decryption. 
#[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(aesd))] pub unsafe fn vaesdq_u8(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t { vaesdq_u8_(data, key) } /// AES mix columns. #[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(aesmc))] pub unsafe fn vaesmcq_u8(data: uint8x16_t) -> uint8x16_t { vaesmcq_u8_(data) } /// AES inverse mix columns. #[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(aesimc))] pub unsafe fn vaesimcq_u8(data: uint8x16_t) -> uint8x16_t { vaesimcq_u8_(data) } /// SHA1 fixed rotate. #[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(sha1h))] pub unsafe fn vsha1h_u32(hash_e: u32) -> u32 { vsha1h_u32_(hash_e) } /// SHA1 hash update accelerator, choose. #[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(sha1c))] pub unsafe fn vsha1cq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { vsha1cq_u32_(hash_abcd, hash_e, wk) } /// SHA1 hash update accelerator, majority. #[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(sha1m))] pub unsafe fn vsha1mq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { vsha1mq_u32_(hash_abcd, hash_e, wk) } /// SHA1 hash update accelerator, parity. #[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(sha1p))] pub unsafe fn vsha1pq_u32(hash_abcd: uint32x4_t, hash_e: u32, wk: uint32x4_t) -> uint32x4_t { vsha1pq_u32_(hash_abcd, hash_e, wk) } /// SHA1 schedule update accelerator, first part. #[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(sha1su0))] pub unsafe fn vsha1su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t, w8_11: uint32x4_t) -> uint32x4_t { vsha1su0q_u32_(w0_3, w4_7, w8_11) } /// SHA1 schedule update accelerator, second part. #[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(sha1su1))] pub unsafe fn vsha1su1q_u32(tw0_3: uint32x4_t, w12_15: uint32x4_t) -> uint32x4_t { vsha1su1q_u32_(tw0_3, w12_15) } /// SHA256 hash update accelerator. #[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(sha256h))] pub unsafe fn vsha256hq_u32( hash_abcd: uint32x4_t, hash_efgh: uint32x4_t, wk: uint32x4_t, ) -> uint32x4_t { vsha256hq_u32_(hash_abcd, hash_efgh, wk) } /// SHA256 hash update accelerator, upper part. #[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(sha256h2))] pub unsafe fn vsha256h2q_u32( hash_efgh: uint32x4_t, hash_abcd: uint32x4_t, wk: uint32x4_t, ) -> uint32x4_t { vsha256h2q_u32_(hash_efgh, hash_abcd, wk) } /// SHA256 schedule update accelerator, first part. #[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(sha256su0))] pub unsafe fn vsha256su0q_u32(w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t { vsha256su0q_u32_(w0_3, w4_7) } /// SHA256 schedule update accelerator, second part. 
#[inline] #[target_feature(enable = "crypto")] #[cfg_attr(test, assert_instr(sha256su1))] pub unsafe fn vsha256su1q_u32( tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t, ) -> uint32x4_t { vsha256su1q_u32_(tw0_3, w8_11, w12_15) } #[cfg(test)] mod tests { use crate::core_arch::{aarch64::*, simd::*}; use std::mem; use stdsimd_test::simd_test; #[simd_test(enable = "crypto")] unsafe fn test_vaeseq_u8() { let data = ::mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)); let key = ::mem::transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); let r: u8x16 = ::mem::transmute(vaeseq_u8(data, key)); assert_eq!( r, u8x16::new( 124, 123, 124, 118, 124, 123, 124, 197, 124, 123, 124, 118, 124, 123, 124, 197 ) ); } #[simd_test(enable = "crypto")] unsafe fn test_vaesdq_u8() { let data = ::mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)); let key = ::mem::transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); let r: u8x16 = ::mem::transmute(vaesdq_u8(data, key)); assert_eq!( r, u8x16::new(9, 213, 9, 251, 9, 213, 9, 56, 9, 213, 9, 251, 9, 213, 9, 56) ); } #[simd_test(enable = "crypto")] unsafe fn test_vaesmcq_u8() { let data = ::mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)); let r: u8x16 = ::mem::transmute(vaesmcq_u8(data)); assert_eq!( r, u8x16::new(3, 4, 9, 10, 15, 8, 21, 30, 3, 4, 9, 10, 15, 8, 21, 30) ); } #[simd_test(enable = "crypto")] unsafe fn test_vaesimcq_u8() { let data = ::mem::transmute(u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8)); let r: u8x16 = ::mem::transmute(vaesimcq_u8(data)); assert_eq!( r, u8x16::new(43, 60, 33, 50, 103, 80, 125, 70, 43, 60, 33, 50, 103, 80, 125, 70) ); } #[simd_test(enable = "crypto")] unsafe fn test_vsha1h_u32() { assert_eq!(vsha1h_u32(0x1234), 0x048d); assert_eq!(vsha1h_u32(0x5678), 0x159e); } #[simd_test(enable = "crypto")] unsafe fn test_vsha1su0q_u32() { let r: u32x4 = ::mem::transmute(vsha1su0q_u32( ::mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)), ::mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)), ::mem::transmute(u32x4::new(0x1234_u32, 0x5678_u32, 0x9abc_u32, 0xdef0_u32)), )); assert_eq!(r, u32x4::new(0x9abc, 0xdef0, 0x1234, 0x5678)); } #[simd_test(enable = "crypto")] unsafe fn test_vsha1su1q_u32() { let r: u32x4 = ::mem::transmute(vsha1su1q_u32( ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), )); assert_eq!( r, u32x4::new(0x00008898, 0x00019988, 0x00008898, 0x0000acd0) ); } #[simd_test(enable = "crypto")] unsafe fn test_vsha1cq_u32() { let r: u32x4 = ::mem::transmute(vsha1cq_u32( ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), 0x1234, ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), )); assert_eq!( r, u32x4::new(0x8a32cbd8, 0x0c518a96, 0x0018a081, 0x0000c168) ); } #[simd_test(enable = "crypto")] unsafe fn test_vsha1pq_u32() { let r: u32x4 = ::mem::transmute(vsha1pq_u32( ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), 0x1234, ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), )); assert_eq!( r, u32x4::new(0x469f0ba3, 0x0a326147, 0x80145d7f, 0x00009f47) ); } #[simd_test(enable = "crypto")] unsafe fn test_vsha1mq_u32() { let r: u32x4 = ::mem::transmute(vsha1mq_u32( ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), 0x1234, ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), )); assert_eq!( r, u32x4::new(0xaa39693b, 0x0d51bf84, 
0x001aa109, 0x0000d278) ); } #[simd_test(enable = "crypto")] unsafe fn test_vsha256hq_u32() { let r: u32x4 = ::mem::transmute(vsha256hq_u32( ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), )); assert_eq!( r, u32x4::new(0x05e9aaa8, 0xec5f4c02, 0x20a1ea61, 0x28738cef) ); } #[simd_test(enable = "crypto")] unsafe fn test_vsha256h2q_u32() { let r: u32x4 = ::mem::transmute(vsha256h2q_u32( ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), )); assert_eq!( r, u32x4::new(0x3745362e, 0x2fb51d00, 0xbd4c529b, 0x968b8516) ); } #[simd_test(enable = "crypto")] unsafe fn test_vsha256su0q_u32() { let r: u32x4 = ::mem::transmute(vsha256su0q_u32( ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), )); assert_eq!( r, u32x4::new(0xe59e1c97, 0x5eaf68da, 0xd7bcb51f, 0x6c8de152) ); } #[simd_test(enable = "crypto")] unsafe fn test_vsha256su1q_u32() { let r: u32x4 = ::mem::transmute(vsha256su1q_u32( ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), ::mem::transmute(u32x4::new(0x1234, 0x5678, 0x9abc, 0xdef0)), )); assert_eq!( r, u32x4::new(0x5e09e8d2, 0x74a6f16b, 0xc966606b, 0xa686ee9f) ); } } core_arch-0.1.5/src/aarch64/mod.rs010064400007650000024000000013131343447103600150300ustar0000000000000000//! AArch64 intrinsics. //! //! The reference for NEON is [ARM's NEON Intrinsics Reference][arm_ref]. The //! [ARM's NEON Intrinsics Online Database][arm_dat] is also useful. //! //! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf //! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics mod v8; pub use self::v8::*; mod neon; pub use self::neon::*; mod crypto; pub use self::crypto::*; mod crc; pub use self::crc::*; pub use super::acle::*; #[cfg(test)] use stdsimd_test::assert_instr; /// Generates the trap instruction `BRK 1` #[cfg_attr(test, assert_instr(brk))] #[inline] pub unsafe fn brk() -> ! { crate::intrinsics::abort() } core_arch-0.1.5/src/aarch64/neon.rs010064400007650000024000001637441343447103600152310ustar0000000000000000//! ARMv8 ASIMD intrinsics #![allow(non_camel_case_types)] // FIXME: replace neon with asimd use crate::{ core_arch::{arm::*, simd_llvm::*}, mem::{transmute, zeroed}, }; #[cfg(test)] use stdsimd_test::assert_instr; types! { /// ARM-specific 64-bit wide vector of one packed `f64`. pub struct float64x1_t(f64); // FIXME: check this! /// ARM-specific 128-bit wide vector of two packed `f64`. pub struct float64x2_t(f64, f64); /// ARM-specific 64-bit wide vector of one packed `p64`. pub struct poly64x1_t(i64); // FIXME: check this! /// ARM-specific 64-bit wide vector of two packed `p64`. pub struct poly64x2_t(i64, i64); // FIXME: check this! } /// ARM-specific type containing two `int8x16_t` vectors. #[derive(Copy, Clone)] pub struct int8x16x2_t(pub int8x16_t, pub int8x16_t); /// ARM-specific type containing three `int8x16_t` vectors. #[derive(Copy, Clone)] pub struct int8x16x3_t(pub int8x16_t, pub int8x16_t, pub int8x16_t); /// ARM-specific type containing four `int8x16_t` vectors. 
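///
/// # Examples
///
/// A small sketch (not part of the original docs) showing that this is just a
/// tuple struct grouping four 16-byte registers into one 64-byte table, of the
/// kind consumed by the multi-register `tbl`/`tbx` lookups in this module; the
/// zeroed contents are purely illustrative.
///
/// ```ignore
/// use core::mem::transmute;
/// unsafe {
///     let v: int8x16_t = transmute([0i8; 16]);
///     // Four 16-byte vectors grouped into a single lookup table.
///     let table = int8x16x4_t(v, v, v, v);
///     let _ = table;
/// }
/// ```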
#[derive(Copy, Clone)] pub struct int8x16x4_t(pub int8x16_t, pub int8x16_t, pub int8x16_t, pub int8x16_t); /// ARM-specific type containing two `uint8x16_t` vectors. #[derive(Copy, Clone)] pub struct uint8x16x2_t(pub uint8x16_t, pub uint8x16_t); /// ARM-specific type containing three `uint8x16_t` vectors. #[derive(Copy, Clone)] pub struct uint8x16x3_t(pub uint8x16_t, pub uint8x16_t, pub uint8x16_t); /// ARM-specific type containing four `uint8x16_t` vectors. #[derive(Copy, Clone)] pub struct uint8x16x4_t( pub uint8x16_t, pub uint8x16_t, pub uint8x16_t, pub uint8x16_t, ); /// ARM-specific type containing two `poly8x16_t` vectors. #[derive(Copy, Clone)] pub struct poly8x16x2_t(pub poly8x16_t, pub poly8x16_t); /// ARM-specific type containing three `poly8x16_t` vectors. #[derive(Copy, Clone)] pub struct poly8x16x3_t(pub poly8x16_t, pub poly8x16_t, pub poly8x16_t); /// ARM-specific type containing four `poly8x16_t` vectors. #[derive(Copy, Clone)] pub struct poly8x16x4_t( pub poly8x16_t, pub poly8x16_t, pub poly8x16_t, pub poly8x16_t, ); #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.aarch64.neon.smaxv.i8.v8i8"] fn vmaxv_s8_(a: int8x8_t) -> i8; #[link_name = "llvm.aarch64.neon.smaxv.i8.6i8"] fn vmaxvq_s8_(a: int8x16_t) -> i8; #[link_name = "llvm.aarch64.neon.smaxv.i16.v4i16"] fn vmaxv_s16_(a: int16x4_t) -> i16; #[link_name = "llvm.aarch64.neon.smaxv.i16.v8i16"] fn vmaxvq_s16_(a: int16x8_t) -> i16; #[link_name = "llvm.aarch64.neon.smaxv.i32.v2i32"] fn vmaxv_s32_(a: int32x2_t) -> i32; #[link_name = "llvm.aarch64.neon.smaxv.i32.v4i32"] fn vmaxvq_s32_(a: int32x4_t) -> i32; #[link_name = "llvm.aarch64.neon.umaxv.i8.v8i8"] fn vmaxv_u8_(a: uint8x8_t) -> u8; #[link_name = "llvm.aarch64.neon.umaxv.i8.6i8"] fn vmaxvq_u8_(a: uint8x16_t) -> u8; #[link_name = "llvm.aarch64.neon.umaxv.i16.v4i16"] fn vmaxv_u16_(a: uint16x4_t) -> u16; #[link_name = "llvm.aarch64.neon.umaxv.i16.v8i16"] fn vmaxvq_u16_(a: uint16x8_t) -> u16; #[link_name = "llvm.aarch64.neon.umaxv.i32.v2i32"] fn vmaxv_u32_(a: uint32x2_t) -> u32; #[link_name = "llvm.aarch64.neon.umaxv.i32.v4i32"] fn vmaxvq_u32_(a: uint32x4_t) -> u32; #[link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32"] fn vmaxv_f32_(a: float32x2_t) -> f32; #[link_name = "llvm.aarch64.neon.fmaxv.f32.v4f32"] fn vmaxvq_f32_(a: float32x4_t) -> f32; #[link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64"] fn vmaxvq_f64_(a: float64x2_t) -> f64; #[link_name = "llvm.aarch64.neon.sminv.i8.v8i8"] fn vminv_s8_(a: int8x8_t) -> i8; #[link_name = "llvm.aarch64.neon.sminv.i8.6i8"] fn vminvq_s8_(a: int8x16_t) -> i8; #[link_name = "llvm.aarch64.neon.sminv.i16.v4i16"] fn vminv_s16_(a: int16x4_t) -> i16; #[link_name = "llvm.aarch64.neon.sminv.i16.v8i16"] fn vminvq_s16_(a: int16x8_t) -> i16; #[link_name = "llvm.aarch64.neon.sminv.i32.v2i32"] fn vminv_s32_(a: int32x2_t) -> i32; #[link_name = "llvm.aarch64.neon.sminv.i32.v4i32"] fn vminvq_s32_(a: int32x4_t) -> i32; #[link_name = "llvm.aarch64.neon.uminv.i8.v8i8"] fn vminv_u8_(a: uint8x8_t) -> u8; #[link_name = "llvm.aarch64.neon.uminv.i8.6i8"] fn vminvq_u8_(a: uint8x16_t) -> u8; #[link_name = "llvm.aarch64.neon.uminv.i16.v4i16"] fn vminv_u16_(a: uint16x4_t) -> u16; #[link_name = "llvm.aarch64.neon.uminv.i16.v8i16"] fn vminvq_u16_(a: uint16x8_t) -> u16; #[link_name = "llvm.aarch64.neon.uminv.i32.v2i32"] fn vminv_u32_(a: uint32x2_t) -> u32; #[link_name = "llvm.aarch64.neon.uminv.i32.v4i32"] fn vminvq_u32_(a: uint32x4_t) -> u32; #[link_name = "llvm.aarch64.neon.fminv.f32.v2f32"] fn vminv_f32_(a: float32x2_t) -> f32; #[link_name = 
"llvm.aarch64.neon.fminv.f32.v4f32"] fn vminvq_f32_(a: float32x4_t) -> f32; #[link_name = "llvm.aarch64.neon.fminv.f64.v2f64"] fn vminvq_f64_(a: float64x2_t) -> f64; #[link_name = "llvm.aarch64.neon.sminp.v16i8"] fn vpminq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; #[link_name = "llvm.aarch64.neon.sminp.v8i16"] fn vpminq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; #[link_name = "llvm.aarch64.neon.sminp.v4i32"] fn vpminq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; #[link_name = "llvm.aarch64.neon.uminp.v16i8"] fn vpminq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; #[link_name = "llvm.aarch64.neon.uminp.v8i16"] fn vpminq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; #[link_name = "llvm.aarch64.neon.uminp.v4i32"] fn vpminq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; #[link_name = "llvm.aarch64.neon.fminp.4f32"] fn vpminq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; #[link_name = "llvm.aarch64.neon.fminp.v2f64"] fn vpminq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; #[link_name = "llvm.aarch64.neon.smaxp.v16i8"] fn vpmaxq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; #[link_name = "llvm.aarch64.neon.smaxp.v8i16"] fn vpmaxq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; #[link_name = "llvm.aarch64.neon.smaxp.v4i32"] fn vpmaxq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; #[link_name = "llvm.aarch64.neon.umaxp.v16i8"] fn vpmaxq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; #[link_name = "llvm.aarch64.neon.umaxp.v8i16"] fn vpmaxq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; #[link_name = "llvm.aarch64.neon.umaxp.v4i32"] fn vpmaxq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; #[link_name = "llvm.aarch64.neon.fmaxp.4f32"] fn vpmaxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; #[link_name = "llvm.aarch64.neon.fmaxp.v2f64"] fn vpmaxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t; #[link_name = "llvm.aarch64.neon.tbl1.v8i8"] fn vqtbl1(a: int8x16_t, b: uint8x8_t) -> int8x8_t; #[link_name = "llvm.aarch64.neon.tbl1.v16i8"] fn vqtbl1q(a: int8x16_t, b: uint8x16_t) -> int8x16_t; #[link_name = "llvm.aarch64.neon.tbx1.v8i8"] fn vqtbx1(a: int8x8_t, b: int8x16_t, c: uint8x8_t) -> int8x8_t; #[link_name = "llvm.aarch64.neon.tbx1.v16i8"] fn vqtbx1q(a: int8x16_t, b: int8x16_t, c: uint8x16_t) -> int8x16_t; #[link_name = "llvm.aarch64.neon.tbl2.v8i8"] fn vqtbl2(a0: int8x16_t, a1: int8x16_t, b: uint8x8_t) -> int8x8_t; #[link_name = "llvm.aarch64.neon.tbl2.v16i8"] fn vqtbl2q(a0: int8x16_t, a1: int8x16_t, b: uint8x16_t) -> int8x16_t; #[link_name = "llvm.aarch64.neon.tbx2.v8i8"] fn vqtbx2(a: int8x8_t, b0: int8x16_t, b1: int8x16_t, c: uint8x8_t) -> int8x8_t; #[link_name = "llvm.aarch64.neon.tbx2.v16i8"] fn vqtbx2q(a: int8x16_t, b0: int8x16_t, b1: int8x16_t, c: uint8x16_t) -> int8x16_t; #[link_name = "llvm.aarch64.neon.tbl3.v8i8"] fn vqtbl3(a0: int8x16_t, a1: int8x16_t, a2: int8x16_t, b: uint8x8_t) -> int8x8_t; #[link_name = "llvm.aarch64.neon.tbl3.v16i8"] fn vqtbl3q(a0: int8x16_t, a1: int8x16_t, a2: int8x16_t, b: uint8x16_t) -> int8x16_t; #[link_name = "llvm.aarch64.neon.tbx3.v8i8"] fn vqtbx3(a: int8x8_t, b0: int8x16_t, b1: int8x16_t, b2: int8x16_t, c: uint8x8_t) -> int8x8_t; #[link_name = "llvm.aarch64.neon.tbx3.v16i8"] fn vqtbx3q( a: int8x16_t, b0: int8x16_t, b1: int8x16_t, b2: int8x16_t, c: uint8x16_t, ) -> int8x16_t; #[link_name = "llvm.aarch64.neon.tbl4.v8i8"] fn vqtbl4(a0: int8x16_t, a1: int8x16_t, a2: int8x16_t, a3: int8x16_t, b: uint8x8_t) -> int8x8_t; #[link_name = "llvm.aarch64.neon.tbl4.v16i8"] fn vqtbl4q( a0: int8x16_t, a1: int8x16_t, a2: 
int8x16_t, a3: int8x16_t, b: uint8x16_t, ) -> int8x16_t; #[link_name = "llvm.aarch64.neon.tbx4.v8i8"] fn vqtbx4( a: int8x8_t, b0: int8x16_t, b1: int8x16_t, b2: int8x16_t, b3: int8x16_t, c: uint8x8_t, ) -> int8x8_t; #[link_name = "llvm.aarch64.neon.tbx4.v16i8"] fn vqtbx4q( a: int8x16_t, b0: int8x16_t, b1: int8x16_t, b2: int8x16_t, b3: int8x16_t, c: uint8x16_t, ) -> int8x16_t; } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fadd))] pub unsafe fn vadd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fadd))] pub unsafe fn vaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(add))] pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 { let a: int64x1_t = transmute(a); let b: int64x1_t = transmute(b); simd_extract(simd_add(a, b), 0) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(add))] pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 { let a: uint64x1_t = transmute(a); let b: uint64x1_t = transmute(b); simd_extract(simd_add(a, b), 0) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smaxv))] pub unsafe fn vmaxv_s8(a: int8x8_t) -> i8 { vmaxv_s8_(a) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smaxv))] pub unsafe fn vmaxvq_s8(a: int8x16_t) -> i8 { vmaxvq_s8_(a) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smaxv))] pub unsafe fn vmaxv_s16(a: int16x4_t) -> i16 { vmaxv_s16_(a) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smaxv))] pub unsafe fn vmaxvq_s16(a: int16x8_t) -> i16 { vmaxvq_s16_(a) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smaxp))] pub unsafe fn vmaxv_s32(a: int32x2_t) -> i32 { vmaxv_s32_(a) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smaxv))] pub unsafe fn vmaxvq_s32(a: int32x4_t) -> i32 { vmaxvq_s32_(a) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umaxv))] pub unsafe fn vmaxv_u8(a: uint8x8_t) -> u8 { vmaxv_u8_(a) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umaxv))] pub unsafe fn vmaxvq_u8(a: uint8x16_t) -> u8 { vmaxvq_u8_(a) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umaxv))] pub unsafe fn vmaxv_u16(a: uint16x4_t) -> u16 { vmaxv_u16_(a) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umaxv))] pub unsafe fn vmaxvq_u16(a: uint16x8_t) -> u16 { vmaxvq_u16_(a) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umaxp))] pub unsafe fn vmaxv_u32(a: uint32x2_t) -> u32 { vmaxv_u32_(a) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umaxv))] pub unsafe fn vmaxvq_u32(a: uint32x4_t) -> u32 { vmaxvq_u32_(a) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxp))] pub unsafe fn vmaxv_f32(a: float32x2_t) -> f32 { vmaxv_f32_(a) } /// Horizontal vector max. 
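///
/// # Examples
///
/// A hedged sketch (not part of the original docs): the lane values are
/// arbitrary, the vector is built by transmuting an `[f32; 4]` array, and with
/// no NaNs involved the reduction is simply the largest lane.
///
/// ```ignore
/// use core::mem::transmute;
/// unsafe {
///     let v: float32x4_t = transmute([1.0f32, -2.0, 4.0, 3.0]);
///     assert_eq!(vmaxvq_f32(v), 4.0);
/// }
/// ```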
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxv))] pub unsafe fn vmaxvq_f32(a: float32x4_t) -> f32 { vmaxvq_f32_(a) } /// Horizontal vector max. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxp))] pub unsafe fn vmaxvq_f64(a: float64x2_t) -> f64 { vmaxvq_f64_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sminv))] pub unsafe fn vminv_s8(a: int8x8_t) -> i8 { vminv_s8_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sminv))] pub unsafe fn vminvq_s8(a: int8x16_t) -> i8 { vminvq_s8_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sminv))] pub unsafe fn vminv_s16(a: int16x4_t) -> i16 { vminv_s16_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sminv))] pub unsafe fn vminvq_s16(a: int16x8_t) -> i16 { vminvq_s16_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sminp))] pub unsafe fn vminv_s32(a: int32x2_t) -> i32 { vminv_s32_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sminv))] pub unsafe fn vminvq_s32(a: int32x4_t) -> i32 { vminvq_s32_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uminv))] pub unsafe fn vminv_u8(a: uint8x8_t) -> u8 { vminv_u8_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uminv))] pub unsafe fn vminvq_u8(a: uint8x16_t) -> u8 { vminvq_u8_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uminv))] pub unsafe fn vminv_u16(a: uint16x4_t) -> u16 { vminv_u16_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uminv))] pub unsafe fn vminvq_u16(a: uint16x8_t) -> u16 { vminvq_u16_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uminp))] pub unsafe fn vminv_u32(a: uint32x2_t) -> u32 { vminv_u32_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uminv))] pub unsafe fn vminvq_u32(a: uint32x4_t) -> u32 { vminvq_u32_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminp))] pub unsafe fn vminv_f32(a: float32x2_t) -> f32 { vminv_f32_(a) } /// Horizontal vector min. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminv))] pub unsafe fn vminvq_f32(a: float32x4_t) -> f32 { vminvq_f32_(a) } /// Horizontal vector min. 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminp))] pub unsafe fn vminvq_f64(a: float64x2_t) -> f64 { vminvq_f64_(a) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sminp))] pub unsafe fn vpminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { vpminq_s8_(a, b) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sminp))] pub unsafe fn vpminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { vpminq_s16_(a, b) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(sminp))] pub unsafe fn vpminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { vpminq_s32_(a, b) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uminp))] pub unsafe fn vpminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { vpminq_u8_(a, b) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uminp))] pub unsafe fn vpminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { vpminq_u16_(a, b) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uminp))] pub unsafe fn vpminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { vpminq_u32_(a, b) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminp))] pub unsafe fn vpminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { vpminq_f32_(a, b) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fminp))] pub unsafe fn vpminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { vpminq_f64_(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smaxp))] pub unsafe fn vpmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { vpmaxq_s8_(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smaxp))] pub unsafe fn vpmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { vpmaxq_s16_(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(smaxp))] pub unsafe fn vpmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { vpmaxq_s32_(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umaxp))] pub unsafe fn vpmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { vpmaxq_u8_(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umaxp))] pub unsafe fn vpmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { vpmaxq_u16_(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(umaxp))] pub unsafe fn vpmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { vpmaxq_u32_(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxp))] pub unsafe fn vpmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { vpmaxq_f32_(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(fmaxp))] pub unsafe fn vpmaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { vpmaxq_f64_(a, b) } /// 
Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_s8(low: int8x8_t, high: int8x8_t) -> int8x16_t { simd_shuffle16( low, high, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], ) } /// Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_s16(low: int16x4_t, high: int16x4_t) -> int16x8_t { simd_shuffle8(low, high, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_s32(low: int32x2_t, high: int32x2_t) -> int32x4_t { simd_shuffle4(low, high, [0, 1, 2, 3]) } /// Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_s64(low: int64x1_t, high: int64x1_t) -> int64x2_t { simd_shuffle2(low, high, [0, 1]) } /// Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_u8(low: uint8x8_t, high: uint8x8_t) -> uint8x16_t { simd_shuffle16( low, high, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], ) } /// Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_u16(low: uint16x4_t, high: uint16x4_t) -> uint16x8_t { simd_shuffle8(low, high, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_u32(low: uint32x2_t, high: uint32x2_t) -> uint32x4_t { simd_shuffle4(low, high, [0, 1, 2, 3]) } /// Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_u64(low: uint64x1_t, high: uint64x1_t) -> uint64x2_t { simd_shuffle2(low, high, [0, 1]) } /// Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_p64(low: poly64x1_t, high: poly64x1_t) -> poly64x2_t { simd_shuffle2(low, high, [0, 1]) } /* FIXME: 16-bit float /// Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_f16 ( low: float16x4_t, high: float16x4_t) -> float16x8_t { simd_shuffle8(low, high, [0, 1, 2, 3, 4, 5, 6, 7]) } */ /// Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_f32(low: float32x2_t, high: float32x2_t) -> float32x4_t { simd_shuffle4(low, high, [0, 1, 2, 3]) } /// Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_p8(low: poly8x8_t, high: poly8x8_t) -> poly8x16_t { simd_shuffle16( low, high, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], ) } /// Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_p16(low: poly16x4_t, high: poly16x4_t) -> poly16x8_t { simd_shuffle8(low, high, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Vector combine #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(mov))] pub unsafe fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t { simd_shuffle2(low, high, [0, 1]) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { vqtbl1_s8(vcombine_s8(a, zeroed()), transmute(b)) } /// Table look-up #[inline] #[cfg(target_endian = 
"little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { vqtbl1_u8(vcombine_u8(a, zeroed()), b) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { vqtbl1_p8(vcombine_p8(a, zeroed()), b) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { vqtbl1_s8(vcombine_s8(a.0, a.1), transmute(b)) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { vqtbl1_u8(vcombine_u8(a.0, a.1), b) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { vqtbl1_p8(vcombine_p8(a.0, a.1), b) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { vqtbl2_s8( int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, zeroed())), transmute(b), ) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { vqtbl2_u8( uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, zeroed())), b, ) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { vqtbl2_p8( poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, zeroed())), b, ) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { vqtbl2_s8( int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, a.3)), transmute(b), ) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { vqtbl2_u8( uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3)), b, ) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { vqtbl2_p8( poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3)), b, ) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { use crate::core_arch::simd::i8x8; let r = vqtbx1_s8(a, vcombine_s8(b, zeroed()), transmute(c)); let m: int8x8_t = simd_lt(c, transmute(i8x8::splat(8))); simd_select(m, r, a) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { use crate::core_arch::simd::u8x8; let r = vqtbx1_u8(a, vcombine_u8(b, zeroed()), 
c); let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(8))); simd_select(m, r, a) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { use crate::core_arch::simd::u8x8; let r = vqtbx1_p8(a, vcombine_p8(b, zeroed()), c); let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(8))); simd_select(m, r, a) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { vqtbx1_s8(a, vcombine_s8(b.0, b.1), transmute(c)) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { vqtbx1_u8(a, vcombine_u8(b.0, b.1), c) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { vqtbx1_p8(a, vcombine_p8(b.0, b.1), c) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { use crate::core_arch::simd::i8x8; let r = vqtbx2_s8( a, int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, zeroed())), transmute(c), ); let m: int8x8_t = simd_lt(c, transmute(i8x8::splat(24))); simd_select(m, r, a) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { use crate::core_arch::simd::u8x8; let r = vqtbx2_u8( a, uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, zeroed())), c, ); let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(24))); simd_select(m, r, a) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { use crate::core_arch::simd::u8x8; let r = vqtbx2_p8( a, poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, zeroed())), c, ); let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(24))); simd_select(m, r, a) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { vqtbx2_s8( a, int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, b.3)), transmute(c), ) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { vqtbx2_u8( a, uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, b.3)), c, ) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { vqtbx2_p8( a, poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, b.3)), c, ) } /// Table look-up #[inline] #[cfg(target_endian = 
"little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl1_s8(t: int8x16_t, idx: uint8x8_t) -> int8x8_t { vqtbl1(t, idx) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl1q_s8(t: int8x16_t, idx: uint8x16_t) -> int8x16_t { vqtbl1q(t, idx) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl1_u8(t: uint8x16_t, idx: uint8x8_t) -> uint8x8_t { transmute(vqtbl1(transmute(t), transmute(idx))) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl1q_u8(t: uint8x16_t, idx: uint8x16_t) -> uint8x16_t { transmute(vqtbl1q(transmute(t), transmute(idx))) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl1_p8(t: poly8x16_t, idx: uint8x8_t) -> poly8x8_t { transmute(vqtbl1(transmute(t), transmute(idx))) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl1q_p8(t: poly8x16_t, idx: uint8x16_t) -> poly8x16_t { transmute(vqtbl1q(transmute(t), transmute(idx))) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx1_s8(a: int8x8_t, t: int8x16_t, idx: uint8x8_t) -> int8x8_t { vqtbx1(a, t, idx) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx1q_s8(a: int8x16_t, t: int8x16_t, idx: uint8x16_t) -> int8x16_t { vqtbx1q(a, t, idx) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx1_u8(a: uint8x8_t, t: uint8x16_t, idx: uint8x8_t) -> uint8x8_t { transmute(vqtbx1(transmute(a), transmute(t), transmute(idx))) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx1q_u8(a: uint8x16_t, t: uint8x16_t, idx: uint8x16_t) -> uint8x16_t { transmute(vqtbx1q(transmute(a), transmute(t), transmute(idx))) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx1_p8(a: poly8x8_t, t: poly8x16_t, idx: uint8x8_t) -> poly8x8_t { transmute(vqtbx1(transmute(a), transmute(t), transmute(idx))) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx1q_p8(a: poly8x16_t, t: poly8x16_t, idx: uint8x16_t) -> poly8x16_t { transmute(vqtbx1q(transmute(a), transmute(t), transmute(idx))) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl2_s8(t: int8x16x2_t, idx: uint8x8_t) -> int8x8_t { vqtbl2(t.0, t.1, idx) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl2q_s8(t: int8x16x2_t, idx: uint8x16_t) -> int8x16_t { vqtbl2q(t.0, t.1, idx) } /// Table look-up 
#[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl2_u8(t: uint8x16x2_t, idx: uint8x8_t) -> uint8x8_t { transmute(vqtbl2(transmute(t.0), transmute(t.1), transmute(idx))) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl2q_u8(t: uint8x16x2_t, idx: uint8x16_t) -> uint8x16_t { transmute(vqtbl2q(transmute(t.0), transmute(t.1), transmute(idx))) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl2_p8(t: poly8x16x2_t, idx: uint8x8_t) -> poly8x8_t { transmute(vqtbl2(transmute(t.0), transmute(t.1), transmute(idx))) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl2q_p8(t: poly8x16x2_t, idx: uint8x16_t) -> poly8x16_t { transmute(vqtbl2q(transmute(t.0), transmute(t.1), transmute(idx))) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx2_s8(a: int8x8_t, t: int8x16x2_t, idx: uint8x8_t) -> int8x8_t { vqtbx2(a, t.0, t.1, idx) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx2q_s8(a: int8x16_t, t: int8x16x2_t, idx: uint8x16_t) -> int8x16_t { vqtbx2q(a, t.0, t.1, idx) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx2_u8(a: uint8x8_t, t: uint8x16x2_t, idx: uint8x8_t) -> uint8x8_t { transmute(vqtbx2( transmute(a), transmute(t.0), transmute(t.1), transmute(idx), )) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx2q_u8(a: uint8x16_t, t: uint8x16x2_t, idx: uint8x16_t) -> uint8x16_t { transmute(vqtbx2q( transmute(a), transmute(t.0), transmute(t.1), transmute(idx), )) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx2_p8(a: poly8x8_t, t: poly8x16x2_t, idx: uint8x8_t) -> poly8x8_t { transmute(vqtbx2( transmute(a), transmute(t.0), transmute(t.1), transmute(idx), )) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx2q_p8(a: poly8x16_t, t: poly8x16x2_t, idx: uint8x16_t) -> poly8x16_t { transmute(vqtbx2q( transmute(a), transmute(t.0), transmute(t.1), transmute(idx), )) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl3_s8(t: int8x16x3_t, idx: uint8x8_t) -> int8x8_t { vqtbl3(t.0, t.1, t.2, idx) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl3q_s8(t: int8x16x3_t, idx: uint8x16_t) -> int8x16_t { vqtbl3q(t.0, t.1, t.2, idx) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl3_u8(t: uint8x16x3_t, idx: uint8x8_t) -> uint8x8_t { transmute(vqtbl3( 
transmute(t.0), transmute(t.1), transmute(t.2), transmute(idx), )) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl3q_u8(t: uint8x16x3_t, idx: uint8x16_t) -> uint8x16_t { transmute(vqtbl3q( transmute(t.0), transmute(t.1), transmute(t.2), transmute(idx), )) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl3_p8(t: poly8x16x3_t, idx: uint8x8_t) -> poly8x8_t { transmute(vqtbl3( transmute(t.0), transmute(t.1), transmute(t.2), transmute(idx), )) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl3q_p8(t: poly8x16x3_t, idx: uint8x16_t) -> poly8x16_t { transmute(vqtbl3q( transmute(t.0), transmute(t.1), transmute(t.2), transmute(idx), )) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx3_s8(a: int8x8_t, t: int8x16x3_t, idx: uint8x8_t) -> int8x8_t { vqtbx3(a, t.0, t.1, t.2, idx) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx3q_s8(a: int8x16_t, t: int8x16x3_t, idx: uint8x16_t) -> int8x16_t { vqtbx3q(a, t.0, t.1, t.2, idx) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx3_u8(a: uint8x8_t, t: uint8x16x3_t, idx: uint8x8_t) -> uint8x8_t { transmute(vqtbx3( transmute(a), transmute(t.0), transmute(t.1), transmute(t.2), transmute(idx), )) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx3q_u8(a: uint8x16_t, t: uint8x16x3_t, idx: uint8x16_t) -> uint8x16_t { transmute(vqtbx3q( transmute(a), transmute(t.0), transmute(t.1), transmute(t.2), transmute(idx), )) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx3_p8(a: poly8x8_t, t: poly8x16x3_t, idx: uint8x8_t) -> poly8x8_t { transmute(vqtbx3( transmute(a), transmute(t.0), transmute(t.1), transmute(t.2), transmute(idx), )) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx3q_p8(a: poly8x16_t, t: poly8x16x3_t, idx: uint8x16_t) -> poly8x16_t { transmute(vqtbx3q( transmute(a), transmute(t.0), transmute(t.1), transmute(t.2), transmute(idx), )) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl4_s8(t: int8x16x4_t, idx: uint8x8_t) -> int8x8_t { vqtbl4(t.0, t.1, t.2, t.3, idx) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl4q_s8(t: int8x16x4_t, idx: uint8x16_t) -> int8x16_t { vqtbl4q(t.0, t.1, t.2, t.3, idx) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl4_u8(t: uint8x16x4_t, idx: uint8x8_t) -> uint8x8_t { transmute(vqtbl4( transmute(t.0), transmute(t.1), transmute(t.2), 
transmute(t.3), transmute(idx), )) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl4q_u8(t: uint8x16x4_t, idx: uint8x16_t) -> uint8x16_t { transmute(vqtbl4q( transmute(t.0), transmute(t.1), transmute(t.2), transmute(t.3), transmute(idx), )) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl4_p8(t: poly8x16x4_t, idx: uint8x8_t) -> poly8x8_t { transmute(vqtbl4( transmute(t.0), transmute(t.1), transmute(t.2), transmute(t.3), transmute(idx), )) } /// Table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbl))] pub unsafe fn vqtbl4q_p8(t: poly8x16x4_t, idx: uint8x16_t) -> poly8x16_t { transmute(vqtbl4q( transmute(t.0), transmute(t.1), transmute(t.2), transmute(t.3), transmute(idx), )) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx4_s8(a: int8x8_t, t: int8x16x4_t, idx: uint8x8_t) -> int8x8_t { vqtbx4(a, t.0, t.1, t.2, t.3, idx) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx4q_s8(a: int8x16_t, t: int8x16x4_t, idx: uint8x16_t) -> int8x16_t { vqtbx4q(a, t.0, t.1, t.2, t.3, idx) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx4_u8(a: uint8x8_t, t: uint8x16x4_t, idx: uint8x8_t) -> uint8x8_t { transmute(vqtbx4( transmute(a), transmute(t.0), transmute(t.1), transmute(t.2), transmute(t.3), transmute(idx), )) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx4q_u8(a: uint8x16_t, t: uint8x16x4_t, idx: uint8x16_t) -> uint8x16_t { transmute(vqtbx4q( transmute(a), transmute(t.0), transmute(t.1), transmute(t.2), transmute(t.3), transmute(idx), )) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx4_p8(a: poly8x8_t, t: poly8x16x4_t, idx: uint8x8_t) -> poly8x8_t { transmute(vqtbx4( transmute(a), transmute(t.0), transmute(t.1), transmute(t.2), transmute(t.3), transmute(idx), )) } /// Extended table look-up #[inline] #[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(tbx))] pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> poly8x16_t { transmute(vqtbx4q( transmute(a), transmute(t.0), transmute(t.1), transmute(t.2), transmute(t.3), transmute(idx), )) } #[cfg(test)] mod tests { use crate::core_arch::{aarch64::*, simd::*}; use std::mem::transmute; use stdsimd_test::simd_test; #[simd_test(enable = "neon")] unsafe fn test_vadd_f64() { let a = 1.; let b = 8.; let e = 9.; let r: f64 = transmute(vadd_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddq_f64() { let a = f64x2::new(1., 2.); let b = f64x2::new(8., 7.); let e = f64x2::new(9., 9.); let r: f64x2 = transmute(vaddq_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddd_s64() { let a = 1_i64; let b = 8_i64; let e = 9_i64; let r: i64 = 
transmute(vaddd_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddd_u64() { let a = 1_u64; let b = 8_u64; let e = 9_u64; let r: u64 = transmute(vaddd_u64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vmaxv_s8() { let r = vmaxv_s8(transmute(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5))); assert_eq!(r, 7_i8); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_s8() { #[rustfmt::skip] let r = vmaxvq_s8(transmute(i8x16::new( 1, 2, 3, 4, -16, 6, 7, 5, 8, 1, 1, 1, 1, 1, 1, 1, ))); assert_eq!(r, 8_i8); } #[simd_test(enable = "neon")] unsafe fn test_vmaxv_s16() { let r = vmaxv_s16(transmute(i16x4::new(1, 2, -4, 3))); assert_eq!(r, 3_i16); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_s16() { let r = vmaxvq_s16(transmute(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5))); assert_eq!(r, 7_i16); } #[simd_test(enable = "neon")] unsafe fn test_vmaxv_s32() { let r = vmaxv_s32(transmute(i32x2::new(1, -4))); assert_eq!(r, 1_i32); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_s32() { let r = vmaxvq_s32(transmute(i32x4::new(1, 2, -32, 4))); assert_eq!(r, 4_i32); } #[simd_test(enable = "neon")] unsafe fn test_vmaxv_u8() { let r = vmaxv_u8(transmute(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5))); assert_eq!(r, 8_u8); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_u8() { #[rustfmt::skip] let r = vmaxvq_u8(transmute(u8x16::new( 1, 2, 3, 4, 16, 6, 7, 5, 8, 1, 1, 1, 1, 1, 1, 1, ))); assert_eq!(r, 16_u8); } #[simd_test(enable = "neon")] unsafe fn test_vmaxv_u16() { let r = vmaxv_u16(transmute(u16x4::new(1, 2, 4, 3))); assert_eq!(r, 4_u16); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_u16() { let r = vmaxvq_u16(transmute(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5))); assert_eq!(r, 16_u16); } #[simd_test(enable = "neon")] unsafe fn test_vmaxv_u32() { let r = vmaxv_u32(transmute(u32x2::new(1, 4))); assert_eq!(r, 4_u32); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_u32() { let r = vmaxvq_u32(transmute(u32x4::new(1, 2, 32, 4))); assert_eq!(r, 32_u32); } #[simd_test(enable = "neon")] unsafe fn test_vmaxv_f32() { let r = vmaxv_f32(transmute(f32x2::new(1., 4.))); assert_eq!(r, 4_f32); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_f32() { let r = vmaxvq_f32(transmute(f32x4::new(1., 2., 32., 4.))); assert_eq!(r, 32_f32); } #[simd_test(enable = "neon")] unsafe fn test_vmaxvq_f64() { let r = vmaxvq_f64(transmute(f64x2::new(1., 4.))); assert_eq!(r, 4_f64); } #[simd_test(enable = "neon")] unsafe fn test_vminv_s8() { let r = vminv_s8(transmute(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5))); assert_eq!(r, -8_i8); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_s8() { #[rustfmt::skip] let r = vminvq_s8(transmute(i8x16::new( 1, 2, 3, 4, -16, 6, 7, 5, 8, 1, 1, 1, 1, 1, 1, 1, ))); assert_eq!(r, -16_i8); } #[simd_test(enable = "neon")] unsafe fn test_vminv_s16() { let r = vminv_s16(transmute(i16x4::new(1, 2, -4, 3))); assert_eq!(r, -4_i16); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_s16() { let r = vminvq_s16(transmute(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5))); assert_eq!(r, -16_i16); } #[simd_test(enable = "neon")] unsafe fn test_vminv_s32() { let r = vminv_s32(transmute(i32x2::new(1, -4))); assert_eq!(r, -4_i32); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_s32() { let r = vminvq_s32(transmute(i32x4::new(1, 2, -32, 4))); assert_eq!(r, -32_i32); } #[simd_test(enable = "neon")] unsafe fn test_vminv_u8() { let r = vminv_u8(transmute(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5))); assert_eq!(r, 1_u8); } #[simd_test(enable = 
"neon")] unsafe fn test_vminvq_u8() { #[rustfmt::skip] let r = vminvq_u8(transmute(u8x16::new( 1, 2, 3, 4, 16, 6, 7, 5, 8, 1, 1, 1, 1, 1, 1, 1, ))); assert_eq!(r, 1_u8); } #[simd_test(enable = "neon")] unsafe fn test_vminv_u16() { let r = vminv_u16(transmute(u16x4::new(1, 2, 4, 3))); assert_eq!(r, 1_u16); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_u16() { let r = vminvq_u16(transmute(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5))); assert_eq!(r, 1_u16); } #[simd_test(enable = "neon")] unsafe fn test_vminv_u32() { let r = vminv_u32(transmute(u32x2::new(1, 4))); assert_eq!(r, 1_u32); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_u32() { let r = vminvq_u32(transmute(u32x4::new(1, 2, 32, 4))); assert_eq!(r, 1_u32); } #[simd_test(enable = "neon")] unsafe fn test_vminv_f32() { let r = vminv_f32(transmute(f32x2::new(1., 4.))); assert_eq!(r, 1_f32); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_f32() { let r = vminvq_f32(transmute(f32x4::new(1., 2., 32., 4.))); assert_eq!(r, 1_f32); } #[simd_test(enable = "neon")] unsafe fn test_vminvq_f64() { let r = vminvq_f64(transmute(f64x2::new(1., 4.))); assert_eq!(r, 1_f64); } #[simd_test(enable = "neon")] unsafe fn test_vpminq_s8() { #[cfg_attr(rustfmt, skip)] let a = i8x16::new(1, -2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); #[cfg_attr(rustfmt, skip)] let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9); #[cfg_attr(rustfmt, skip)] let e = i8x16::new(-2, -4, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6); let r: i8x16 = transmute(vpminq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpminq_s16() { let a = i16x8::new(1, -2, 3, 4, 5, 6, 7, 8); let b = i16x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = i16x8::new(-2, 3, 5, 7, 0, 2, 4, 6); let r: i16x8 = transmute(vpminq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpminq_s32() { let a = i32x4::new(1, -2, 3, 4); let b = i32x4::new(0, 3, 2, 5); let e = i32x4::new(-2, 3, 0, 2); let r: i32x4 = transmute(vpminq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpminq_u8() { #[cfg_attr(rustfmt, skip)] let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); #[cfg_attr(rustfmt, skip)] let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9); #[cfg_attr(rustfmt, skip)] let e = u8x16::new(1, 3, 5, 7, 1, 3, 5, 7, 0, 2, 4, 6, 0, 2, 4, 6); let r: u8x16 = transmute(vpminq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpminq_u16() { let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = u16x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = u16x8::new(1, 3, 5, 7, 0, 2, 4, 6); let r: u16x8 = transmute(vpminq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpminq_u32() { let a = u32x4::new(1, 2, 3, 4); let b = u32x4::new(0, 3, 2, 5); let e = u32x4::new(1, 3, 0, 2); let r: u32x4 = transmute(vpminq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmin_f32() { let a = f32x4::new(1., -2., 3., 4.); let b = f32x4::new(0., 3., 2., 5.); let e = f32x4::new(-2., 3., 0., 2.); let r: f32x4 = transmute(vpminq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmin_f64() { let a = f64x2::new(1., -2.); let b = f64x2::new(0., 3.); let e = f64x2::new(-2., 0.); let r: f64x2 = transmute(vpminq_f64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable 
= "neon")] unsafe fn test_vpmaxq_s8() { #[cfg_attr(rustfmt, skip)] let a = i8x16::new(1, -2, 3, -4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); #[cfg_attr(rustfmt, skip)] let b = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9); #[cfg_attr(rustfmt, skip)] let e = i8x16::new(1, 3, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9); let r: i8x16 = transmute(vpmaxq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmaxq_s16() { let a = i16x8::new(1, -2, 3, 4, 5, 6, 7, 8); let b = i16x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = i16x8::new(1, 4, 6, 8, 3, 5, 7, 9); let r: i16x8 = transmute(vpmaxq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmaxq_s32() { let a = i32x4::new(1, -2, 3, 4); let b = i32x4::new(0, 3, 2, 5); let e = i32x4::new(1, 4, 3, 5); let r: i32x4 = transmute(vpmaxq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmaxq_u8() { #[cfg_attr(rustfmt, skip)] let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); #[cfg_attr(rustfmt, skip)] let b = u8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9); #[cfg_attr(rustfmt, skip)] let e = u8x16::new(2, 4, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9); let r: u8x16 = transmute(vpmaxq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmaxq_u16() { let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = u16x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = u16x8::new(2, 4, 6, 8, 3, 5, 7, 9); let r: u16x8 = transmute(vpmaxq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmaxq_u32() { let a = u32x4::new(1, 2, 3, 4); let b = u32x4::new(0, 3, 2, 5); let e = u32x4::new(2, 4, 3, 5); let r: u32x4 = transmute(vpmaxq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmax_f32() { let a = f32x4::new(1., -2., 3., 4.); let b = f32x4::new(0., 3., 2., 5.); let e = f32x4::new(1., 4., 3., 5.); let r: f32x4 = transmute(vpmaxq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmax_f64() { let a = f64x2::new(1., -2.); let b = f64x2::new(0., 3.); let e = f64x2::new(1., 3.); let r: f64x2 = transmute(vpmaxq_f64(transmute(a), transmute(b))); assert_eq!(r, e); } macro_rules! 
test_vcombine { ($test_id:ident => $fn_id:ident ([$($a:expr),*], [$($b:expr),*])) => { #[allow(unused_assignments)] #[simd_test(enable = "neon")] unsafe fn $test_id() { let a = [$($a),*]; let b = [$($b),*]; let e = [$($a),* $(, $b)*]; let c = $fn_id(transmute(a), transmute(b)); let mut d = e; d = transmute(c); assert_eq!(d, e); } } } test_vcombine!(test_vcombine_s8 => vcombine_s8([3_i8, -4, 5, -6, 7, 8, 9, 10], [13_i8, -14, 15, -16, 17, 18, 19, 110])); test_vcombine!(test_vcombine_u8 => vcombine_u8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110])); test_vcombine!(test_vcombine_p8 => vcombine_p8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110])); test_vcombine!(test_vcombine_s16 => vcombine_s16([3_i16, -4, 5, -6], [13_i16, -14, 15, -16])); test_vcombine!(test_vcombine_u16 => vcombine_u16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16])); test_vcombine!(test_vcombine_p16 => vcombine_p16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16])); // FIXME: 16-bit floats // test_vcombine!(test_vcombine_f16 => vcombine_f16([3_f16, 4., 5., 6.], // [13_f16, 14., 15., 16.])); test_vcombine!(test_vcombine_s32 => vcombine_s32([3_i32, -4], [13_i32, -14])); test_vcombine!(test_vcombine_u32 => vcombine_u32([3_u32, 4], [13_u32, 14])); // note: poly32x4 does not exist, and neither does vcombine_p32 test_vcombine!(test_vcombine_f32 => vcombine_f32([3_f32, -4.], [13_f32, -14.])); test_vcombine!(test_vcombine_s64 => vcombine_s64([-3_i64], [13_i64])); test_vcombine!(test_vcombine_u64 => vcombine_u64([3_u64], [13_u64])); test_vcombine!(test_vcombine_p64 => vcombine_p64([3_u64], [13_u64])); test_vcombine!(test_vcombine_f64 => vcombine_f64([-3_f64], [13_f64])); } #[cfg(test)] #[cfg(target_endian = "little")] #[path = "../arm/table_lookup_tests.rs"] mod table_lookup_tests; core_arch-0.1.5/src/aarch64/v8.rs010064400007650000024000000050001343447103600146030ustar0000000000000000//! ARMv8 intrinsics. //! //! The reference is [ARMv8-A Reference Manual][armv8]. //! //! [armv8]: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc. //! ddi0487a.k_10775/index.html #[cfg(test)] use stdsimd_test::assert_instr; /// Reverse the order of the bytes. #[inline] #[cfg_attr(test, assert_instr(rev))] pub unsafe fn _rev_u64(x: u64) -> u64 { x.swap_bytes() as u64 } /// Count Leading Zeros. #[inline] #[cfg_attr(test, assert_instr(clz))] pub unsafe fn _clz_u64(x: u64) -> u64 { x.leading_zeros() as u64 } /// Reverse the bit order. #[inline] #[cfg_attr(test, assert_instr(rbit))] pub unsafe fn _rbit_u64(x: u64) -> u64 { crate::intrinsics::bitreverse(x) } /// Counts the leading most significant bits set. /// /// When all bits of the operand are set it returns the size of the operand in /// bits. #[inline] #[cfg_attr(test, assert_instr(cls))] pub unsafe fn _cls_u32(x: u32) -> u32 { u32::leading_zeros((((((x as i32) >> 31) as u32) ^ x) << 1) | 1) as u32 } /// Counts the leading most significant bits set. /// /// When all bits of the operand are set it returns the size of the operand in /// bits. 
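///
/// The expression below is a branch-free way to compute this: the arithmetic
/// shift replicates the sign bit across the word, the XOR turns every leading
/// copy of the sign bit into a leading zero, and `<< 1 | 1` drops the sign bit
/// itself while guaranteeing a terminating one bit, so `leading_zeros` then
/// returns the count of sign-bit copies (at most 63 for a 64-bit operand).
/// For example, for `x = 0xFFFF_00FF_0000_0000` the intermediate value is
/// `0x0001_FE01_FFFF_FFFF`, giving a result of 15.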
#[inline] #[cfg_attr(test, assert_instr(cls))] pub unsafe fn _cls_u64(x: u64) -> u64 { u64::leading_zeros((((((x as i64) >> 63) as u64) ^ x) << 1) | 1) as u64 } #[cfg(test)] mod tests { use crate::core_arch::aarch64::v8; #[test] fn _rev_u64() { unsafe { assert_eq!( v8::_rev_u64(0b0000_0000_1111_1111_0000_0000_1111_1111_u64), 0b1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64 ); } } #[test] fn _clz_u64() { unsafe { assert_eq!(v8::_clz_u64(0b0000_1010u64), 60u64); } } #[test] fn _rbit_u64() { unsafe { assert_eq!( v8::_rbit_u64(0b0000_0000_1111_1101_0000_0000_1111_1111_u64), 0b1111_1111_0000_0000_1011_1111_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_u64 ); } } #[test] fn _cls_u32() { unsafe { assert_eq!( v8::_cls_u32(0b1111_1111_1111_1111_0000_0000_1111_1111_u32), 15_u32 ); } } #[test] fn _cls_u64() { unsafe { assert_eq!( v8::_cls_u64( 0b1111_1111_1111_1111_0000_0000_1111_1111_0000_0000_0000_0000_0000_0000_0000_0000_u64 ), 15_u64 ); } } } core_arch-0.1.5/src/acle/barrier/common.rs010064400007650000024000000004761343257532600166610ustar0000000000000000//! Access types available on all architectures /// Full system is the required shareability domain, reads and writes are the /// required access types pub struct SY; dmb_dsb!(SY); impl super::super::sealed::Isb for SY { #[inline(always)] unsafe fn __isb(&self) { super::isb(super::arg::SY) } } core_arch-0.1.5/src/acle/barrier/cp15.rs010064400007650000024000000013661343257532600161400ustar0000000000000000// Reference: ARM11 MPCore Processor Technical Reference Manual (ARM DDI 0360E) Section 3.5 "Summary // of CP15 instructions" /// Full system is the required shareability domain, reads and writes are the /// required access types pub struct SY; impl super::super::sealed::Dmb for SY { #[inline(always)] unsafe fn __dmb(&self) { asm!("mcr p15, 0, r0, c7, c10, 5" : : : "memory" : "volatile") } } impl super::super::sealed::Dsb for SY { #[inline(always)] unsafe fn __dsb(&self) { asm!("mcr p15, 0, r0, c7, c10, 4" : : : "memory" : "volatile") } } impl super::super::sealed::Isb for SY { #[inline(always)] unsafe fn __isb(&self) { asm!("mcr p15, 0, r0, c7, c5, 4" : : : "memory" : "volatile") } } core_arch-0.1.5/src/acle/barrier/mod.rs010064400007650000024000000105531343257532600161450ustar0000000000000000// Reference: Section 7.4 "Hints" of ACLE // CP15 instruction #[cfg(not(any( // v8 target_arch = "aarch64", // v7 target_feature = "v7", // v6-M target_feature = "mclass" )))] mod cp15; #[cfg(not(any( target_arch = "aarch64", target_feature = "v7", target_feature = "mclass" )))] pub use self::cp15::*; // Dedicated instructions #[cfg(any( target_arch = "aarch64", target_feature = "v7", target_feature = "mclass" ))] macro_rules! 
dmb_dsb { ($A:ident) => { impl super::super::sealed::Dmb for $A { #[inline(always)] unsafe fn __dmb(&self) { super::dmb(super::arg::$A) } } impl super::super::sealed::Dsb for $A { #[inline(always)] unsafe fn __dsb(&self) { super::dsb(super::arg::$A) } } }; } #[cfg(any( target_arch = "aarch64", target_feature = "v7", target_feature = "mclass" ))] mod common; #[cfg(any( target_arch = "aarch64", target_feature = "v7", target_feature = "mclass" ))] pub use self::common::*; #[cfg(any(target_arch = "aarch64", target_feature = "v7",))] mod not_mclass; #[cfg(any(target_arch = "aarch64", target_feature = "v7",))] pub use self::not_mclass::*; #[cfg(target_arch = "aarch64")] mod v8; #[cfg(target_arch = "aarch64")] pub use self::v8::*; /// Generates a DMB (data memory barrier) instruction or equivalent CP15 instruction. /// /// DMB ensures the observed ordering of memory accesses. Memory accesses of the specified type /// issued before the DMB are guaranteed to be observed (in the specified scope) before memory /// accesses issued after the DMB. /// /// For example, DMB should be used between storing data, and updating a flag variable that makes /// that data available to another core. /// /// The __dmb() intrinsic also acts as a compiler memory barrier of the appropriate type. #[inline(always)] pub unsafe fn __dmb(arg: A) where A: super::sealed::Dmb, { arg.__dmb() } /// Generates a DSB (data synchronization barrier) instruction or equivalent CP15 instruction. /// /// DSB ensures the completion of memory accesses. A DSB behaves as the equivalent DMB and has /// additional properties. After a DSB instruction completes, all memory accesses of the specified /// type issued before the DSB are guaranteed to have completed. /// /// The __dsb() intrinsic also acts as a compiler memory barrier of the appropriate type. #[inline(always)] pub unsafe fn __dsb(arg: A) where A: super::sealed::Dsb, { arg.__dsb() } /// Generates an ISB (instruction synchronization barrier) instruction or equivalent CP15 /// instruction. /// /// This instruction flushes the processor pipeline fetch buffers, so that following instructions /// are fetched from cache or memory. /// /// An ISB is needed after some system maintenance operations. An ISB is also needed before /// transferring control to code that has been loaded or modified in memory, for example by an /// overlay mechanism or just-in-time code generator. (Note that if instruction and data caches are /// separate, privileged cache maintenance operations would be needed in order to unify the caches.) /// /// The only supported argument for the __isb() intrinsic is 15, corresponding to the SY (full /// system) scope of the ISB instruction. 
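///
/// # Examples
///
/// A hypothetical sketch of combining the three barriers (the statics and the
/// code-patching step are illustrative placeholders, not items of this crate):
///
/// ```ignore
/// unsafe {
///     DATA = 42;              // publish data...
///     __dmb(SY);              // ...and order it before the flag update
///     READY = true;
///
///     patch_code_in_memory(); // e.g. a JIT writing new instructions
///     __dsb(SY);              // wait for the writes to complete
///     __isb(SY);              // refetch the (possibly modified) instruction stream
/// }
/// ```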
#[inline(always)] pub unsafe fn __isb(arg: A) where A: super::sealed::Isb, { arg.__isb() } extern "C" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.dmb")] #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dmb")] fn dmb(_: i32); #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.dsb")] #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dsb")] fn dsb(_: i32); #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.isb")] #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.isb")] fn isb(_: i32); } // we put these in a module to prevent weirdness with glob re-exports mod arg { // See Section 7.3 Memory barriers of ACLE pub const SY: i32 = 15; pub const ST: i32 = 14; pub const LD: i32 = 13; pub const ISH: i32 = 11; pub const ISHST: i32 = 10; pub const ISHLD: i32 = 9; pub const NSH: i32 = 7; pub const NSHST: i32 = 6; pub const NSHLD: i32 = 5; pub const OSH: i32 = 3; pub const OSHST: i32 = 2; pub const OSHLD: i32 = 1; } core_arch-0.1.5/src/acle/barrier/not_mclass.rs010064400007650000024000000017641343257532600175340ustar0000000000000000//! Access types available on v7 and v8 but not on v7(E)-M or v8-M /// Full system is the required shareability domain, writes are the required /// access type pub struct ST; dmb_dsb!(ST); /// Inner Shareable is the required shareability domain, reads and writes are /// the required access types pub struct ISH; dmb_dsb!(ISH); /// Inner Shareable is the required shareability domain, writes are the required /// access type pub struct ISHST; dmb_dsb!(ISHST); /// Non-shareable is the required shareability domain, reads and writes are the /// required access types pub struct NSH; dmb_dsb!(NSH); /// Non-shareable is the required shareability domain, writes are the required /// access type pub struct NSHST; dmb_dsb!(NSHST); /// Outer Shareable is the required shareability domain, reads and writes are /// the required access types pub struct OSH; dmb_dsb!(OSH); /// Outer Shareable is the required shareability domain, writes are the required /// access type pub struct OSHST; dmb_dsb!(OSHST); core_arch-0.1.5/src/acle/barrier/v8.rs010064400007650000024000000010101343257532600157070ustar0000000000000000/// Full system is the required shareability domain, reads are the required /// access type pub struct LD; dmb_dsb!(LD); /// Inner Shareable is the required shareability domain, reads are the required /// access type pub struct ISHLD; dmb_dsb!(ISHLD); /// Non-shareable is the required shareability domain, reads are the required /// access type pub struct NSHLD; dmb_dsb!(NSHLD); /// Outher Shareable is the required shareability domain, reads are the required /// access type pub struct OSHLD; dmb_dsb!(OSHLD); core_arch-0.1.5/src/acle/dsp.rs010064400007650000024000000245341345531200000145110ustar0000000000000000//! # References: //! //! - Section 8.3 "16-bit multiplications" //! //! Intrinsics that could live here: //! //! - \[x\] __smulbb //! - \[x\] __smulbt //! - \[x\] __smultb //! - \[x\] __smultt //! - \[x\] __smulwb //! - \[x\] __smulwt //! - \[x\] __qadd //! - \[x\] __qsub //! - \[x\] __qdbl //! - \[x\] __smlabb //! - \[x\] __smlabt //! - \[x\] __smlatb //! - \[x\] __smlatt //! - \[x\] __smlawb //! - \[x\] __smlawt #[cfg(test)] use stdsimd_test::assert_instr; use crate::mem::transmute; types! { /// ARM-specific 32-bit wide vector of two packed `i16`. pub struct int16x2_t(i16, i16); /// ARM-specific 32-bit wide vector of two packed `u16`. 
pub struct uint16x2_t(u16, u16);
}

extern "C" {
    #[link_name = "llvm.arm.smulbb"]
    fn arm_smulbb(a: i32, b: i32) -> i32;
    #[link_name = "llvm.arm.smulbt"]
    fn arm_smulbt(a: i32, b: i32) -> i32;
    #[link_name = "llvm.arm.smultb"]
    fn arm_smultb(a: i32, b: i32) -> i32;
    #[link_name = "llvm.arm.smultt"]
    fn arm_smultt(a: i32, b: i32) -> i32;
    #[link_name = "llvm.arm.smulwb"]
    fn arm_smulwb(a: i32, b: i32) -> i32;
    #[link_name = "llvm.arm.smulwt"]
    fn arm_smulwt(a: i32, b: i32) -> i32;
    #[link_name = "llvm.arm.qadd"]
    fn arm_qadd(a: i32, b: i32) -> i32;
    #[link_name = "llvm.arm.qsub"]
    fn arm_qsub(a: i32, b: i32) -> i32;
    #[link_name = "llvm.arm.smlabb"]
    fn arm_smlabb(a: i32, b: i32, c: i32) -> i32;
    #[link_name = "llvm.arm.smlabt"]
    fn arm_smlabt(a: i32, b: i32, c: i32) -> i32;
    #[link_name = "llvm.arm.smlatb"]
    fn arm_smlatb(a: i32, b: i32, c: i32) -> i32;
    #[link_name = "llvm.arm.smlatt"]
    fn arm_smlatt(a: i32, b: i32, c: i32) -> i32;
    #[link_name = "llvm.arm.smlawb"]
    fn arm_smlawb(a: i32, b: i32, c: i32) -> i32;
    #[link_name = "llvm.arm.smlawt"]
    fn arm_smlawt(a: i32, b: i32, c: i32) -> i32;
}

/// Insert a SMULBB instruction
///
/// Returns the equivalent of a\[0\] * b\[0\]
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
#[inline]
#[cfg_attr(test, assert_instr(smulbb))]
pub unsafe fn __smulbb(a: int16x2_t, b: int16x2_t) -> i32 {
    arm_smulbb(transmute(a), transmute(b))
}

/// Insert a SMULTB instruction
///
/// Returns the equivalent of a\[1\] * b\[0\]
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
#[inline]
#[cfg_attr(test, assert_instr(smultb))]
pub unsafe fn __smultb(a: int16x2_t, b: int16x2_t) -> i32 {
    arm_smultb(transmute(a), transmute(b))
}

/// Insert a SMULBT instruction
///
/// Returns the equivalent of a\[0\] * b\[1\]
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
#[inline]
#[cfg_attr(test, assert_instr(smulbt))]
pub unsafe fn __smulbt(a: int16x2_t, b: int16x2_t) -> i32 {
    arm_smulbt(transmute(a), transmute(b))
}

/// Insert a SMULTT instruction
///
/// Returns the equivalent of a\[1\] * b\[1\]
/// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits.
#[inline]
#[cfg_attr(test, assert_instr(smultt))]
pub unsafe fn __smultt(a: int16x2_t, b: int16x2_t) -> i32 {
    arm_smultt(transmute(a), transmute(b))
}

/// Insert a SMULWB instruction
///
/// Multiplies the 32-bit signed first operand with the low halfword
/// (as a 16-bit signed integer) of the second operand.
/// Returns the top 32 bits of the 48-bit product.
#[inline]
#[cfg_attr(test, assert_instr(smulwb))]
pub unsafe fn __smulwb(a: int16x2_t, b: i32) -> i32 {
    arm_smulwb(transmute(a), b)
}

/// Insert a SMULWT instruction
///
/// Multiplies the 32-bit signed first operand with the high halfword
/// (as a 16-bit signed integer) of the second operand.
/// Returns the top 32 bits of the 48-bit product.
#[inline]
#[cfg_attr(test, assert_instr(smulwt))]
pub unsafe fn __smulwt(a: int16x2_t, b: i32) -> i32 {
    arm_smulwt(transmute(a), b)
}

/// Signed saturating addition
///
/// Returns the 32-bit saturating signed equivalent of a + b.
/// Sets the Q flag if saturation occurs.
#[inline]
#[cfg_attr(test, assert_instr(qadd))]
pub unsafe fn __qadd(a: i32, b: i32) -> i32 {
    arm_qadd(a, b)
}

/// Signed saturating subtraction
///
/// Returns the 32-bit saturating signed equivalent of a - b.
/// Sets the Q flag if saturation occurs.
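/// For example, `__qsub(i32::MIN, 1)` saturates to `i32::MIN` instead of
/// wrapping, just as `__qadd(i32::MAX, 1)` saturates to `i32::MAX`.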
#[inline] #[cfg_attr(test, assert_instr(qsub))] pub unsafe fn __qsub(a: i32, b: i32) -> i32 { arm_qsub(a, b) } /// Insert a QADD instruction /// /// Returns the 32-bit saturating signed equivalent of a + a /// Sets the Q flag if saturation occurs. #[inline] #[cfg_attr(test, assert_instr(qadd))] pub unsafe fn __qdbl(a: i32) -> i32 { arm_qadd(a, a) } /// Insert a SMLABB instruction /// /// Returns the equivalent of a\[0\] * b\[0\] + c /// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits. /// Sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smlabb))] pub unsafe fn __smlabb(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { arm_smlabb(transmute(a), transmute(b), c) } /// Insert a SMLABT instruction /// /// Returns the equivalent of a\[0\] * b\[1\] + c /// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits. /// Sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smlabt))] pub unsafe fn __smlabt(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { arm_smlabt(transmute(a), transmute(b), c) } /// Insert a SMLATB instruction /// /// Returns the equivalent of a\[1\] * b\[0\] + c /// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits. /// Sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smlatb))] pub unsafe fn __smlatb(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { arm_smlatb(transmute(a), transmute(b), c) } /// Insert a SMLATT instruction /// /// Returns the equivalent of a\[1\] * b\[1\] + c /// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits. /// Sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smlatt))] pub unsafe fn __smlatt(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { arm_smlatt(transmute(a), transmute(b), c) } /// Insert a SMLAWB instruction /// /// Returns the equivalent of (a * b\[0\] + (c << 16)) >> 16 /// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits. /// Sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smlawb))] pub unsafe fn __smlawb(a: i32, b: int16x2_t, c: i32) -> i32 { arm_smlawb(a, transmute(b), c) } /// Insert a SMLAWT instruction /// /// Returns the equivalent of (a * b\[1\] + (c << 16)) >> 16 /// where \[0\] is the lower 16 bits and \[1\] is the upper 16 bits. /// Sets the Q flag if overflow occurs on the addition. 
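/// For example, with `a = 10`, `b = (30, 40)` and `c = 50` the result is
/// `(10 * 40 + (50 << 16)) >> 16 = 50`.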
#[inline]
#[cfg_attr(test, assert_instr(smlawt))]
pub unsafe fn __smlawt(a: i32, b: int16x2_t, c: i32) -> i32 {
    arm_smlawt(a, transmute(b), c)
}

#[cfg(test)]
mod tests {
    use crate::core_arch::arm::*;
    use crate::core_arch::simd::{i16x2, i8x4, u8x4};
    use std::mem::transmute;
    use stdsimd_test::simd_test;

    #[test]
    fn smulbb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smulbb(transmute(a), transmute(b)), 10 * 30);
        }
    }

    #[test]
    fn smulbt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smulbt(transmute(a), transmute(b)), 10 * 40);
        }
    }

    #[test]
    fn smultb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smultb(transmute(a), transmute(b)), 20 * 30);
        }
    }

    #[test]
    fn smultt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            assert_eq!(super::__smultt(transmute(a), transmute(b)), 20 * 40);
        }
    }

    #[test]
    fn smulwb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = 30;
            assert_eq!(super::__smulwb(transmute(a), b), 20 * b);
        }
    }

    #[test]
    fn smulwt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = 30;
            assert_eq!(super::__smulwt(transmute(a), b), (10 * b) >> 16);
        }
    }

    #[test]
    fn qadd() {
        unsafe {
            assert_eq!(super::__qadd(-10, 60), 50);
            assert_eq!(super::__qadd(::std::i32::MAX, 10), ::std::i32::MAX);
            assert_eq!(super::__qadd(::std::i32::MIN, -10), ::std::i32::MIN);
        }
    }

    #[test]
    fn qsub() {
        unsafe {
            assert_eq!(super::__qsub(10, 60), -50);
            assert_eq!(super::__qsub(::std::i32::MAX, -10), ::std::i32::MAX);
            assert_eq!(super::__qsub(::std::i32::MIN, 10), ::std::i32::MIN);
        }
    }

    #[test]
    fn qdbl() {
        unsafe {
            assert_eq!(super::__qdbl(10), 20);
            assert_eq!(super::__qdbl(::std::i32::MAX), ::std::i32::MAX);
        }
    }

    #[test]
    fn smlabb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (10 * 30) + c;
            assert_eq!(super::__smlabb(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlabt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (10 * 40) + c;
            assert_eq!(super::__smlabt(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlatb() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (20 * 30) + c;
            assert_eq!(super::__smlatb(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlatt() {
        unsafe {
            let a = i16x2::new(10, 20);
            let b = i16x2::new(30, 40);
            let c = 50;
            let r = (20 * 40) + c;
            assert_eq!(super::__smlatt(transmute(a), transmute(b), c), r);
        }
    }

    #[test]
    fn smlawb() {
        unsafe {
            let a: i32 = 10;
            let b = i16x2::new(30, 40);
            let c: i32 = 50;
            let r: i32 = ((a * 30) + (c << 16)) >> 16;
            assert_eq!(super::__smlawb(a, transmute(b), c), r);
        }
    }

    #[test]
    fn smlawt() {
        unsafe {
            let a: i32 = 10;
            let b = i16x2::new(30, 40);
            let c: i32 = 50;
            let r: i32 = ((a * 40) + (c << 16)) >> 16;
            assert_eq!(super::__smlawt(a, transmute(b), c), r);
        }
    }
}
core_arch-0.1.5/src/acle/ex.rs010064400007650000024000000064421343257532600143540ustar0000000000000000
// Reference: Section 5.4.4 "LDREX / STREX" of ACLE

/// Removes the exclusive lock created by LDREX
// Supported: v6, v6K, v7-M, v7-A, v7-R
// Not supported: v5, v6-M
// NOTE: there's no dedicated CLREX instruction in v6 ( u8 { extern "C" { #[link_name = "llvm.arm.ldrex.p0i8"] fn ldrex8(p: *const u8) -> u32; } ldrex8(p) as u8 }

/// Executes an exclusive LDR instruction for 16 bit value.
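///
/// A hypothetical sketch of the usual load-exclusive/store-exclusive retry
/// loop built from this pair (`counter: *mut u16` is an illustrative
/// placeholder):
///
/// ```ignore
/// unsafe {
///     loop {
///         let v = __ldrexh(counter);                     // exclusive load
///         if __strexh(v.wrapping_add(1), counter) == 0 { // 0 means the store succeeded
///             break;
///         }
///         // another observer touched the location; try again
///     }
/// }
/// ```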
// Supported: v6K, v7-M, v7-A, v7-R, v8 // Not supported: v5, v6, v6-M #[cfg( target_feature = "v6k", // includes v7-M but excludes v6-M )] pub unsafe fn __ldrexh(p: *const u16) -> u16 { extern "C" { #[link_name = "llvm.arm.ldrex.p0i16"] fn ldrex16(p: *const u16) -> u32; } ldrex16(p) as u16 } /// Executes a exclusive LDR instruction for 32 bit value. // Supported: v6, v7-M, v6K, v7-A, v7-R, v8 // Not supported: v5, v6-M #[cfg(any( all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M all(target_feature = "v7", target_feature = "mclass"), // v7-M ))] pub unsafe fn __ldrex(p: *const u32) -> u32 { extern "C" { #[link_name = "llvm.arm.ldrex.p0i32"] fn ldrex32(p: *const u32) -> u32; } ldrex32(p) } /// Executes a exclusive STR instruction for 8 bit values /// /// Returns `0` if the operation succeeded, or `1` if it failed // supported: v6K, v7-M, v7-A, v7-R // Not supported: v5, v6, v6-M #[cfg( target_feature = "v6k", // includes v7-M but excludes v6-M )] pub unsafe fn __strexb(value: u32, addr: *mut u8) -> u32 { extern "C" { #[link_name = "llvm.arm.strex.p0i8"] fn strex8(value: u32, addr: *mut u8) -> u32; } strex8(value, addr) } /// Executes a exclusive STR instruction for 16 bit values /// /// Returns `0` if the operation succeeded, or `1` if it failed // Supported: v6K, v7-M, v7-A, v7-R, v8 // Not supported: v5, v6, v6-M #[cfg( target_feature = "v6k", // includes v7-M but excludes v6-M )] pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 { extern "C" { #[link_name = "llvm.arm.strex.p0i16"] fn strex16(value: u32, addr: *mut u16) -> u32; } strex16(value as u32, addr) } /// Executes a exclusive STR instruction for 32 bit values /// /// Returns `0` if the operation succeeded, or `1` if it failed // Supported: v6, v7-M, v6K, v7-A, v7-R, v8 // Not supported: v5, v6-M #[cfg(any( all(target_feature = "v6", not(target_feature = "mclass")), // excludes v6-M all(target_feature = "v7", target_feature = "mclass"), // v7-M ))] pub unsafe fn __strex(value: u32, addr: *mut u32) -> u32 { extern "C" { #[link_name = "llvm.arm.strex.p0i32"] fn strex32(value: u32, addr: *mut u32) -> u32; } strex32(value, addr) } core_arch-0.1.5/src/acle/hints.rs010064400007650000024000000110121343257532600150540ustar0000000000000000// # References // // - Section 7.4 "Hints" of ACLE // - Section 7.7 "NOP" of ACLE /// Generates a WFI (wait for interrupt) hint instruction, or nothing. /// /// The WFI instruction allows (but does not require) the processor to enter a /// low-power state until one of a number of asynchronous events occurs. // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" #[cfg(any(target_feature = "v6", target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __wfi() { hint(HINT_WFI); } /// Generates a WFE (wait for event) hint instruction, or nothing. /// /// The WFE instruction allows (but does not require) the processor to enter a /// low-power state until some event occurs such as a SEV being issued by /// another processor. // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" #[cfg(any(target_feature = "v6", target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __wfe() { hint(HINT_WFE); } /// Generates a SEV (send a global event) hint instruction. /// /// This causes an event to be signaled to all processors in a multiprocessor /// system. It is a NOP on a uniprocessor system. 
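///
/// # Examples
///
/// A hypothetical sketch of the usual WFE/SEV pairing (`flag_is_set` and
/// `set_flag` are illustrative placeholders, not items of this crate):
///
/// ```ignore
/// // Waiter: doze until another core signals an event, then re-check.
/// unsafe {
///     while !flag_is_set() {
///         __wfe();
///     }
/// }
///
/// // Signaller: publish the flag, then wake any waiting cores.
/// unsafe {
///     set_flag();
///     __dsb(SY);
///     __sev();
/// }
/// ```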
// Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M, 7-M // LLVM says "instruction requires: armv6k" #[cfg(any(target_feature = "v6", target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __sev() { hint(HINT_SEV); } /// Generates a send a local event hint instruction. /// /// This causes an event to be signaled to only the processor executing this /// instruction. In a multiprocessor system, it is not required to affect the /// other processors. // LLVM says "instruction requires: armv8" #[cfg(any( target_feature = "v8", // 32-bit ARMv8 target_arch = "aarch64", // AArch64 ))] #[inline(always)] pub unsafe fn __sevl() { hint(HINT_SEVL); } /// Generates a YIELD hint instruction. /// /// This enables multithreading software to indicate to the hardware that it is /// performing a task, for example a spin-lock, that could be swapped out to /// improve overall system performance. // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M // LLVM says "instruction requires: armv6k" #[cfg(any(target_feature = "v6", target_arch = "aarch64"))] #[inline(always)] pub unsafe fn __yield() { hint(HINT_YIELD); } /// Generates a DBG instruction. /// /// This provides a hint to debugging and related systems. The argument must be /// a constant integer from 0 to 15 inclusive. See implementation documentation /// for the effect (if any) of this instruction and the meaning of the /// argument. This is available only when compliling for AArch32. // Section 10.1 of ACLE says that the supported arches are: 7, 7-M // "The DBG hint instruction is added in ARMv7. It is UNDEFINED in the ARMv6 base architecture, and // executes as a NOP instruction in ARMv6K and ARMv6T2." - ARM Architecture Reference Manual ARMv7-A // and ARMv7-R edition (ARM DDI 0406C.c) sections D12.4.1 "ARM instruction set support" and D12.4.2 // "Thumb instruction set support" #[cfg(target_feature = "v7")] #[inline(always)] #[rustc_args_required_const(0)] pub unsafe fn __dbg(imm4: u32) { macro_rules! call { ($imm4:expr) => { asm!(concat!("DBG ", stringify!($imm4)) : : : : "volatile") } } match imm4 & 0b1111 { 0 => call!(0), 1 => call!(1), 2 => call!(2), 3 => call!(3), 4 => call!(4), 5 => call!(5), 6 => call!(6), 7 => call!(7), 8 => call!(8), 9 => call!(9), 10 => call!(10), 11 => call!(11), 12 => call!(12), 13 => call!(13), 14 => call!(14), _ => call!(15), } } /// Generates an unspecified no-op instruction. /// /// Note that not all architectures provide a distinguished NOP instruction. On /// those that do, it is unspecified whether this intrinsic generates it or /// another instruction. It is not guaranteed that inserting this instruction /// will increase execution time. #[inline(always)] pub unsafe fn __nop() { asm!("NOP" : : : : "volatile") } extern "C" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.hint")] #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.hint")] fn hint(_: i32); } // from LLVM 7.0.1's lib/Target/ARM/{ARMInstrThumb,ARMInstrInfo,ARMInstrThumb2}.td const HINT_NOP: i32 = 0; const HINT_YIELD: i32 = 1; const HINT_WFE: i32 = 2; const HINT_WFI: i32 = 3; const HINT_SEV: i32 = 4; const HINT_SEVL: i32 = 5; core_arch-0.1.5/src/acle/mod.rs010064400007650000024000000115001343257532600145100ustar0000000000000000//! ARM C Language Extensions (ACLE) //! //! # Developer notes //! //! Below is a list of built-in targets that are representative of the different ARM //! architectures; the list includes the `target_feature`s they possess. //! //! 
- `armv4t-unknown-linux-gnueabi` - **ARMv4** - `+v4t` //! - `armv5te-unknown-linux-gnueabi` - **ARMv5TE** - `+v4t +v5te` //! - `arm-unknown-linux-gnueabi` - **ARMv6** - `+v4t +v5te +v6` //! - `thumbv6m-none-eabi` - **ARMv6-M** - `+v4t +v5te +v6 +thumb-mode +mclass` //! - `armv7-unknown-linux-gnueabihf` - **ARMv7-A** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +aclass` //! - `armv7r-none-eabi` - **ARMv7-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +rclass` //! - `thumbv7m-none-eabi` - **ARMv7-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass` //! - `thumbv7em-none-eabi` - **ARMv7E-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +dsp +thumb2 +thumb-mode +mclass` //! - `thumbv8m.main-none-eabi` - **ARMv8-M** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +thumb2 +thumb-mode +mclass` //! - `armv8r-none-eabi` - **ARMv8-R** - `+v4t +v5te +v6 +v6k +v6t2 +v7 +v8 +thumb2 +rclass` //! - `aarch64-unknown-linux-gnu` - **ARMv8-A (AArch64)** - `+fp +neon` //! //! Section 10.1 of ACLE says: //! //! - "In the sequence of Arm architectures { v5, v5TE, v6, v6T2, v7 } each architecture includes //! its predecessor instruction set." //! //! - "In the sequence of Thumb-only architectures { v6-M, v7-M, v7E-M } each architecture includes //! its predecessor instruction set." //! //! From that info and from looking at how LLVM features work (using custom targets) we can identify //! features that are subsets of others: //! //! Legend: `a < b` reads as "`a` is a subset of `b`"; this means that if `b` is enabled then `a` is //! enabled as well. //! //! - `v4t < v5te < v6 < v6k < v6t2 < v7 < v8` //! - `v6 < v8m < v6t2` //! - `v7 < v8m.main` //! //! *NOTE*: Section 5.4.7 of ACLE says: //! //! - "__ARM_FEATURE_DSP is defined to 1 if the DSP (v5E) instructions are supported and the //! intrinsics defined in Saturating intrinsics are available." //! //! This does *not* match how LLVM uses the '+dsp' feature; this feature is not set for v5te //! targets so we have to work around this difference. //! //! # References //! //! - [ACLE Q2 2018](https://developer.arm.com/docs/101028/latest) // 8, 7 and 6-M are supported via dedicated instructions like DMB. All other arches are supported // via CP15 instructions. See Section 10.1 of ACLE mod barrier; pub use self::barrier::*; mod hints; pub use self::hints::*; mod registers; pub use self::registers::*; mod ex; pub use self::ex::*; // Supported arches: 5TE, 7E-M. See Section 10.1 of ACLE (e.g. QADD) // We also include the A profile even though DSP is deprecated on that profile as of ACLE 2.0 (see // section 5.4.7) // Here we workaround the difference between LLVM's +dsp and ACLE's __ARM_FEATURE_DSP by gating on // '+v5te' rather than on '+dsp' #[cfg(all( not(target_arch = "aarch64"), any( // >= v5TE but excludes v7-M all(target_feature = "v5te", not(target_feature = "mclass")), // v7E-M all(target_feature = "mclass", target_feature = "dsp"), ) ))] mod dsp; #[cfg(all( not(target_arch = "aarch64"), any( all(target_feature = "v5te", not(target_feature = "mclass")), all(target_feature = "mclass", target_feature = "dsp"), ) ))] pub use self::dsp::*; // Supported arches: 6, 7-M. See Section 10.1 of ACLE (e.g. SSAT) #[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] mod sat; #[cfg(all(not(target_arch = "aarch64"), target_feature = "v6",))] pub use self::sat::*; // Deprecated in ACLE 2.0 for the A profile but fully supported on the M and R profiles, says // Section 5.4.9 of ACLE. 
We'll expose these for the A profile even if deprecated #[cfg(all( not(target_arch = "aarch64"), any( // v7-A, v7-R all(target_feature = "v6", not(target_feature = "mclass")), // v7E-M all(target_feature = "mclass", target_feature = "dsp") ) ))] mod simd32; #[cfg(all( not(target_arch = "aarch64"), any( all(target_feature = "v6", not(target_feature = "mclass")), all(target_feature = "mclass", target_feature = "dsp") ) ))] pub use self::simd32::*; mod sealed { pub trait Dmb { unsafe fn __dmb(&self); } pub trait Dsb { unsafe fn __dsb(&self); } pub trait Isb { unsafe fn __isb(&self); } pub trait Rsr { unsafe fn __rsr(&self) -> u32; } pub trait Rsr64 { unsafe fn __rsr64(&self) -> u64; } pub trait Rsrp { unsafe fn __rsrp(&self) -> *const u8; } pub trait Wsr { unsafe fn __wsr(&self, value: u32); } pub trait Wsr64 { unsafe fn __wsr64(&self, value: u64); } pub trait Wsrp { unsafe fn __wsrp(&self, value: *const u8); } } core_arch-0.1.5/src/acle/registers/aarch32.rs010064400007650000024000000005741344374632200171720ustar0000000000000000/// Application Program Status Register pub struct APSR; // Note (@Lokathor): Because this breaks the use of Rust on the Game Boy // Advance, this change must be reverted until Rust learns to handle cpu state // properly. See also: https://github.com/rust-lang-nursery/stdsimd/issues/702 //#[cfg(any(not(target_feature = "thumb-state"), target_feature = "v6t2"))] //rsr!(APSR); core_arch-0.1.5/src/acle/registers/mod.rs010064400007650000024000000051131343257532600165220ustar0000000000000000#[allow(unused_macros)] macro_rules! rsr { ($R:ident) => { impl super::super::sealed::Rsr for $R { unsafe fn __rsr(&self) -> u32 { let r: u32; asm!(concat!("mrs $0,", stringify!($R)) : "=r"(r) : : : "volatile"); r } } }; } #[allow(unused_macros)] macro_rules! rsrp { ($R:ident) => { impl super::super::sealed::Rsrp for $R { unsafe fn __rsrp(&self) -> *const u8 { let r: *const u8; asm!(concat!("mrs $0,", stringify!($R)) : "=r"(r) : : : "volatile"); r } } }; } #[allow(unused_macros)] macro_rules! wsr { ($R:ident) => { impl super::super::sealed::Wsr for $R { unsafe fn __wsr(&self, value: u32) { asm!(concat!("msr ", stringify!($R), ",$0") : : "r"(value) : : "volatile"); } } }; } #[allow(unused_macros)] macro_rules! 
wsrp { ($R:ident) => { impl super::super::sealed::Wsrp for $R { unsafe fn __wsrp(&self, value: *const u8) { asm!(concat!("msr ", stringify!($R), ",$0") : : "r"(value) : : "volatile"); } } }; } #[cfg(target_feature = "mclass")] mod v6m; #[cfg(target_feature = "mclass")] pub use self::v6m::*; #[cfg(all(target_feature = "v7", target_feature = "mclass"))] mod v7m; #[cfg(all(target_feature = "v7", target_feature = "mclass"))] pub use self::v7m::*; #[cfg(not(target_arch = "aarch64"))] mod aarch32; #[cfg(not(target_arch = "aarch64"))] pub use self::aarch32::*; /// Reads a 32-bit system register #[inline(always)] pub unsafe fn __rsr(reg: R) -> u32 where R: super::sealed::Rsr, { reg.__rsr() } /// Reads a 64-bit system register #[cfg(target_arch = "aarch64")] #[inline(always)] pub unsafe fn __rsr64(reg: R) -> u64 where R: super::sealed::Rsr64, { reg.__rsr64() } /// Reads a system register containing an address #[inline(always)] pub unsafe fn __rsrp(reg: R) -> *const u8 where R: super::sealed::Rsrp, { reg.__rsrp() } /// Writes a 32-bit system register #[inline(always)] pub unsafe fn __wsr(reg: R, value: u32) where R: super::sealed::Wsr, { reg.__wsr(value) } /// Writes a 64-bit system register #[cfg(target_arch = "aarch64")] #[inline(always)] pub unsafe fn __wsr64(reg: R, value: u64) where R: super::sealed::Wsr64, { reg.__wsr64(value) } /// Writes a system register containing an address #[inline(always)] pub unsafe fn __wsrp(reg: R, value: *const u8) where R: super::sealed::Wsrp, { reg.__wsrp(value) } core_arch-0.1.5/src/acle/registers/v6m.rs010064400007650000024000000010001343257532600164420ustar0000000000000000/// CONTROL register pub struct CONTROL; rsr!(CONTROL); wsr!(CONTROL); /// Execution Program Status Register pub struct EPSR; rsr!(EPSR); /// Interrupt Program Status Register pub struct IPSR; rsr!(IPSR); /// Main Stack Pointer pub struct MSP; rsrp!(MSP); wsrp!(MSP); /// Priority Mask Register pub struct PRIMASK; rsr!(PRIMASK); wsr!(PRIMASK); /// Process Stack Pointer pub struct PSP; rsrp!(PSP); wsrp!(PSP); /// Program Status Register #[allow(non_camel_case_types)] pub struct xPSR; rsr!(xPSR); core_arch-0.1.5/src/acle/registers/v7m.rs010064400007650000024000000004451343257532600164570ustar0000000000000000/// Base Priority Mask Register pub struct BASEPRI; rsr!(BASEPRI); wsr!(BASEPRI); /// Base Priority Mask Register (conditional write) #[allow(non_camel_case_types)] pub struct BASEPRI_MAX; wsr!(BASEPRI_MAX); /// Fault Mask Register pub struct FAULTMASK; rsr!(FAULTMASK); wsr!(FAULTMASK); core_arch-0.1.5/src/acle/sat.rs010064400007650000024000000002071343257532600145220ustar0000000000000000//! # References: //! //! - Section 8.4 "Saturating intrinsics" //! //! Intrinsics that could live here: //! //! - __ssat //! - __usat core_arch-0.1.5/src/acle/simd32.rs010064400007650000024000000447061345531200000150270ustar0000000000000000//! # References //! //! - Section 8.5 "32-bit SIMD intrinsics" of ACLE //! //! Intrinsics that could live here //! //! - \[x\] __sel //! - \[ \] __ssat16 //! - \[ \] __usat16 //! - \[ \] __sxtab16 //! - \[ \] __sxtb16 //! - \[ \] __uxtab16 //! - \[ \] __uxtb16 //! - \[x\] __qadd8 //! - \[x\] __qsub8 //! - \[x\] __sadd8 //! - \[x\] __shadd8 //! - \[x\] __shsub8 //! - \[x\] __ssub8 //! - \[ \] __uadd8 //! - \[ \] __uhadd8 //! - \[ \] __uhsub8 //! - \[ \] __uqadd8 //! - \[ \] __uqsub8 //! - \[x\] __usub8 //! - \[x\] __usad8 //! - \[x\] __usada8 //! - \[x\] __qadd16 //! - \[x\] __qasx //! - \[x\] __qsax //! - \[x\] __qsub16 //! - \[x\] __sadd16 //! - \[x\] __sasx //! 
- \[x\] __shadd16 //! - \[ \] __shasx //! - \[ \] __shsax //! - \[x\] __shsub16 //! - \[ \] __ssax //! - \[ \] __ssub16 //! - \[ \] __uadd16 //! - \[ \] __uasx //! - \[ \] __uhadd16 //! - \[ \] __uhasx //! - \[ \] __uhsax //! - \[ \] __uhsub16 //! - \[ \] __uqadd16 //! - \[ \] __uqasx //! - \[x\] __uqsax //! - \[ \] __uqsub16 //! - \[ \] __usax //! - \[ \] __usub16 //! - \[x\] __smlad //! - \[ \] __smladx //! - \[ \] __smlald //! - \[ \] __smlaldx //! - \[x\] __smlsd //! - \[ \] __smlsdx //! - \[ \] __smlsld //! - \[ \] __smlsldx //! - \[x\] __smuad //! - \[x\] __smuadx //! - \[x\] __smusd //! - \[x\] __smusdx #[cfg(test)] use stdsimd_test::assert_instr; use crate::mem::transmute; use core_arch::acle::dsp::int16x2_t; types! { /// ARM-specific 32-bit wide vector of four packed `i8`. pub struct int8x4_t(i8, i8, i8, i8); /// ARM-specific 32-bit wide vector of four packed `u8`. pub struct uint8x4_t(u8, u8, u8, u8); } macro_rules! dsp_call { ($name:expr, $a:expr, $b:expr) => { transmute($name(transmute($a), transmute($b))) }; } extern "C" { #[link_name = "llvm.arm.qadd8"] fn arm_qadd8(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.qsub8"] fn arm_qsub8(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.qsub16"] fn arm_qsub16(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.qadd16"] fn arm_qadd16(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.qasx"] fn arm_qasx(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.qsax"] fn arm_qsax(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.sadd16"] fn arm_sadd16(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.sadd8"] fn arm_sadd8(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.smlad"] fn arm_smlad(a: i32, b: i32, c: i32) -> i32; #[link_name = "llvm.arm.smlsd"] fn arm_smlsd(a: i32, b: i32, c: i32) -> i32; #[link_name = "llvm.arm.sasx"] fn arm_sasx(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.sel"] fn arm_sel(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.shadd8"] fn arm_shadd8(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.shadd16"] fn arm_shadd16(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.shsub8"] fn arm_shsub8(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.ssub8"] fn arm_ssub8(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.usub8"] fn arm_usub8(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.shsub16"] fn arm_shsub16(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.smuad"] fn arm_smuad(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.smuadx"] fn arm_smuadx(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.smusd"] fn arm_smusd(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.smusdx"] fn arm_smusdx(a: i32, b: i32) -> i32; #[link_name = "llvm.arm.usad8"] fn arm_usad8(a: i32, b: i32) -> u32; } /// Saturating four 8-bit integer additions /// /// Returns the 8-bit signed equivalent of /// /// res\[0\] = a\[0\] + b\[0\] /// res\[1\] = a\[1\] + b\[1\] /// res\[2\] = a\[2\] + b\[2\] /// res\[3\] = a\[3\] + b\[3\] #[inline] #[cfg_attr(test, assert_instr(qadd8))] pub unsafe fn __qadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_qadd8, a, b) } /// Saturating two 8-bit integer subtraction /// /// Returns the 8-bit signed equivalent of /// /// res\[0\] = a\[0\] - b\[0\] /// res\[1\] = a\[1\] - b\[1\] /// res\[2\] = a\[2\] - b\[2\] /// res\[3\] = a\[3\] - b\[3\] #[inline] #[cfg_attr(test, assert_instr(qsub8))] pub unsafe fn __qsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_qsub8, a, b) } /// Saturating two 16-bit integer subtraction /// /// Returns the 16-bit signed equivalent of /// /// res\[0\] = a\[0\] - b\[0\] /// res\[1\] = a\[1\] - b\[1\] 
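// Sketch of how a user might drive `__qadd8` on a DSP-capable AArch32 target
// (e.g. thumbv7em, nightly only). The `core::arch::arm` re-export path, the
// helper name and the `transmute`-from-array construction are assumptions
// borrowed from this crate's own tests.
#[cfg(all(target_arch = "arm", target_feature = "dsp"))]
unsafe fn qadd8_saturates() {
    use core::arch::arm::{int8x4_t, __qadd8};
    use core::mem::transmute;

    let a: int8x4_t = transmute([100i8, 2, 3, 4]);
    let b: int8x4_t = transmute([100i8, 2, 3, 4]);
    let r: [i8; 4] = transmute(__qadd8(a, b));
    // Lane 0 saturates at i8::MAX instead of wrapping.
    assert_eq!(r, [127, 4, 6, 8]);
}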
#[inline] #[cfg_attr(test, assert_instr(qsub16))] pub unsafe fn __qsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_qsub16, a, b) } /// Saturating two 16-bit integer additions /// /// Returns the 16-bit signed equivalent of /// /// res\[0\] = a\[0\] + b\[0\] /// res\[1\] = a\[1\] + b\[1\] #[inline] #[cfg_attr(test, assert_instr(qadd16))] pub unsafe fn __qadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_qadd16, a, b) } /// Returns the 16-bit signed saturated equivalent of /// /// res\[0\] = a\[0\] - b\[1\] /// res\[1\] = a\[1\] + b\[0\] #[inline] #[cfg_attr(test, assert_instr(qasx))] pub unsafe fn __qasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_qasx, a, b) } /// Returns the 16-bit signed saturated equivalent of /// /// res\[0\] = a\[0\] + b\[1\] /// res\[1\] = a\[1\] - b\[0\] #[inline] #[cfg_attr(test, assert_instr(qsax))] pub unsafe fn __qsax(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_qsax, a, b) } /// Returns the 16-bit signed saturated equivalent of /// /// res\[0\] = a\[0\] + b\[1\] /// res\[1\] = a\[1\] + b\[0\] /// /// and the GE bits of the APSR are set. #[inline] #[cfg_attr(test, assert_instr(sadd16))] pub unsafe fn __sadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_sadd16, a, b) } /// Returns the 8-bit signed saturated equivalent of /// /// res\[0\] = a\[0\] + b\[1\] /// res\[1\] = a\[1\] + b\[0\] /// res\[2\] = a\[2\] + b\[2\] /// res\[3\] = a\[3\] + b\[3\] /// /// and the GE bits of the APSR are set. #[inline] #[cfg_attr(test, assert_instr(sadd8))] pub unsafe fn __sadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_sadd8, a, b) } /// Dual 16-bit Signed Multiply with Addition of products /// and 32-bit accumulation. /// /// Returns the 16-bit signed equivalent of /// res = a\[0\] * b\[0\] + a\[1\] * b\[1\] + c #[inline] #[cfg_attr(test, assert_instr(smlad))] pub unsafe fn __smlad(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { arm_smlad(transmute(a), transmute(b), c) } /// Dual 16-bit Signed Multiply with Subtraction of products /// and 32-bit accumulation and overflow detection. /// /// Returns the 16-bit signed equivalent of /// res = a\[0\] * b\[0\] - a\[1\] * b\[1\] + c #[inline] #[cfg_attr(test, assert_instr(smlsd))] pub unsafe fn __smlsd(a: int16x2_t, b: int16x2_t, c: i32) -> i32 { arm_smlsd(transmute(a), transmute(b), c) } /// Returns the 16-bit signed equivalent of /// /// res\[0\] = a\[0\] - b\[1\] /// res\[1\] = a\[1\] + b\[0\] /// /// and the GE bits of the APSR are set. #[inline] #[cfg_attr(test, assert_instr(sasx))] pub unsafe fn __sasx(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_sasx, a, b) } /// Select bytes from each operand according to APSR GE flags /// /// Returns the equivalent of /// /// res\[0\] = GE\[0\] ? a\[0\] : b\[0\] /// res\[1\] = GE\[1\] ? a\[1\] : b\[1\] /// res\[2\] = GE\[2\] ? a\[2\] : b\[2\] /// res\[3\] = GE\[3\] ? a\[3\] : b\[3\] /// /// where GE are bits of APSR #[inline] #[cfg_attr(test, assert_instr(sel))] pub unsafe fn __sel(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_sel, a, b) } /// Signed halving parallel byte-wise addition. /// /// Returns the 8-bit signed equivalent of /// /// res\[0\] = (a\[0\] + b\[0\]) / 2 /// res\[1\] = (a\[1\] + b\[1\]) / 2 /// res\[2\] = (a\[2\] + b\[2\]) / 2 /// res\[3\] = (a\[3\] + b\[3\]) / 2 #[inline] #[cfg_attr(test, assert_instr(shadd8))] pub unsafe fn __shadd8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_shadd8, a, b) } /// Signed halving parallel halfword-wise addition. 
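// Sketch (hypothetical helper): accumulating a dot product of i16 samples two
// lanes at a time with `__smlad`, as a small FIR/correlation kernel would.
// Any odd trailing element is ignored for brevity; paths are assumed as above.
#[cfg(all(target_arch = "arm", target_feature = "dsp"))]
unsafe fn dot_i16(xs: &[i16], ys: &[i16]) -> i32 {
    use core::arch::arm::{int16x2_t, __smlad};
    use core::mem::transmute;

    let mut acc = 0i32;
    for (x, y) in xs.chunks_exact(2).zip(ys.chunks_exact(2)) {
        let a: int16x2_t = transmute([x[0], x[1]]);
        let b: int16x2_t = transmute([y[0], y[1]]);
        // acc += x[0] * y[0] + x[1] * y[1], with the Q flag set on overflow.
        acc = __smlad(a, b, acc);
    }
    acc
}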
/// /// Returns the 16-bit signed equivalent of /// /// res\[0\] = (a\[0\] + b\[0\]) / 2 /// res\[1\] = (a\[1\] + b\[1\]) / 2 #[inline] #[cfg_attr(test, assert_instr(shadd16))] pub unsafe fn __shadd16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_shadd16, a, b) } /// Signed halving parallel byte-wise subtraction. /// /// Returns the 8-bit signed equivalent of /// /// res\[0\] = (a\[0\] - b\[0\]) / 2 /// res\[1\] = (a\[1\] - b\[1\]) / 2 /// res\[2\] = (a\[2\] - b\[2\]) / 2 /// res\[3\] = (a\[3\] - b\[3\]) / 2 #[inline] #[cfg_attr(test, assert_instr(shsub8))] pub unsafe fn __shsub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_shsub8, a, b) } /// Inserts a `USUB8` instruction. /// /// Returns the 8-bit unsigned equivalent of /// /// res\[0\] = a\[0\] - a\[0\] /// res\[1\] = a\[1\] - a\[1\] /// res\[2\] = a\[2\] - a\[2\] /// res\[3\] = a\[3\] - a\[3\] /// /// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits. /// The GE bits of the APSR are set. #[inline] #[cfg_attr(test, assert_instr(usub8))] pub unsafe fn __usub8(a: uint8x4_t, b: uint8x4_t) -> uint8x4_t { dsp_call!(arm_usub8, a, b) } /// Inserts a `SSUB8` instruction. /// /// Returns the 8-bit signed equivalent of /// /// res\[0\] = a\[0\] - a\[0\] /// res\[1\] = a\[1\] - a\[1\] /// res\[2\] = a\[2\] - a\[2\] /// res\[3\] = a\[3\] - a\[3\] /// /// where \[0\] is the lower 8 bits and \[3\] is the upper 8 bits. /// The GE bits of the APSR are set. #[inline] #[cfg_attr(test, assert_instr(ssub8))] pub unsafe fn __ssub8(a: int8x4_t, b: int8x4_t) -> int8x4_t { dsp_call!(arm_ssub8, a, b) } /// Signed halving parallel halfword-wise subtraction. /// /// Returns the 16-bit signed equivalent of /// /// res\[0\] = (a\[0\] - b\[0\]) / 2 /// res\[1\] = (a\[1\] - b\[1\]) / 2 #[inline] #[cfg_attr(test, assert_instr(shsub16))] pub unsafe fn __shsub16(a: int16x2_t, b: int16x2_t) -> int16x2_t { dsp_call!(arm_shsub16, a, b) } /// Signed Dual Multiply Add. /// /// Returns the equivalent of /// /// res = a\[0\] * b\[0\] + a\[1\] * b\[1\] /// /// and sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smuad))] pub unsafe fn __smuad(a: int16x2_t, b: int16x2_t) -> i32 { arm_smuad(transmute(a), transmute(b)) } /// Signed Dual Multiply Add Reversed. /// /// Returns the equivalent of /// /// res = a\[0\] * b\[1\] + a\[1\] * b\[0\] /// /// and sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smuadx))] pub unsafe fn __smuadx(a: int16x2_t, b: int16x2_t) -> i32 { arm_smuadx(transmute(a), transmute(b)) } /// Signed Dual Multiply Subtract. /// /// Returns the equivalent of /// /// res = a\[0\] * b\[0\] - a\[1\] * b\[1\] /// /// and sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smusd))] pub unsafe fn __smusd(a: int16x2_t, b: int16x2_t) -> i32 { arm_smusd(transmute(a), transmute(b)) } /// Signed Dual Multiply Subtract Reversed. /// /// Returns the equivalent of /// /// res = a\[0\] * b\[1\] - a\[1\] * b\[0\] /// /// and sets the Q flag if overflow occurs on the addition. #[inline] #[cfg_attr(test, assert_instr(smusdx))] pub unsafe fn __smusdx(a: int16x2_t, b: int16x2_t) -> i32 { arm_smusdx(transmute(a), transmute(b)) } /// Sum of 8-bit absolute differences. 
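// Sketch: a classic use of the GE flags, a per-byte unsigned maximum built from
// `__usub8` (which sets GE per lane) followed by `__sel`, the same pairing the
// `sel` test below uses with `__sadd8`. The helper name and paths are
// assumptions.
#[cfg(all(target_arch = "arm", target_feature = "dsp"))]
unsafe fn max4_u8(a: [u8; 4], b: [u8; 4]) -> [u8; 4] {
    use core::arch::arm::{uint8x4_t, __sel, __usub8};
    use core::mem::transmute;

    let va: uint8x4_t = transmute(a);
    let vb: uint8x4_t = transmute(b);
    // The arithmetic result is discarded; only the GE bits it sets matter.
    let _ = __usub8(va, vb);
    // GE[i] is set where a[i] >= b[i], so __sel picks the larger byte of each lane.
    transmute(__sel(transmute(va), transmute(vb)))
}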
/// /// Returns the 8-bit unsigned equivalent of /// /// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\ /// (a\[2\] - b\[2\]) + (a\[3\] - b\[3\]) #[inline] #[cfg_attr(test, assert_instr(usad8))] pub unsafe fn __usad8(a: int8x4_t, b: int8x4_t) -> u32 { arm_usad8(transmute(a), transmute(b)) } /// Sum of 8-bit absolute differences and constant. /// /// Returns the 8-bit unsigned equivalent of /// /// res = abs(a\[0\] - b\[0\]) + abs(a\[1\] - b\[1\]) +\ /// (a\[2\] - b\[2\]) + (a\[3\] - b\[3\]) + c #[inline] #[cfg_attr(test, assert_instr(usad8))] pub unsafe fn __usada8(a: int8x4_t, b: int8x4_t, c: u32) -> u32 { __usad8(a, b) + c } #[cfg(test)] mod tests { use crate::core_arch::simd::{i16x2, i8x4, u8x4}; use std::mem::transmute; use stdsimd_test::simd_test; #[test] fn qadd8() { unsafe { let a = i8x4::new(1, 2, 3, ::std::i8::MAX); let b = i8x4::new(2, -1, 0, 1); let c = i8x4::new(3, 1, 3, ::std::i8::MAX); let r: i8x4 = dsp_call!(super::__qadd8, a, b); assert_eq!(r, c); } } #[test] fn qsub8() { unsafe { let a = i8x4::new(1, 2, 3, ::std::i8::MIN); let b = i8x4::new(2, -1, 0, 1); let c = i8x4::new(-1, 3, 3, ::std::i8::MIN); let r: i8x4 = dsp_call!(super::__qsub8, a, b); assert_eq!(r, c); } } #[test] fn qadd16() { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(2, -1); let c = i16x2::new(3, 1); let r: i16x2 = dsp_call!(super::__qadd16, a, b); assert_eq!(r, c); } } #[test] fn qsub16() { unsafe { let a = i16x2::new(10, 20); let b = i16x2::new(20, -10); let c = i16x2::new(-10, 30); let r: i16x2 = dsp_call!(super::__qsub16, a, b); assert_eq!(r, c); } } #[test] fn qasx() { unsafe { let a = i16x2::new(1, ::std::i16::MAX); let b = i16x2::new(2, 2); let c = i16x2::new(-1, ::std::i16::MAX); let r: i16x2 = dsp_call!(super::__qasx, a, b); assert_eq!(r, c); } } #[test] fn qsax() { unsafe { let a = i16x2::new(1, ::std::i16::MAX); let b = i16x2::new(2, 2); let c = i16x2::new(3, ::std::i16::MAX - 2); let r: i16x2 = dsp_call!(super::__qsax, a, b); assert_eq!(r, c); } } #[test] fn sadd16() { unsafe { let a = i16x2::new(1, ::std::i16::MAX); let b = i16x2::new(2, 2); let c = i16x2::new(3, -::std::i16::MAX); let r: i16x2 = dsp_call!(super::__sadd16, a, b); assert_eq!(r, c); } } #[test] fn sadd8() { unsafe { let a = i8x4::new(1, 2, 3, ::std::i8::MAX); let b = i8x4::new(4, 3, 2, 2); let c = i8x4::new(5, 5, 5, -::std::i8::MAX); let r: i8x4 = dsp_call!(super::__sadd8, a, b); assert_eq!(r, c); } } #[test] fn sasx() { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(2, 1); let c = i16x2::new(0, 4); let r: i16x2 = dsp_call!(super::__sasx, a, b); assert_eq!(r, c); } } #[test] fn smlad() { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(3, 4); let r = super::__smlad(transmute(a), transmute(b), 10); assert_eq!(r, (1 * 3) + (2 * 4) + 10); } } #[test] fn smlsd() { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(3, 4); let r = super::__smlsd(transmute(a), transmute(b), 10); assert_eq!(r, ((1 * 3) - (2 * 4)) + 10); } } #[test] fn sel() { unsafe { let a = i8x4::new(1, 2, 3, ::std::i8::MAX); let b = i8x4::new(4, 3, 2, 2); // call sadd8() to set GE bits super::__sadd8(transmute(a), transmute(b)); let c = i8x4::new(1, 2, 3, ::std::i8::MAX); let r: i8x4 = dsp_call!(super::__sel, a, b); assert_eq!(r, c); } } #[test] fn shadd8() { unsafe { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(5, 4, 3, 2); let c = i8x4::new(3, 3, 3, 3); let r: i8x4 = dsp_call!(super::__shadd8, a, b); assert_eq!(r, c); } } #[test] fn shadd16() { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); let c = i16x2::new(3, 3); let 
r: i16x2 = dsp_call!(super::__shadd16, a, b); assert_eq!(r, c); } } #[test] fn shsub8() { unsafe { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(5, 4, 3, 2); let c = i8x4::new(-2, -1, 0, 1); let r: i8x4 = dsp_call!(super::__shsub8, a, b); assert_eq!(r, c); } } #[test] fn ssub8() { unsafe { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(5, 4, 3, 2); let c = i8x4::new(-4, -2, 0, 2); let r: i8x4 = dsp_call!(super::__ssub8, a, b); assert_eq!(r, c); } } #[test] fn usub8() { unsafe { let a = u8x4::new(1, 2, 3, 4); let b = u8x4::new(5, 4, 3, 2); let c = u8x4::new(252, 254, 0, 2); let r: u8x4 = dsp_call!(super::__usub8, a, b); assert_eq!(r, c); } } #[test] fn shsub16() { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); let c = i16x2::new(-2, -1); let r: i16x2 = dsp_call!(super::__shsub16, a, b); assert_eq!(r, c); } } #[test] fn smuad() { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); let r = super::__smuad(transmute(a), transmute(b)); assert_eq!(r, 13); } } #[test] fn smuadx() { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); let r = super::__smuadx(transmute(a), transmute(b)); assert_eq!(r, 14); } } #[test] fn smusd() { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); let r = super::__smusd(transmute(a), transmute(b)); assert_eq!(r, -3); } } #[test] fn smusdx() { unsafe { let a = i16x2::new(1, 2); let b = i16x2::new(5, 4); let r = super::__smusdx(transmute(a), transmute(b)); assert_eq!(r, -6); } } #[test] fn usad8() { unsafe { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(4, 3, 2, 1); let r = super::__usad8(transmute(a), transmute(b)); assert_eq!(r, 8); } } #[test] fn usad8a() { unsafe { let a = i8x4::new(1, 2, 3, 4); let b = i8x4::new(4, 3, 2, 1); let c = 10; let r = super::__usada8(transmute(a), transmute(b), c); assert_eq!(r, 8 + c); } } } core_arch-0.1.5/src/arm/armclang.rs010064400007650000024000000040371342163752400153740ustar0000000000000000//! ARM compiler specific intrinsics //! //! # References //! //! - [ARM Compiler v 6.10 - armclang Reference Guide][arm_comp_ref] //! //! [arm_comp_ref]: https://developer.arm.com/docs/100067/0610 #[cfg(test)] use stdsimd_test::assert_instr; /// Inserts a breakpoint instruction. /// /// `val` is a compile-time constant integer in range `[0, 255]`. /// /// The breakpoint instruction inserted is: /// /// * `BKPT` when compiling as T32, /// * `BRK` when compiling as A32 or A64. /// /// # Safety /// /// If `val` is out-of-range the behavior is **undefined**. /// /// # Note /// /// [ARM's documentation][arm_docs] defines that `__breakpoint` accepts the /// following values for `val`: /// /// - `0...65535` when compiling as A32 or A64, /// - `0...255` when compiling as T32. /// /// The current implementation only accepts values in range `[0, 255]` - if the /// value is out-of-range the behavior is **undefined**. /// /// [arm_docs]: https://developer.arm.com/docs/100067/latest/compiler-specific-intrinsics/__breakpoint-intrinsic #[cfg_attr(all(test, target_arch = "arm"), assert_instr(bkpt, val = 0))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(brk, val = 0))] #[inline(always)] #[rustc_args_required_const(0)] pub unsafe fn __breakpoint(val: i32) { // Ensure that this compiles correctly on non-arm architectures, so libstd // doc builds work. The proper macro will shadow this definition below. #[allow(unused_macros)] macro_rules! call { ($e:expr) => { () }; } #[cfg(target_arch = "arm")] macro_rules! 
call { ($imm8:expr) => { asm!(concat!("BKPT ", stringify!($imm8)) : : : : "volatile") } } #[cfg(target_arch = "aarch64")] macro_rules! call { ($imm8:expr) => { asm!(concat!("BRK ", stringify!($imm8)) : : : : "volatile") } } // We can't `panic!` inside this intrinsic, so we can't really validate the // arguments here. If `val` is out-of-range this macro uses `val == 255`: constify_imm8!(val, call); } core_arch-0.1.5/src/arm/mod.rs010064400007650000024000000023601343447103600143620ustar0000000000000000//! ARM intrinsics. //! //! The reference for NEON is [ARM's NEON Intrinsics Reference][arm_ref]. The //! [ARM's NEON Intrinsics Online Database][arm_dat] is also useful. //! //! [arm_ref]: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf //! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics #![allow(non_camel_case_types)] mod armclang; pub use self::armclang::*; mod v6; pub use self::v6::*; #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] mod v7; #[cfg(any(target_arch = "aarch64", target_feature = "v7"))] pub use self::v7::*; // NEON is supported on AArch64, and on ARM when built with the v7 and neon // features. Building ARM without neon produces incorrect codegen. #[cfg(any( target_arch = "aarch64", all(target_feature = "v7", target_feature = "neon"), dox ))] mod neon; #[cfg(any( target_arch = "aarch64", all(target_feature = "v7", target_feature = "neon"), dox ))] pub use self::neon::*; pub use crate::core_arch::acle::*; #[cfg(test)] use stdsimd_test::assert_instr; /// Generates the trap instruction `UDF` #[cfg(target_arch = "arm")] #[cfg_attr(test, assert_instr(udf))] #[inline] pub unsafe fn udf() -> ! { crate::intrinsics::abort() } core_arch-0.1.5/src/arm/neon.rs010064400007650000024000001363441343447103600145540ustar0000000000000000//! ARMv7 NEON intrinsics use crate::{core_arch::simd_llvm::*, mem::transmute}; #[cfg(test)] use stdsimd_test::assert_instr; types! { /// ARM-specific 64-bit wide vector of eight packed `i8`. pub struct int8x8_t(i8, i8, i8, i8, i8, i8, i8, i8); /// ARM-specific 64-bit wide vector of eight packed `u8`. pub struct uint8x8_t(u8, u8, u8, u8, u8, u8, u8, u8); /// ARM-specific 64-bit wide polynomial vector of eight packed `u8`. pub struct poly8x8_t(u8, u8, u8, u8, u8, u8, u8, u8); /// ARM-specific 64-bit wide vector of four packed `i16`. pub struct int16x4_t(i16, i16, i16, i16); /// ARM-specific 64-bit wide vector of four packed `u16`. pub struct uint16x4_t(u16, u16, u16, u16); // FIXME: ARM-specific 64-bit wide vector of four packed `f16`. // pub struct float16x4_t(f16, f16, f16, f16); /// ARM-specific 64-bit wide vector of four packed `u16`. pub struct poly16x4_t(u16, u16, u16, u16); /// ARM-specific 64-bit wide vector of two packed `i32`. pub struct int32x2_t(i32, i32); /// ARM-specific 64-bit wide vector of two packed `u32`. pub struct uint32x2_t(u32, u32); /// ARM-specific 64-bit wide vector of two packed `f32`. pub struct float32x2_t(f32, f32); /// ARM-specific 64-bit wide vector of one packed `i64`. pub struct int64x1_t(i64); /// ARM-specific 64-bit wide vector of one packed `u64`. pub struct uint64x1_t(u64); /// ARM-specific 128-bit wide vector of sixteen packed `i8`. pub struct int8x16_t( i8, i8 ,i8, i8, i8, i8 ,i8, i8, i8, i8 ,i8, i8, i8, i8 ,i8, i8, ); /// ARM-specific 128-bit wide vector of sixteen packed `u8`. pub struct uint8x16_t( u8, u8 ,u8, u8, u8, u8 ,u8, u8, u8, u8 ,u8, u8, u8, u8 ,u8, u8, ); /// ARM-specific 128-bit wide vector of sixteen packed `u8`. 
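// Sketch: a call site for `__breakpoint` above. The argument must be a literal
// constant in range (0 is chosen arbitrarily here); the `core::arch::arm`
// re-export path is an assumption of this example.
#[cfg(target_arch = "arm")]
unsafe fn stop_here() {
    use core::arch::arm::__breakpoint;
    __breakpoint(0);
}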
pub struct poly8x16_t( u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 ); /// ARM-specific 128-bit wide vector of eight packed `i16`. pub struct int16x8_t(i16, i16, i16, i16, i16, i16, i16, i16); /// ARM-specific 128-bit wide vector of eight packed `u16`. pub struct uint16x8_t(u16, u16, u16, u16, u16, u16, u16, u16); // FIXME: ARM-specific 128-bit wide vector of eight packed `f16`. // pub struct float16x8_t(f16, f16, f16, f16, f16, f16, f16); /// ARM-specific 128-bit wide vector of eight packed `u16`. pub struct poly16x8_t(u16, u16, u16, u16, u16, u16, u16, u16); /// ARM-specific 128-bit wide vector of four packed `i32`. pub struct int32x4_t(i32, i32, i32, i32); /// ARM-specific 128-bit wide vector of four packed `u32`. pub struct uint32x4_t(u32, u32, u32, u32); /// ARM-specific 128-bit wide vector of four packed `f32`. pub struct float32x4_t(f32, f32, f32, f32); /// ARM-specific 128-bit wide vector of two packed `i64`. pub struct int64x2_t(i64, i64); /// ARM-specific 128-bit wide vector of two packed `u64`. pub struct uint64x2_t(u64, u64); } /// ARM-specific type containing two `int8x8_t` vectors. #[derive(Copy, Clone)] pub struct int8x8x2_t(pub int8x8_t, pub int8x8_t); /// ARM-specific type containing three `int8x8_t` vectors. #[derive(Copy, Clone)] pub struct int8x8x3_t(pub int8x8_t, pub int8x8_t, pub int8x8_t); /// ARM-specific type containing four `int8x8_t` vectors. #[derive(Copy, Clone)] pub struct int8x8x4_t(pub int8x8_t, pub int8x8_t, pub int8x8_t, pub int8x8_t); /// ARM-specific type containing two `uint8x8_t` vectors. #[derive(Copy, Clone)] pub struct uint8x8x2_t(pub uint8x8_t, pub uint8x8_t); /// ARM-specific type containing three `uint8x8_t` vectors. #[derive(Copy, Clone)] pub struct uint8x8x3_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t); /// ARM-specific type containing four `uint8x8_t` vectors. #[derive(Copy, Clone)] pub struct uint8x8x4_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t, pub uint8x8_t); /// ARM-specific type containing two `poly8x8_t` vectors. #[derive(Copy, Clone)] pub struct poly8x8x2_t(pub poly8x8_t, pub poly8x8_t); /// ARM-specific type containing three `poly8x8_t` vectors. #[derive(Copy, Clone)] pub struct poly8x8x3_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); /// ARM-specific type containing four `poly8x8_t` vectors. 
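// Sketch: the NEON vector types above are opaque tuple structs, so this crate's
// own tests build them with `transmute` from the matching fixed-size array; a
// user-side helper (hypothetical) looks the same.
#[cfg(all(target_arch = "arm", target_feature = "neon"))]
unsafe fn splat_u8(x: u8) -> core::arch::arm::uint8x8_t {
    // [u8; 8] and uint8x8_t have the same size (8 bytes).
    core::mem::transmute([x; 8])
}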
#[derive(Copy, Clone)] pub struct poly8x8x4_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); #[allow(improper_ctypes)] extern "C" { #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")] #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")] fn frsqrte_v2f32(a: float32x2_t) -> float32x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v8i8")] fn vpmins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v4i16")] fn vpmins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v2i32")] fn vpmins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v8i8")] fn vpminu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v4i16")] fn vpminu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v2i32")] fn vpminu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminp.v2f32")] fn vpminf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v8i8")] fn vpmaxs_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v4i16")] fn vpmaxs_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v2i32")] fn vpmaxs_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v8i8")] fn vpmaxu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v4i16")] fn vpmaxu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v2i32")] fn vpmaxu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32")] fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; } #[cfg(target_arch = "arm")] #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.arm.neon.vtbl1"] fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t; #[link_name = 
"llvm.arm.neon.vtbl2"] fn vtbl2(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; #[link_name = "llvm.arm.neon.vtbl3"] fn vtbl3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; #[link_name = "llvm.arm.neon.vtbl4"] fn vtbl4(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; #[link_name = "llvm.arm.neon.vtbx1"] fn vtbx1(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; #[link_name = "llvm.arm.neon.vtbx2"] fn vtbx2(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; #[link_name = "llvm.arm.neon.vtbx3"] fn vtbx3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; #[link_name = "llvm.arm.neon.vtbx4"] fn vtbx4( a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, e: int8x8_t, ) -> int8x8_t; } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { simd_add(a, b) } /// Vector add. 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] pub unsafe fn vaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fadd))] pub unsafe fn vadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { simd_add(a, b) } /// Vector add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fadd))] pub unsafe fn vaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { simd_add(a, b) } /// Vector long add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))] pub unsafe fn vaddl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { let a: int16x8_t = simd_cast(a); let b: int16x8_t = simd_cast(b); simd_add(a, b) } /// Vector long add. 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))] pub unsafe fn vaddl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { let a: int32x4_t = simd_cast(a); let b: int32x4_t = simd_cast(b); simd_add(a, b) } /// Vector long add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))] pub unsafe fn vaddl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { let a: int64x2_t = simd_cast(a); let b: int64x2_t = simd_cast(b); simd_add(a, b) } /// Vector long add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))] pub unsafe fn vaddl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { let a: uint16x8_t = simd_cast(a); let b: uint16x8_t = simd_cast(b); simd_add(a, b) } /// Vector long add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))] pub unsafe fn vaddl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { let a: uint32x4_t = simd_cast(a); let b: uint32x4_t = simd_cast(b); simd_add(a, b) } /// Vector long add. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))] pub unsafe fn vaddl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { let a: uint64x2_t = simd_cast(a); let b: uint64x2_t = simd_cast(b); simd_add(a, b) } /// Vector narrow integer. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] pub unsafe fn vmovn_s16(a: int16x8_t) -> int8x8_t { simd_cast(a) } /// Vector narrow integer. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] pub unsafe fn vmovn_s32(a: int32x4_t) -> int16x4_t { simd_cast(a) } /// Vector narrow integer. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] pub unsafe fn vmovn_s64(a: int64x2_t) -> int32x2_t { simd_cast(a) } /// Vector narrow integer. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] pub unsafe fn vmovn_u16(a: uint16x8_t) -> uint8x8_t { simd_cast(a) } /// Vector narrow integer. 
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] pub unsafe fn vmovn_u32(a: uint32x4_t) -> uint16x4_t { simd_cast(a) } /// Vector narrow integer. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] pub unsafe fn vmovn_u64(a: uint64x2_t) -> uint32x2_t { simd_cast(a) } /// Vector long move. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] pub unsafe fn vmovl_s8(a: int8x8_t) -> int16x8_t { simd_cast(a) } /// Vector long move. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] pub unsafe fn vmovl_s16(a: int16x4_t) -> int32x4_t { simd_cast(a) } /// Vector long move. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] pub unsafe fn vmovl_s32(a: int32x2_t) -> int64x2_t { simd_cast(a) } /// Vector long move. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] pub unsafe fn vmovl_u8(a: uint8x8_t) -> uint16x8_t { simd_cast(a) } /// Vector long move. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] pub unsafe fn vmovl_u16(a: uint16x4_t) -> uint32x4_t { simd_cast(a) } /// Vector long move. #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] pub unsafe fn vmovl_u32(a: uint32x2_t) -> uint64x2_t { simd_cast(a) } /// Reciprocal square-root estimate. 
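// Sketch: the usual widen -> operate -> narrow pattern with the conversions
// above: add u8 lanes in 16-bit precision so the intermediate sum cannot wrap,
// then truncate back to 8 bits with vmovn. Hypothetical helper; paths assumed.
#[cfg(all(target_arch = "arm", target_feature = "neon", target_feature = "v7"))]
unsafe fn add_wide_then_narrow(a: [u8; 8], b: [u8; 8]) -> [u8; 8] {
    use core::arch::arm::{vaddq_u16, vmovl_u8, vmovn_u16};
    use core::mem::transmute;

    let wa = vmovl_u8(transmute(a)); // u8x8 -> u16x8
    let wb = vmovl_u8(transmute(b));
    let sum = vaddq_u16(wa, wb); // 16-bit lanes, no overflow possible here
    transmute(vmovn_u16(sum)) // truncating narrow back to u8x8
}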
#[inline] #[target_feature(enable = "neon")] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t { frsqrte_v2f32(a) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))] pub unsafe fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { vpmins_v8i8(a, b) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))] pub unsafe fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { vpmins_v4i16(a, b) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))] pub unsafe fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { vpmins_v2i32(a, b) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))] pub unsafe fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { vpminu_v8i8(a, b) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))] pub unsafe fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { vpminu_v4i16(a, b) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))] pub unsafe fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { vpminu_v2i32(a, b) } /// Folding minimum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminp))] pub unsafe fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { vpminf_v2f32(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))] pub unsafe fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { vpmaxs_v8i8(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))] pub unsafe fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> 
int16x4_t { vpmaxs_v4i16(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))] pub unsafe fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { vpmaxs_v2i32(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))] pub unsafe fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { vpmaxu_v8i8(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))] pub unsafe fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { vpmaxu_v4i16(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))] pub unsafe fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { vpmaxu_v2i32(a, b) } /// Folding maximum of adjacent pairs #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxp))] pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { vpmaxf_v2f32(a, b) } /// Table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { vtbl1(a, b) } /// Table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { transmute(vtbl1(transmute(a), transmute(b))) } /// Table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { transmute(vtbl1(transmute(a), transmute(b))) } /// Table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { vtbl2(a.0, a.1, b) } /// Table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) } /// Table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) } /// Table look-up 
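// Sketch: `vtbl1_u8` as an 8-byte shuffle; the control vector holds byte indices
// into `a`, and indices outside 0..=7 produce 0. Hypothetical helper, AArch32
// only, matching the cfg of the table-lookup intrinsics above.
#[cfg(all(target_arch = "arm", target_feature = "neon", target_feature = "v7"))]
unsafe fn reverse_bytes(a: [u8; 8]) -> [u8; 8] {
    use core::arch::arm::vtbl1_u8;
    use core::mem::transmute;

    let idx: [u8; 8] = [7, 6, 5, 4, 3, 2, 1, 0];
    transmute(vtbl1_u8(transmute(a), transmute(idx)))
}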
#[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { vtbl3(a.0, a.1, a.2, b) } /// Table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { transmute(vtbl3( transmute(a.0), transmute(a.1), transmute(a.2), transmute(b), )) } /// Table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { transmute(vtbl3( transmute(a.0), transmute(a.1), transmute(a.2), transmute(b), )) } /// Table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { vtbl4(a.0, a.1, a.2, a.3, b) } /// Table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { transmute(vtbl4( transmute(a.0), transmute(a.1), transmute(a.2), transmute(a.3), transmute(b), )) } /// Table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbl))] pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { transmute(vtbl4( transmute(a.0), transmute(a.1), transmute(a.2), transmute(a.3), transmute(b), )) } /// Extended table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { vtbx1(a, b, c) } /// Extended table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) } /// Extended table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) } /// Extended table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { vtbx2(a, b.0, b.1, c) } /// Extended table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { transmute(vtbx2( transmute(a), transmute(b.0), transmute(b.1), transmute(c), )) } /// Extended table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> 
poly8x8_t { transmute(vtbx2( transmute(a), transmute(b.0), transmute(b.1), transmute(c), )) } /// Extended table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { vtbx3(a, b.0, b.1, b.2, c) } /// Extended table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { transmute(vtbx3( transmute(a), transmute(b.0), transmute(b.1), transmute(b.2), transmute(c), )) } /// Extended table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { transmute(vtbx3( transmute(a), transmute(b.0), transmute(b.1), transmute(b.2), transmute(c), )) } /// Extended table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { vtbx4(a, b.0, b.1, b.2, b.3, c) } /// Extended table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { transmute(vtbx4( transmute(a), transmute(b.0), transmute(b.1), transmute(b.2), transmute(b.3), transmute(c), )) } /// Extended table look-up #[inline] #[cfg(target_arch = "arm")] #[cfg(target_endian = "little")] #[target_feature(enable = "neon,v7")] #[cfg_attr(test, assert_instr(vtbx))] pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { transmute(vtbx4( transmute(a), transmute(b.0), transmute(b.1), transmute(b.2), transmute(b.3), transmute(c), )) } #[cfg(test)] mod tests { use crate::core_arch::{arm::*, simd::*}; use std::mem::transmute; use stdsimd_test::simd_test; #[simd_test(enable = "neon")] unsafe fn test_vadd_s8() { let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1); let e = i8x8::new(9, 9, 9, 9, 9, 9, 9, 9); let r: i8x8 = transmute(vadd_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddq_s8() { let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); let b = i8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); let e = i8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9); let r: i8x16 = transmute(vaddq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vadd_s16() { let a = i16x4::new(1, 2, 3, 4); let b = i16x4::new(8, 7, 6, 5); let e = i16x4::new(9, 9, 9, 9); let r: i16x4 = transmute(vadd_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddq_s16() { let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = i16x8::new(8, 7, 6, 5, 4, 3, 2, 1); let e = i16x8::new(9, 9, 9, 9, 9, 9, 9, 9); let r: i16x8 = transmute(vaddq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vadd_s32() { let a = i32x2::new(1, 2); let b = i32x2::new(8, 7); let e = i32x2::new(9, 9); let r: i32x2 = transmute(vadd_s32(transmute(a), 
transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddq_s32() { let a = i32x4::new(1, 2, 3, 4); let b = i32x4::new(8, 7, 6, 5); let e = i32x4::new(9, 9, 9, 9); let r: i32x4 = transmute(vaddq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vadd_u8() { let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = u8x8::new(8, 7, 6, 5, 4, 3, 2, 1); let e = u8x8::new(9, 9, 9, 9, 9, 9, 9, 9); let r: u8x8 = transmute(vadd_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddq_u8() { let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); let b = u8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); let e = u8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9); let r: u8x16 = transmute(vaddq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vadd_u16() { let a = u16x4::new(1, 2, 3, 4); let b = u16x4::new(8, 7, 6, 5); let e = u16x4::new(9, 9, 9, 9); let r: u16x4 = transmute(vadd_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddq_u16() { let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = u16x8::new(8, 7, 6, 5, 4, 3, 2, 1); let e = u16x8::new(9, 9, 9, 9, 9, 9, 9, 9); let r: u16x8 = transmute(vaddq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vadd_u32() { let a = u32x2::new(1, 2); let b = u32x2::new(8, 7); let e = u32x2::new(9, 9); let r: u32x2 = transmute(vadd_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddq_u32() { let a = u32x4::new(1, 2, 3, 4); let b = u32x4::new(8, 7, 6, 5); let e = u32x4::new(9, 9, 9, 9); let r: u32x4 = transmute(vaddq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vadd_f32() { let a = f32x2::new(1., 2.); let b = f32x2::new(8., 7.); let e = f32x2::new(9., 9.); let r: f32x2 = transmute(vadd_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddq_f32() { let a = f32x4::new(1., 2., 3., 4.); let b = f32x4::new(8., 7., 6., 5.); let e = f32x4::new(9., 9., 9., 9.); let r: f32x4 = transmute(vaddq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddl_s8() { let v = ::std::i8::MAX; let a = i8x8::new(v, v, v, v, v, v, v, v); let v = 2 * (v as i16); let e = i16x8::new(v, v, v, v, v, v, v, v); let r: i16x8 = transmute(vaddl_s8(transmute(a), transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddl_s16() { let v = ::std::i16::MAX; let a = i16x4::new(v, v, v, v); let v = 2 * (v as i32); let e = i32x4::new(v, v, v, v); let r: i32x4 = transmute(vaddl_s16(transmute(a), transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddl_s32() { let v = ::std::i32::MAX; let a = i32x2::new(v, v); let v = 2 * (v as i64); let e = i64x2::new(v, v); let r: i64x2 = transmute(vaddl_s32(transmute(a), transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddl_u8() { let v = ::std::u8::MAX; let a = u8x8::new(v, v, v, v, v, v, v, v); let v = 2 * (v as u16); let e = u16x8::new(v, v, v, v, v, v, v, v); let r: u16x8 = transmute(vaddl_u8(transmute(a), transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddl_u16() { let v = ::std::u16::MAX; let a = u16x4::new(v, v, v, v); let v = 2 * (v as u32); let e = 
u32x4::new(v, v, v, v); let r: u32x4 = transmute(vaddl_u16(transmute(a), transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vaddl_u32() { let v = ::std::u32::MAX; let a = u32x2::new(v, v); let v = 2 * (v as u64); let e = u64x2::new(v, v); let r: u64x2 = transmute(vaddl_u32(transmute(a), transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vmovn_s16() { let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let r: i8x8 = transmute(vmovn_s16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vmovn_s32() { let a = i32x4::new(1, 2, 3, 4); let e = i16x4::new(1, 2, 3, 4); let r: i16x4 = transmute(vmovn_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vmovn_s64() { let a = i64x2::new(1, 2); let e = i32x2::new(1, 2); let r: i32x2 = transmute(vmovn_s64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vmovn_u16() { let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let r: u8x8 = transmute(vmovn_u16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vmovn_u32() { let a = u32x4::new(1, 2, 3, 4); let e = u16x4::new(1, 2, 3, 4); let r: u16x4 = transmute(vmovn_u32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vmovn_u64() { let a = u64x2::new(1, 2); let e = u32x2::new(1, 2); let r: u32x2 = transmute(vmovn_u64(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vmovl_s8() { let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let r: i16x8 = transmute(vmovl_s8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vmovl_s16() { let e = i32x4::new(1, 2, 3, 4); let a = i16x4::new(1, 2, 3, 4); let r: i32x4 = transmute(vmovl_s16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vmovl_s32() { let e = i64x2::new(1, 2); let a = i32x2::new(1, 2); let r: i64x2 = transmute(vmovl_s32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vmovl_u8() { let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let r: u16x8 = transmute(vmovl_u8(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vmovl_u16() { let e = u32x4::new(1, 2, 3, 4); let a = u16x4::new(1, 2, 3, 4); let r: u32x4 = transmute(vmovl_u16(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vmovl_u32() { let e = u64x2::new(1, 2); let a = u32x2::new(1, 2); let r: u64x2 = transmute(vmovl_u32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vrsqrt_f32() { let a = f32x2::new(1.0, 2.0); let e = f32x2::new(0.9980469, 0.7050781); let r: f32x2 = transmute(vrsqrte_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmin_s8() { let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8); let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = i8x8::new(-2, -4, 5, 7, 0, 2, 4, 6); let r: i8x8 = transmute(vpmin_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmin_s16() { let a = i16x4::new(1, 2, 3, -4); let b = i16x4::new(0, 3, 2, 5); let e = i16x4::new(1, -4, 0, 2); let r: i16x4 = transmute(vpmin_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmin_s32() { let a = i32x2::new(1, -2); let b = i32x2::new(0, 3); 
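// `vpmin` computes pairwise minima: the low lane(s) of the result take the
// minimum of adjacent pairs of `a` and the high lane(s) that of `b`, which is
// why the expected value below is [min(1, -2), min(0, 3)].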
let e = i32x2::new(-2, 0); let r: i32x2 = transmute(vpmin_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmin_u8() { let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = u8x8::new(1, 3, 5, 7, 0, 2, 4, 6); let r: u8x8 = transmute(vpmin_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmin_u16() { let a = u16x4::new(1, 2, 3, 4); let b = u16x4::new(0, 3, 2, 5); let e = u16x4::new(1, 3, 0, 2); let r: u16x4 = transmute(vpmin_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmin_u32() { let a = u32x2::new(1, 2); let b = u32x2::new(0, 3); let e = u32x2::new(1, 0); let r: u32x2 = transmute(vpmin_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmin_f32() { let a = f32x2::new(1., -2.); let b = f32x2::new(0., 3.); let e = f32x2::new(-2., 0.); let r: f32x2 = transmute(vpmin_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmax_s8() { let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8); let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = i8x8::new(1, 3, 6, 8, 3, 5, 7, 9); let r: i8x8 = transmute(vpmax_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmax_s16() { let a = i16x4::new(1, 2, 3, -4); let b = i16x4::new(0, 3, 2, 5); let e = i16x4::new(2, 3, 3, 5); let r: i16x4 = transmute(vpmax_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmax_s32() { let a = i32x2::new(1, -2); let b = i32x2::new(0, 3); let e = i32x2::new(1, 3); let r: i32x2 = transmute(vpmax_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmax_u8() { let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9); let e = u8x8::new(2, 4, 6, 8, 3, 5, 7, 9); let r: u8x8 = transmute(vpmax_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmax_u16() { let a = u16x4::new(1, 2, 3, 4); let b = u16x4::new(0, 3, 2, 5); let e = u16x4::new(2, 4, 3, 5); let r: u16x4 = transmute(vpmax_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmax_u32() { let a = u32x2::new(1, 2); let b = u32x2::new(0, 3); let e = u32x2::new(2, 3); let r: u32x2 = transmute(vpmax_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vpmax_f32() { let a = f32x2::new(1., -2.); let b = f32x2::new(0., 3.); let e = f32x2::new(1., 3.); let r: f32x2 = transmute(vpmax_f32(transmute(a), transmute(b))); assert_eq!(r, e); } } #[cfg(test)] #[cfg(target_endian = "little")] #[path = "table_lookup_tests.rs"] mod table_lookup_tests; core_arch-0.1.5/src/arm/table_lookup_tests.rs010064400007650000024000001114401343447103600175050ustar0000000000000000//! Tests for ARM+v7+neon table lookup (vtbl, vtbx) intrinsics. //! //! These are included in `{arm, aarch64}::neon`. use super::*; #[cfg(target_arch = "aarch64")] use crate::core_arch::aarch64::*; #[cfg(target_arch = "arm")] use crate::core_arch::arm::*; use crate::core_arch::simd::*; use std::mem; use stdsimd_test::simd_test; macro_rules! 
test_vtbl { ($test_name:ident => $fn_id:ident: - table[$table_t:ident]: [$($table_v:expr),*] | $(- ctrl[$ctrl_t:ident]: [$($ctrl_v:expr),*] => [$($exp_v:expr),*])|* ) => { #[simd_test(enable = "neon")] unsafe fn $test_name() { // create table as array, and transmute it to // arm's table type let table: $table_t = ::mem::transmute([$($table_v),*]); // For each control vector, perform a table lookup and // verify the result: $( { let ctrl: $ctrl_t = ::mem::transmute([$($ctrl_v),*]); let result = $fn_id(table, ::mem::transmute(ctrl)); let result: $ctrl_t = ::mem::transmute(result); let expected: $ctrl_t = ::mem::transmute([$($exp_v),*]); assert_eq!(result, expected); } )* } } } // ARM+v7+neon and AArch64+neon tests test_vtbl!( test_vtbl1_s8 => vtbl1_s8: - table[int8x8_t]: [0_i8, -11, 2, 3, 4, 5, 6, 7] | - ctrl[i8x8]: [3_i8, 4, 1, 6, 0, 2, 7, 5] => [3_i8, 4, -11, 6, 0, 2, 7, 5] | - ctrl[i8x8]: [3_i8, 8, 1, -9, 10, 2, 15, 5] => [3_i8, 0, -11, 0, 0, 2, 0, 5] ); test_vtbl!( test_vtbl1_u8 => vtbl1_u8: - table[uint8x8_t]: [0_u8, 1, 2, 3, 4, 5, 6, 7] | - ctrl[u8x8]: [3_u8, 4, 1, 6, 0, 2, 7, 5] => [3_u8, 4, 1, 6, 0, 2, 7, 5] | - ctrl[u8x8]: [3_u8, 8, 1, 9, 10, 2, 15, 5] => [3_u8, 0, 1, 0, 0, 2, 0, 5] ); test_vtbl!( test_vtbl1_p8 => vtbl1_p8: - table[poly8x8_t]: [0_u8, 1, 2, 3, 4, 5, 6, 7] | - ctrl[u8x8]: [3_u8, 4, 1, 6, 0, 2, 7, 5] => [3_u8, 4, 1, 6, 0, 2, 7, 5] | - ctrl[u8x8]: [3_u8, 8, 1, 9, 10, 2, 15, 5] => [3_u8, 0, 1, 0, 0, 2, 0, 5] ); test_vtbl!( test_vtbl2_s8 => vtbl2_s8: - table[int8x8x2_t]: [ 0_i8, -17, 34, 51, 68, 85, 102, 119, -106, -93, -84, -117, -104, -116, -72, -121 ] | - ctrl[i8x8]: [127_i8, 15, 1, 14, 2, 13, 3, 12] => [0_i8, -121, -17, -72, 34, -116, 51, -104] | - ctrl[i8x8]: [4_i8, 11, 16, 10, 6, -19, 7, 18] => [68_i8, -117, 0, -84, 102, 0, 119, 0] ); test_vtbl!( test_vtbl2_u8 => vtbl2_u8: - table[uint8x8x2_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255 ] | - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [0_u8, 255, 17, 238, 34, 221, 51, 204] | - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 187, 0, 170, 102, 0, 119, 0] ); test_vtbl!( test_vtbl2_p8 => vtbl2_p8: - table[poly8x8x2_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255 ] | - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [0_u8, 255, 17, 238, 34, 221, 51, 204] | - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 187, 0, 170, 102, 0, 119, 0] ); test_vtbl!( test_vtbl3_s8 => vtbl3_s8: - table[int8x8x3_t]: [ 0_i8, -17, 34, 51, 68, 85, 102, 119, -106, -93, -84, -117, -104, -116, -72, -121, 0, 1, -2, 3, 4, -5, 6, 7 ] | - ctrl[i8x8]: [127_i8, 15, 1, 19, 2, 13, 21, 12] => [0_i8, -121, -17, 3, 34, -116, -5, -104] | - ctrl[i8x8]: [4_i8, 11, 16, 10, 6, -27, 7, 18] => [68_i8, -117, 0, -84, 102, 0, 119, -2] ); test_vtbl!( test_vtbl3_u8 => vtbl3_u8: - table[uint8x8x3_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255, 0, 1, 2, 3, 4, 5, 6, 7 ] | - ctrl[u8x8]: [127_u8, 15, 1, 19, 2, 13, 21, 12] => [0_u8, 255, 17, 3, 34, 221, 5, 204] | - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 27, 7, 18] => [68_u8, 187, 0, 170, 102, 0, 119, 2] ); test_vtbl!( test_vtbl3_p8 => vtbl3_p8: - table[poly8x8x3_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255, 0, 1, 2, 3, 4, 5, 6, 7 ] | - ctrl[u8x8]: [127_u8, 15, 1, 19, 2, 13, 21, 12] => [0_u8, 255, 17, 3, 34, 221, 5, 204] | - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 27, 7, 18] => [68_u8, 187, 0, 170, 102, 0, 119, 2] ); test_vtbl!( test_vtbl4_s8 => vtbl4_s8: - table[int8x8x4_t]: [ 
0_i8, -17, 34, 51, 68, 85, 102, 119, -106, -93, -84, -117, -104, -116, -72, -121, 0, 1, -2, 3, 4, -5, 6, 7, 8, -9, 10, 11, 12, -13, 14, 15 ] | - ctrl[i8x8]: [127_i8, 15, 1, 19, 2, 13, 25, 12] => [0_i8, -121, -17, 3, 34, -116, -9, -104] | - ctrl[i8x8]: [4_i8, 11, 32, 10, -33, 27, 7, 18] => [68_i8, -117, 0, -84, 0, 11, 119, -2] ); test_vtbl!( test_vtbl4_u8 => vtbl4_u8: - table[uint8x8x4_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ] | - ctrl[u8x8]: [127_u8, 15, 1, 19, 2, 13, 21, 12] => [0_u8, 255, 17, 3, 34, 221, 5, 204] | - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 27, 7, 18] => [68_u8, 187, 0, 170, 102, 11, 119, 2] ); test_vtbl!( test_vtbl4_p8 => vtbl4_p8: - table[poly8x8x4_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 136, 153, 170, 187, 204, 221, 238, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ] | - ctrl[u8x8]: [127_u8, 15, 1, 19, 2, 13, 21, 12] => [0_u8, 255, 17, 3, 34, 221, 5, 204] | - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 27, 7, 18] => [68_u8, 187, 0, 170, 102, 11, 119, 2] ); macro_rules! test_vtbx { ($test_name:ident => $fn_id:ident: - table[$table_t:ident]: [$($table_v:expr),*] | - ext[$ext_t:ident]: [$($ext_v:expr),*] | $(- ctrl[$ctrl_t:ident]: [$($ctrl_v:expr),*] => [$($exp_v:expr),*])|* ) => { #[simd_test(enable = "neon")] unsafe fn $test_name() { // create table as array, and transmute it to // arm's table type let table: $table_t = ::mem::transmute([$($table_v),*]); let ext: $ext_t = ::mem::transmute([$($ext_v),*]); // For each control vector, perform a table lookup and // verify the result: $( { let ctrl: $ctrl_t = ::mem::transmute([$($ctrl_v),*]); let result = $fn_id(ext, table, ::mem::transmute(ctrl)); let result: $ctrl_t = ::mem::transmute(result); let expected: $ctrl_t = ::mem::transmute([$($exp_v),*]); assert_eq!(result, expected); } )* } } } test_vtbx!( test_vtbx1_s8 => vtbx1_s8: - table[int8x8_t]: [0_i8, 1, 2, -3, 4, 5, 6, 7] | - ext[int8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | - ctrl[i8x8]: [3_i8, 4, 1, 6, 0, 2, 7, 5] => [-3_i8, 4, 1, 6, 0, 2, 7, 5] | - ctrl[i8x8]: [3_i8, 8, 1, 9, 10, 2, -15, 5] => [-3_i8, 51, 1, 53, 54, 2, 56, 5] ); test_vtbx!( test_vtbx1_u8 => vtbx1_u8: - table[uint8x8_t]: [0_u8, 1, 2, 3, 4, 5, 6, 7] | - ext[uint8x8_t]: [50_u8, 51, 52, 53, 54, 55, 56, 57] | - ctrl[u8x8]: [3_u8, 4, 1, 6, 0, 2, 7, 5] => [3_u8, 4, 1, 6, 0, 2, 7, 5] | - ctrl[u8x8]: [3_u8, 8, 1, 9, 10, 2, 15, 5] => [3_u8, 51, 1, 53, 54, 2, 56, 5] ); test_vtbx!( test_vtbx1_p8 => vtbx1_p8: - table[poly8x8_t]: [0_u8, 1, 2, 3, 4, 5, 6, 7] | - ext[poly8x8_t]: [50_u8, 51, 52, 53, 54, 55, 56, 57] | - ctrl[u8x8]: [3_u8, 4, 1, 6, 0, 2, 7, 5] => [3_u8, 4, 1, 6, 0, 2, 7, 5] | - ctrl[u8x8]: [3_u8, 8, 1, 9, 10, 2, 15, 5] => [3_u8, 51, 1, 53, 54, 2, 56, 5] ); test_vtbx!( test_vtbx2_s8 => vtbx2_s8: - table[int8x8x2_t]: [0_i8, 1, 2, -3, 4, 5, 6, 7, 8, 9, -10, 11, 12, -13, 14, 15] | - ext[int8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | - ctrl[i8x8]: [3_i8, 4, 1, 6, 10, 2, 7, 15] => [-3_i8, 4, 1, 6, -10, 2, 7, 15] | - ctrl[i8x8]: [3_i8, 8, 1, 10, 17, 2, 15, -19] => [-3_i8, 8, 1, -10, 54, 2, 15, 57] ); test_vtbx!( test_vtbx2_u8 => vtbx2_u8: - table[uint8x8x2_t]: [0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | - ext[uint8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | - ctrl[u8x8]: [3_u8, 4, 1, 6, 10, 2, 7, 15] => [3_i8, 4, 1, 6, 10, 2, 7, 15] | - ctrl[u8x8]: [3_u8, 8, 1, 10, 17, 2, 15, 19] => [3_i8, 8, 1, 10, 54, 2, 15, 57] ); test_vtbx!( test_vtbx2_p8 => vtbx2_p8: - table[poly8x8x2_t]: [0_i8, 1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | - ext[poly8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | - ctrl[u8x8]: [3_u8, 4, 1, 6, 10, 2, 7, 15] => [3_i8, 4, 1, 6, 10, 2, 7, 15] | - ctrl[u8x8]: [3_u8, 8, 1, 10, 17, 2, 15, 19] => [3_i8, 8, 1, 10, 54, 2, 15, 57] ); test_vtbx!( test_vtbx3_s8 => vtbx3_s8: - table[int8x8x3_t]: [ 0_i8, 1, 2, -3, 4, 5, 6, 7, 8, 9, -10, 11, 12, -13, 14, 15, 16, -17, 18, 19, 20, 21, 22, 23 ] | - ext[int8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | - ctrl[i8x8]: [3_i8, 4, 17, 22, 10, 2, 7, 15] => [-3_i8, 4, -17, 22, -10, 2, 7, 15] | - ctrl[i8x8]: [3_i8, 8, 17, 10, 37, 2, 19, -29] => [-3_i8, 8, -17, -10, 54, 2, 19, 57] ); test_vtbx!( test_vtbx3_u8 => vtbx3_u8: - table[uint8x8x3_t]: [ 0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 ] | - ext[uint8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | - ctrl[u8x8]: [3_u8, 4, 17, 22, 10, 2, 7, 15] => [3_i8, 4, 17, 22, 10, 2, 7, 15] | - ctrl[u8x8]: [3_u8, 8, 17, 10, 37, 2, 19, 29] => [3_i8, 8, 17, 10, 54, 2, 19, 57] ); test_vtbx!( test_vtbx3_p8 => vtbx3_p8: - table[poly8x8x3_t]: [ 0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 ] | - ext[poly8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | - ctrl[u8x8]: [3_u8, 4, 17, 22, 10, 2, 7, 15] => [3_i8, 4, 17, 22, 10, 2, 7, 15] | - ctrl[u8x8]: [3_u8, 8, 17, 10, 37, 2, 19, 29] => [3_i8, 8, 17, 10, 54, 2, 19, 57] ); test_vtbx!( test_vtbx4_s8 => vtbx4_s8: - table[int8x8x4_t]: [ 0_i8, 1, 2, -3, 4, 5, 6, 7, 8, 9, -10, 11, 12, -13, 14, 15, 16, -17, 18, 19, 20, 21, 22, 23, -24, 25, 26, -27, 28, -29, 30, 31] | - ext[int8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | - ctrl[i8x8]: [3_i8, 31, 17, 22, 10, 29, 7, 15] => [-3_i8, 31, -17, 22, -10, -29, 7, 15] | - ctrl[i8x8]: [3_i8, 8, 17, 10, 37, 2, 19, -42] => [-3_i8, 8, -17, -10, 54, 2, 19, 57] ); test_vtbx!( test_vtbx4_u8 => vtbx4_u8: - table[uint8x8x4_t]: [ 0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] | - ext[uint8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | - ctrl[u8x8]: [3_u8, 31, 17, 22, 10, 29, 7, 15] => [3_i8, 31, 17, 22, 10, 29, 7, 15] | - ctrl[u8x8]: [3_u8, 8, 17, 10, 37, 2, 19, 42] => [3_i8, 8, 17, 10, 54, 2, 19, 57] ); test_vtbx!( test_vtbx4_p8 => vtbx4_p8: - table[poly8x8x4_t]: [ 0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31] | - ext[poly8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | - ctrl[u8x8]: [3_u8, 31, 17, 22, 10, 29, 7, 15] => [3_i8, 31, 17, 22, 10, 29, 7, 15] | - ctrl[u8x8]: [3_u8, 8, 17, 10, 37, 2, 19, 42] => [3_i8, 8, 17, 10, 54, 2, 19, 57] ); // Aarch64 tests #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl1_s8 => vqtbl1_s8: - table[int8x16_t]: [ 0_i8, -17, 34, 51, 68, 85, 102, 119, -106, -93, -84, -117, -104, -116, -72, -121 ] | - ctrl[i8x8]: [127_i8, 15, 1, 14, 2, 13, 3, 12] => [0_i8, -121, -17, -72, 34, -116, 51, -104] | - ctrl[i8x8]: [4_i8, 11, 16, 10, 6, 19, 7, 18] => [68_i8, -117, 0, -84, 102, 0, 119, 0] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl1q_s8 => vqtbl1q_s8: - table[int8x16_t]: [ 0_i8, -17, 34, 51, 68, 85, 102, 119, -106, -93, -84, -117, -104, -116, -72, -121 ] | - ctrl[i8x16]: [127_i8, 15, 1, 14, 2, 13, 3, 12, 4_i8, 11, 16, 10, 6, 19, 7, 18] => [0_i8, -121, -17, -72, 34, -116, 51, -104, 68, -117, 0, -84, 102, 0, 119, 0] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl1_u8 => vqtbl1_u8: - table[uint8x16_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 106, 93, 84, 117, 104, 
116, 72, 121 ] | - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [0_u8, 121, 17, 72, 34, 116, 51, 104] | - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 117, 0, 84, 102, 0, 119, 0] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl1q_u8 => vqtbl1q_u8: - table[uint8x16_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 106, 93, 84, 117, 104, 116, 72, 121 ] | - ctrl[u8x16]: [127_u8, 15, 1, 14, 2, 13, 3, 12, 4_u8, 11, 16, 10, 6, 19, 7, 18] => [0_u8, 121, 17, 72, 34, 116, 51, 104, 68, 117, 0, 84, 102, 0, 119, 0] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl1_p8 => vqtbl1_p8: - table[poly8x16_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 106, 93, 84, 117, 104, 116, 72, 121 ] | - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [0_u8, 121, 17, 72, 34, 116, 51, 104] | - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 117, 0, 84, 102, 0, 119, 0] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl1q_p8 => vqtbl1q_p8: - table[poly8x16_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 106, 93, 84, 117, 104, 116, 72, 121 ] | - ctrl[u8x16]: [127_u8, 15, 1, 14, 2, 13, 3, 12, 4_u8, 11, 16, 10, 6, 19, 7, 18] => [0_u8, 121, 17, 72, 34, 116, 51, 104, 68, 117, 0, 84, 102, 0, 119, 0] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl2_s8 => vqtbl2_s8: - table[int8x16x2_t]: [ 0_i8, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31 ] | - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [0_i8, -15, -1, 24, 2, -13, -3, -29] | - ctrl[i8x8]: [4_i8, 31, 32, 10, 6, 49, 7, 18] => [4_i8, -31, 0, 10, 6, 0, -7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl2q_s8 => vqtbl2q_s8: - table[int8x16x2_t]: [ 0_i8, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31 ] | - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 31, 32, 10, 6, 49, 7, 18] => [0_i8, -15, -1, 24, 2, -13, -3, -29, 4, -31, 0, 10, 6, 0, -7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl2_u8 => vqtbl2_u8: - table[uint8x16x2_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ] | - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | - ctrl[u8x8]: [4_u8, 31, 32, 10, 6, 49, 7, 18] => [4_u8, 31, 0, 10, 6, 0, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl2q_u8 => vqtbl2q_u8: - table[uint8x16x2_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ] | - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 31, 32, 10, 6, 49, 7, 18] => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 31, 0, 10, 6, 0, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl2_p8 => vqtbl2_p8: - table[poly8x16x2_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ] | - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | - ctrl[u8x8]: [4_u8, 31, 32, 10, 6, 49, 7, 18] => [4_u8, 31, 0, 10, 6, 0, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl2q_p8 => vqtbl2q_p8: - table[poly8x16x2_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ] | - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 31, 32, 10, 6, 49, 7, 18] => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 31, 0, 10, 6, 0, 7, 18] ); #[cfg(target_arch = 
"aarch64")] test_vtbl!( test_vqtbl3_s8 => vqtbl3_s8: - table[int8x16x3_t]: [ 0_i8, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31, 32, -33, 34, -35, 36, -37, 38, -39, 40, -41, 42, -43, 44, -45, 46, -47 ] | - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [0_i8, -15, -1, 24, 2, -13, -3, -29] | - ctrl[i8x8]: [4_i8, 32, 46, 51, 6, 49, 7, 18] => [4_i8, 32, 46, 0, 6, 0, -7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl3q_s8 => vqtbl3q_s8: - table[int8x16x3_t]: [ 0_i8, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31, 32, -33, 34, -35, 36, -37, 38, -39, 40, -41, 42, -43, 44, -45, 46, -47 ] | - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 32, 46, 51, 6, 49, 7, 18] => [0_i8, -15, -1, 24, 2, -13, -3, -29, 4, 32, 46, 0, 6, 0, -7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl3_u8 => vqtbl3_u8: - table[uint8x16x3_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47 ] | - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | - ctrl[u8x8]: [4_u8, 32, 46, 51, 6, 49, 7, 18] => [4_u8, 32, 46, 0, 6, 0, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl3q_u8 => vqtbl3q_u8: - table[uint8x16x3_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47 ] | - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 32, 46, 51, 6, 49, 7, 18] => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 32, 46, 0, 6, 0, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl3_p8 => vqtbl3_p8: - table[poly8x16x3_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47 ] | - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | - ctrl[u8x8]: [4_u8, 32, 46, 51, 6, 49, 7, 18] => [4_u8, 32, 46, 0, 6, 0, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl3q_p8 => vqtbl3q_p8: - table[poly8x16x3_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47 ] | - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 32, 46, 51, 6, 49, 7, 18] => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 32, 46, 0, 6, 0, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl4_s8 => vqtbl4_s8: - table[int8x16x4_t]: [ 0_i8, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31, 32, -33, 34, -35, 36, -37, 38, -39, 40, -41, 42, -43, 44, -45, 46, -47, 48, -49, 50, -51, 52, -53, 54, -55, 56, -57, 58, -59, 60, -61, 62, -63 ] | - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [0_i8, -15, -1, 24, 2, -13, -3, -29] | - ctrl[i8x8]: [4_i8, 46, 64, 51, 6, 71, 7, 18] => [4_i8, 46, 0, -51, 6, 0, -7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl4q_s8 => vqtbl4q_s8: - table[int8x16x4_t]: [ 0_i8, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31, 32, -33, 34, -35, 36, -37, 38, -39, 40, -41, 42, -43, 44, -45, 
46, -47, 48, -49, 50, -51, 52, -53, 54, -55, 56, -57, 58, -59, 60, -61, 62, -63 ] | - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 46, 64, 51, 6, 71, 7, 18] => [0_i8, -15, -1, 24, 2, -13, -3, -29, 4, 46, 0, -51, 6, 0, -7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl4_u8 => vqtbl4_u8: - table[uint8x16x4_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 ] | - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | - ctrl[u8x8]: [4_u8, 46, 64, 51, 6, 71, 7, 18] => [4_u8, 46, 0, 51, 6, 0, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl4q_u8 => vqtbl4q_u8: - table[uint8x16x4_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 ] | - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 46, 64, 51, 6, 71, 7, 18] => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 46, 0, 51, 6, 0, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl4_p8 => vqtbl4_p8: - table[poly8x16x4_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 ] | - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | - ctrl[u8x8]: [4_u8, 46, 64, 51, 6, 71, 7, 18] => [4_u8, 46, 0, 51, 6, 0, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbl!( test_vqtbl4q_p8 => vqtbl4q_p8: - table[poly8x16x4_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 ] | - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 46, 64, 51, 6, 71, 7, 18] => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 46, 0, 51, 6, 0, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx1_s8 => vqtbx1_s8: - table[int8x16_t]: [ 0_i8, -17, 34, 51, 68, 85, 102, 119, -106, -93, -84, -117, -104, -116, -72, -121 ] | - ext[int8x8_t]: [100_i8, -101, 102, -103, 104, -105, 106, -107] | - ctrl[i8x8]: [127_i8, 15, 1, 14, 2, 13, 3, 12] => [100_i8, -121, -17, -72, 34, -116, 51, -104] | - ctrl[i8x8]: [4_i8, 11, 16, 10, 6, 19, 7, 18] => [68_i8, -117, 102, -84, 102, -105, 119, -107] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx1q_s8 => vqtbx1q_s8: - table[int8x16_t]: [ 0_i8, -17, 34, 51, 68, 85, 102, 119, -106, -93, -84, -117, -104, -116, -72, -121 ] | - ext[int8x16_t]: [ 100_i8, -101, 102, -103, 104, -105, 106, -107, 108, -109, 110, -111, 112, -113, 114, -115 ] | - ctrl[i8x16]: [127_i8, 15, 1, 14, 2, 13, 3, 12, 4_i8, 11, 16, 10, 6, 19, 7, 18] => [100_i8, -121, -17, -72, 34, -116, 51, -104, 68, -117, 110, -84, 102, -113, 119, -115] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx1_u8 => vqtbx1_u8: - table[uint8x16_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 106, 93, 84, 117, 104, 116, 72, 121 ] | - ext[uint8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [100_u8, 121, 17, 72, 34, 116, 51, 104] | - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 
18] => [68_u8, 117, 102, 84, 102, 105, 119, 107] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx1q_u8 => vqtbx1q_u8: - table[uint8x16_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 106, 93, 84, 117, 104, 116, 72, 121 ] | - ext[uint8x16_t]: [ 100_u8, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115 ] | - ctrl[u8x16]: [127_u8, 15, 1, 14, 2, 13, 3, 12, 4_u8, 11, 16, 10, 6, 19, 7, 18] => [100_u8, 121, 17, 72, 34, 116, 51, 104, 68, 117, 110, 84, 102, 113, 119, 115] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx1_p8 => vqtbx1_p8: - table[poly8x16_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 106, 93, 84, 117, 104, 116, 72, 121 ] | - ext[poly8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [100_u8, 121, 17, 72, 34, 116, 51, 104] | - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 117, 102, 84, 102, 105, 119, 107] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx1q_p8 => vqtbx1q_p8: - table[poly8x16_t]: [ 0_u8, 17, 34, 51, 68, 85, 102, 119, 106, 93, 84, 117, 104, 116, 72, 121 ] | - ext[poly8x16_t]: [ 100_u8, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115 ] | - ctrl[u8x16]: [127_u8, 15, 1, 14, 2, 13, 3, 12, 4_u8, 11, 16, 10, 6, 19, 7, 18] => [100_u8, 121, 17, 72, 34, 116, 51, 104, 68, 117, 110, 84, 102, 113, 119, 115] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx2_s8 => vqtbx2_s8: - table[int8x16x2_t]: [ 0_i8, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31 ] | - ext[int8x8_t]: [100_i8, -101, 102, -103, 104, -105, 106, -107] | - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [100_i8, -15, -1, 24, 2, -13, -3, -29] | - ctrl[i8x8]: [4_i8, 31, 32, 10, 6, 49, 7, 18] => [4_i8, -31, 102, 10, 6, -105, -7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx2q_s8 => vqtbx2q_s8: - table[int8x16x2_t]: [ 0_i8, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31 ] | - ext[int8x16_t]: [ 100_i8, -101, 102, -103, 104, -105, 106, -107, 108, -109, 110, -111, 112, -113, 114, -115 ] | - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 31, 32, 10, 6, 49, 7, 18] => [100_i8, -15, -1, 24, 2, -13, -3, -29, 4, -31, 110, 10, 6, -113, -7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx2_u8 => vqtbx2_u8: - table[uint8x16x2_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ] | - ext[uint8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | - ctrl[u8x8]: [4_u8, 31, 32, 10, 6, 49, 7, 18] => [4_u8, 31, 102, 10, 6, 105, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx2q_u8 => vqtbx2q_u8: - table[uint8x16x2_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ] | - ext[uint8x16_t]: [ 100_u8, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115 ] | - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 31, 32, 10, 6, 49, 7, 18] => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 31, 110, 10, 6, 113, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx2_p8 => vqtbx2_p8: - table[poly8x16x2_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ] | - ext[poly8x8_t]: 
[100_u8, 101, 102, 103, 104, 105, 106, 107] | - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | - ctrl[u8x8]: [4_u8, 31, 32, 10, 6, 49, 7, 18] => [4_u8, 31, 102, 10, 6, 105, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx2q_p8 => vqtbx2q_p8: - table[poly8x16x2_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ] | - ext[poly8x16_t]: [ 100_u8, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115 ] | - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 31, 32, 10, 6, 49, 7, 18] => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 31, 110, 10, 6, 113, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx3_s8 => vqtbx3_s8: - table[int8x16x3_t]: [ 0_i8, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31, 32, -33, 34, -35, 36, -37, 38, -39, 40, -41, 42, -43, 44, -45, 46, -47 ] | - ext[int8x8_t]: [100_i8, -101, 102, -103, 104, -105, 106, -107] | - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [100_i8, -15, -1, 24, 2, -13, -3, -29] | - ctrl[i8x8]: [4_i8, 32, 46, 51, 6, 49, 7, 18] => [4_i8, 32, 46, -103, 6, -105, -7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx3q_s8 => vqtbx3q_s8: - table[int8x16x3_t]: [ 0_i8, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31, 32, -33, 34, -35, 36, -37, 38, -39, 40, -41, 42, -43, 44, -45, 46, -47 ] | - ext[int8x16_t]: [ 100_i8, -101, 102, -103, 104, -105, 106, -107, 108, -109, 110, -111, 112, -113, 114, -115 ] | - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 32, 46, 51, 6, 49, 7, 18] => [100_i8, -15, -1, 24, 2, -13, -3, -29, 4, 32, 46, -111, 6, -113, -7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx3_u8 => vqtbx3_u8: - table[uint8x16x3_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47 ] | - ext[uint8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | - ctrl[u8x8]: [4_u8, 32, 46, 51, 6, 49, 7, 18] => [4_u8, 32, 46, 103, 6, 105, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx3q_u8 => vqtbx3q_u8: - table[uint8x16x3_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47 ] | - ext[uint8x16_t]: [ 100_u8, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115 ] | - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 32, 46, 51, 6, 49, 7, 18] => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 32, 46, 111, 6, 113, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx3_p8 => vqtbx3_p8: - table[poly8x16x3_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47 ] | - ext[poly8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | - ctrl[u8x8]: [4_u8, 32, 46, 51, 6, 49, 7, 18] => [4_u8, 32, 46, 103, 6, 105, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx3q_p8 => vqtbx3q_p8: - table[poly8x16x3_t]: [ 0_u8, 1, 2, 3, 
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47 ] | - ext[poly8x16_t]: [ 100_u8, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115 ] | - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 32, 46, 51, 6, 49, 7, 18] => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 32, 46, 111, 6, 113, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx4_s8 => vqtbx4_s8: - table[int8x16x4_t]: [ 0_i8, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31, 32, -33, 34, -35, 36, -37, 38, -39, 40, -41, 42, -43, 44, -45, 46, -47, 48, -49, 50, -51, 52, -53, 54, -55, 56, -57, 58, -59, 60, -61, 62, -63 ] | - ext[int8x8_t]: [100_i8, -101, 102, -103, 104, -105, 106, -107] | - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [100_i8, -15, -1, 24, 2, -13, -3, -29] | - ctrl[i8x8]: [4_i8, 46, 64, 51, 6, 71, 7, 18] => [4_i8, 46, 102, -51, 6, -105, -7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx4q_s8 => vqtbx4q_s8: - table[int8x16x4_t]: [ 0_i8, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15, 16, -17, 18, -19, 20, -21, 22, -23, 24, -25, 26, -27, 28, -29, 30, -31, 32, -33, 34, -35, 36, -37, 38, -39, 40, -41, 42, -43, 44, -45, 46, -47, 48, -49, 50, -51, 52, -53, 54, -55, 56, -57, 58, -59, 60, -61, 62, -63 ] | - ext[int8x16_t]: [ 100_i8, -101, 102, -103, 104, -105, 106, -107, 108, -109, 110, -111, 112, -113, 114, -115 ] | - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 46, 64, 51, 6, 71, 7, 18] => [100_i8, -15, -1, 24, 2, -13, -3, -29, 4, 46, 110, -51, 6, -113, -7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx4_u8 => vqtbx4_u8: - table[uint8x16x4_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 ] | - ext[uint8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | - ctrl[u8x8]: [4_u8, 46, 64, 51, 6, 71, 7, 18] => [4_u8, 46, 102, 51, 6, 105, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx4q_u8 => vqtbx4q_u8: - table[uint8x16x4_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 ] | - ext[uint8x16_t]: [ 100_u8, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115 ] | - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 46, 64, 51, 6, 71, 7, 18] => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 46, 110, 51, 6, 113, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( test_vqtbx4_p8 => vqtbx4_p8: - table[poly8x16x4_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 ] | - ext[poly8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | - ctrl[u8x8]: [4_u8, 46, 64, 51, 6, 71, 7, 18] => [4_u8, 46, 102, 51, 6, 105, 7, 18] ); #[cfg(target_arch = "aarch64")] test_vtbx!( 
test_vqtbx4q_p8 => vqtbx4q_p8: - table[poly8x16x4_t]: [ 0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 ] | - ext[poly8x16_t]: [ 100_u8, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115 ] | - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 46, 64, 51, 6, 71, 7, 18] => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 46, 110, 51, 6, 113, 7, 18] ); core_arch-0.1.5/src/arm/v6.rs010064400007650000024000000020501343447103600141320ustar0000000000000000//! ARMv6 intrinsics. //! //! The reference is [ARMv6-M Architecture Reference Manual][armv6m]. //! //! [armv6m]: //! http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0419c/index. //! html #[cfg(test)] use stdsimd_test::assert_instr; /// Reverse the order of the bytes. #[inline] #[cfg_attr(test, assert_instr(rev))] pub unsafe fn _rev_u16(x: u16) -> u16 { x.swap_bytes() as u16 } /// Reverse the order of the bytes. #[inline] #[cfg_attr(test, assert_instr(rev))] pub unsafe fn _rev_u32(x: u32) -> u32 { x.swap_bytes() as u32 } #[cfg(test)] mod tests { use crate::core_arch::arm::v6; #[test] fn _rev_u16() { unsafe { assert_eq!( v6::_rev_u16(0b0000_0000_1111_1111_u16), 0b1111_1111_0000_0000_u16 ); } } #[test] fn _rev_u32() { unsafe { assert_eq!( v6::_rev_u32(0b0000_0000_1111_1111_0000_0000_1111_1111_u32), 0b1111_1111_0000_0000_1111_1111_0000_0000_u32 ); } } } core_arch-0.1.5/src/arm/v7.rs010064400007650000024000000044451343447103600141450ustar0000000000000000//! ARMv7 intrinsics. //! //! The reference is [ARMv7-M Architecture Reference Manual (Issue //! E.b)][armv7m]. //! //! [armv7m]: //! http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0403e. //! b/index.html pub use super::v6::*; #[cfg(test)] use stdsimd_test::assert_instr; /// Count Leading Zeros. #[inline] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/382 // #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))] pub unsafe fn _clz_u8(x: u8) -> u8 { x.leading_zeros() as u8 } /// Count Leading Zeros. #[inline] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/382 // #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))] pub unsafe fn _clz_u16(x: u16) -> u16 { x.leading_zeros() as u16 } /// Count Leading Zeros. #[inline] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/382 // #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))] pub unsafe fn _clz_u32(x: u32) -> u32 { x.leading_zeros() as u32 } /// Reverse the bit order. 
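///
/// A minimal usage sketch (illustrative only; the local names are arbitrary,
/// the expected value mirrors the `_rbit_u32` test below, and an ARMv7 target
/// with the `v7` feature is assumed):
///
/// ```ignore
/// let x = 0b0000_1010_u32;
/// let reversed = unsafe { _rbit_u32(x) };
/// assert_eq!(reversed, 0b0101_0000_0000_0000_0000_0000_0000_0000_u32);
/// ```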
#[inline] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(test, assert_instr(rbit))] pub unsafe fn _rbit_u32(x: u32) -> u32 { crate::intrinsics::bitreverse(x) } #[cfg(test)] mod tests { use crate::core_arch::arm::v7; #[test] fn _clz_u8() { unsafe { assert_eq!(v7::_clz_u8(0b0000_1010u8), 4u8); } } #[test] fn _clz_u16() { unsafe { assert_eq!(v7::_clz_u16(0b0000_1010u16), 12u16); } } #[test] fn _clz_u32() { unsafe { assert_eq!(v7::_clz_u32(0b0000_1010u32), 28u32); } } #[test] #[cfg(dont_compile_me)] // FIXME need to add `v7` upstream in rustc fn _rbit_u32() { unsafe { assert_eq!( v7::_rbit_u32(0b0000_1010u32), 0b0101_0000_0000_0000_0000_0000_0000_0000u32 ); } } } core_arch-0.1.5/src/core_arch_docs.md010064400007650000024000000267501342455103600157440ustar0000000000000000SIMD and vendor intrinsics module. This module is intended to be the gateway to architecture-specific intrinsic functions, typically related to SIMD (but not always!). Each architecture that Rust compiles to may contain a submodule here, which means that this is not a portable module! If you're writing a portable library take care when using these APIs! Under this module you'll find an architecture-named module, such as `x86_64`. Each `#[cfg(target_arch)]` that Rust can compile to may have a module entry here, only present on that particular target. For example the `i686-pc-windows-msvc` target will have an `x86` module here, whereas `x86_64-pc-windows-msvc` has `x86_64`. [rfc]: https://github.com/rust-lang/rfcs/pull/2325 [tracked]: https://github.com/rust-lang/rust/issues/48556 # Overview This module exposes vendor-specific intrinsics that typically correspond to a single machine instruction. These intrinsics are not portable: their availability is architecture-dependent, and not all machines of that architecture might provide the intrinsic. The `arch` module is intended to be a low-level implementation detail for higher-level APIs. Using it correctly can be quite tricky as you need to ensure at least a few guarantees are upheld: * The correct architecture's module is used. For example the `arm` module isn't available on the `x86_64-unknown-linux-gnu` target. This is typically done by ensuring that `#[cfg]` is used appropriately when using this module. * The CPU the program is currently running on supports the function being called. For example it is unsafe to call an AVX2 function on a CPU that doesn't actually support AVX2. As a result of the latter of these guarantees all intrinsics in this module are `unsafe` and extra care needs to be taken when calling them! # CPU Feature Detection In order to call these APIs in a safe fashion there's a number of mechanisms available to ensure that the correct CPU feature is available to call an intrinsic. Let's consider, for example, the `_mm256_add_epi64` intrinsics on the `x86` and `x86_64` architectures. This function requires the AVX2 feature as [documented by Intel][intel-dox] so to correctly call this function we need to (a) guarantee we only call it on `x86`/`x86_64` and (b) ensure that the CPU feature is available [intel-dox]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi64&expand=100 ## Static CPU Feature Detection The first option available to us is to conditionally compile code via the `#[cfg]` attribute. 
CPU features correspond to the `target_feature` cfg available, and can be used like so: ```ignore #[cfg( all( any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx2" ) )] fn foo() { #[cfg(target_arch = "x86")] use std::arch::x86::_mm256_add_epi64; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::_mm256_add_epi64; unsafe { _mm256_add_epi64(...); } } ``` Here we're using `#[cfg(target_feature = "avx2")]` to conditionally compile this function into our module. This means that if the `avx2` feature is *enabled statically* then we'll use the `_mm256_add_epi64` function at runtime. The `unsafe` block here can be justified through the usage of `#[cfg]` to only compile the code in situations where the safety guarantees are upheld. Statically enabling a feature is typically done with the `-C target-feature` or `-C target-cpu` flags to the compiler. For example if your local CPU supports AVX2 then you can compile the above function with: ```sh $ RUSTFLAGS='-C target-cpu=native' cargo build ``` Or otherwise you can specifically enable just the AVX2 feature: ```sh $ RUSTFLAGS='-C target-feature=+avx2' cargo build ``` Note that when you compile a binary with a particular feature enabled it's important to ensure that you only run the binary on systems which satisfy the required feature set. ## Dynamic CPU Feature Detection Sometimes statically dispatching isn't quite what you want. Instead you might want to build a portable binary that runs across a variety of CPUs, but at runtime it selects the most optimized implementation available. This allows you to build a "least common denominator" binary which has certain sections more optimized for different CPUs. Taking our previous example from before, we're going to compile our binary *without* AVX2 support, but we'd like to enable it for just one function. We can do that in a manner like: ```ignore fn foo() { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { if is_x86_feature_detected!("avx2") { return unsafe { foo_avx2() }; } } // fallback implementation without using AVX2 } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[target_feature(enable = "avx2")] unsafe fn foo_avx2() { #[cfg(target_arch = "x86")] use std::arch::x86::_mm256_add_epi64; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::_mm256_add_epi64; _mm256_add_epi64(...); } ``` There's a couple of components in play here, so let's go through them in detail! * First up we notice the `is_x86_feature_detected!` macro. Provided by the standard library, this macro will perform necessary runtime detection to determine whether the CPU the program is running on supports the specified feature. In this case the macro will expand to a boolean expression evaluating to whether the local CPU has the AVX2 feature or not. Note that this macro, like the `arch` module, is platform-specific. For example calling `is_x86_feature_detected!("avx2")` on ARM will be a compile time error. To ensure we don't hit this error a statement level `#[cfg]` is used to only compile usage of the macro on `x86`/`x86_64`. * Next up we see our AVX2-enabled function, `foo_avx2`. This function is decorated with the `#[target_feature]` attribute which enables a CPU feature for just this one function. Using a compiler flag like `-C target-feature=+avx2` will enable AVX2 for the entire program, but using an attribute will only enable it for the one function. Usage of the `#[target_feature]` attribute currently requires the function to also be `unsafe`, as we see here. 
This is because the function can only be correctly called on systems which have the AVX2 (like the intrinsics themselves). And with all that we should have a working program! This program will run across all machines and it'll use the optimized AVX2 implementation on machines where support is detected. # Ergonomics It's important to note that using the `arch` module is not the easiest thing in the world, so if you're curious to try it out you may want to brace yourself for some wordiness! The primary purpose of this module is to enable stable crates on crates.io to build up much more ergonomic abstractions which end up using SIMD under the hood. Over time these abstractions may also move into the standard library itself, but for now this module is tasked with providing the bare minimum necessary to use vendor intrinsics on stable Rust. # Other architectures This documentation is only for one particular architecture, you can find others at: * [`x86`] * [`x86_64`] * [`arm`] * [`aarch64`] * [`mips`] * [`mips64`] * [`powerpc`] * [`powerpc64`] * [`nvptx`] * [`wasm32`] [`x86`]: x86/index.html [`x86_64`]: x86_64/index.html [`arm`]: arm/index.html [`aarch64`]: aarch64/index.html [`mips`]: mips/index.html [`mips64`]: mips64/index.html [`powerpc`]: powerpc/index.html [`powerpc64`]: powerpc64/index.html [`nvptx`]: nvptx/index.html [`wasm32`]: wasm32/index.html # Examples First let's take a look at not actually using any intrinsics but instead using LLVM's auto-vectorization to produce optimized vectorized code for AVX2 and also for the default platform. ```rust # #![cfg_attr(not(dox),feature(stdsimd))] # #[cfg(not(dox))] # #[macro_use(is_x86_feature_detected)] # extern crate std_detect; fn main() { let mut dst = [0]; add_quickly(&[1], &[2], &mut dst); assert_eq!(dst[0], 3); } fn add_quickly(a: &[u8], b: &[u8], c: &mut [u8]) { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { // Note that this `unsafe` block is safe because we're testing // that the `avx2` feature is indeed available on our CPU. if is_x86_feature_detected!("avx2") { return unsafe { add_quickly_avx2(a, b, c) }; } } add_quickly_fallback(a, b, c) } #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[target_feature(enable = "avx2")] unsafe fn add_quickly_avx2(a: &[u8], b: &[u8], c: &mut [u8]) { add_quickly_fallback(a, b, c) // the function below is inlined here } fn add_quickly_fallback(a: &[u8], b: &[u8], c: &mut [u8]) { for ((a, b), c) in a.iter().zip(b).zip(c) { *c = *a + *b; } } ``` Next up let's take a look at an example of manually using intrinsics. Here we'll be using SSE4.1 features to implement hex encoding. 
``` fn main() { let mut dst = [0; 32]; hex_encode(b"\x01\x02\x03", &mut dst); assert_eq!(&dst[..6], b"010203"); let mut src = [0; 16]; for i in 0..16 { src[i] = (i + 1) as u8; } hex_encode(&src, &mut dst); assert_eq!(&dst, b"0102030405060708090a0b0c0d0e0f10"); } pub fn hex_encode(src: &[u8], dst: &mut [u8]) { let len = src.len().checked_mul(2).unwrap(); assert!(dst.len() >= len); #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { if is_x86_feature_detected!("sse4.1") { return unsafe { hex_encode_sse41(src, dst) }; } } hex_encode_fallback(src, dst) } // translated from // https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp #[target_feature(enable = "sse4.1")] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] unsafe fn hex_encode_sse41(mut src: &[u8], dst: &mut [u8]) { #[cfg(target_arch = "x86")] use std::arch::x86::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; let ascii_zero = _mm_set1_epi8(b'0' as i8); let nines = _mm_set1_epi8(9); let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8); let and4bits = _mm_set1_epi8(0xf); let mut i = 0_isize; while src.len() >= 16 { let invec = _mm_loadu_si128(src.as_ptr() as *const _); let masked1 = _mm_and_si128(invec, and4bits); let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits); // return 0xff corresponding to the elements > 9, or 0x00 otherwise let cmpmask1 = _mm_cmpgt_epi8(masked1, nines); let cmpmask2 = _mm_cmpgt_epi8(masked2, nines); // add '0' or the offset depending on the masks let masked1 = _mm_add_epi8( masked1, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1), ); let masked2 = _mm_add_epi8( masked2, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2), ); // interleave masked1 and masked2 bytes let res1 = _mm_unpacklo_epi8(masked2, masked1); let res2 = _mm_unpackhi_epi8(masked2, masked1); _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1); _mm_storeu_si128( dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2, ); src = &src[16..]; i += 16; } let i = i as usize; hex_encode_fallback(src, &mut dst[i * 2..]); } fn hex_encode_fallback(src: &[u8], dst: &mut [u8]) { fn hex(byte: u8) -> u8 { static TABLE: &[u8] = b"0123456789abcdef"; TABLE[byte as usize] } for (byte, slots) in src.iter().zip(dst.chunks_mut(2)) { slots[0] = hex((*byte >> 4) & 0xf); slots[1] = hex(*byte & 0xf); } } ``` core_arch-0.1.5/src/lib.rs010064400007650000024000000034761345561510300136010ustar0000000000000000#![doc(include = "core_arch_docs.md")] #![cfg_attr(stdsimd_strict, deny(warnings))] #![allow(dead_code)] #![allow(unused_features)] #![feature( const_fn, const_fn_union, link_llvm_intrinsics, platform_intrinsics, repr_simd, simd_ffi, asm, proc_macro_hygiene, stmt_expr_attributes, core_intrinsics, no_core, rustc_attrs, stdsimd, staged_api, align_offset, maybe_uninit, doc_cfg, mmx_target_feature, tbm_target_feature, sse4a_target_feature, arm_target_feature, aarch64_target_feature, cmpxchg16b_target_feature, avx512_target_feature, mips_target_feature, powerpc_target_feature, wasm_target_feature, abi_unadjusted, adx_target_feature, external_doc )] #![cfg_attr(test, feature(test, abi_vectorcall, untagged_unions))] #![deny(clippy::missing_inline_in_public_items)] #![allow( clippy::inline_always, clippy::too_many_arguments, clippy::cast_sign_loss, clippy::cast_lossless, clippy::cast_possible_wrap, clippy::cast_possible_truncation, clippy::cast_precision_loss, clippy::shadow_reuse, clippy::cognitive_complexity, clippy::similar_names, clippy::many_single_char_names )] #![cfg_attr(test, allow(unused_imports))] #![no_std] 
#![unstable(feature = "stdsimd", issue = "27731")] #![doc( test(attr(deny(warnings))), test(attr(allow(dead_code, deprecated, unused_variables, unused_mut))) )] #[cfg(test)] #[macro_use] extern crate std; #[cfg(test)] #[macro_use] extern crate std_detect; #[cfg(test)] extern crate stdsimd_test; #[cfg(test)] extern crate test; #[cfg(all(test, target_arch = "wasm32"))] extern crate wasm_bindgen_test; #[path = "mod.rs"] mod core_arch; pub use self::core_arch::arch::*; #[allow(unused_imports)] use core::{ffi, intrinsics, marker, mem, ptr, sync}; core_arch-0.1.5/src/macros.rs010064400007650000024000000215221345561510300143070ustar0000000000000000//! Utility macros. #[allow(unused)] macro_rules! constify_imm8 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm8) & 0b1111_1111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), 3 => $expand!(3), 4 => $expand!(4), 5 => $expand!(5), 6 => $expand!(6), 7 => $expand!(7), 8 => $expand!(8), 9 => $expand!(9), 10 => $expand!(10), 11 => $expand!(11), 12 => $expand!(12), 13 => $expand!(13), 14 => $expand!(14), 15 => $expand!(15), 16 => $expand!(16), 17 => $expand!(17), 18 => $expand!(18), 19 => $expand!(19), 20 => $expand!(20), 21 => $expand!(21), 22 => $expand!(22), 23 => $expand!(23), 24 => $expand!(24), 25 => $expand!(25), 26 => $expand!(26), 27 => $expand!(27), 28 => $expand!(28), 29 => $expand!(29), 30 => $expand!(30), 31 => $expand!(31), 32 => $expand!(32), 33 => $expand!(33), 34 => $expand!(34), 35 => $expand!(35), 36 => $expand!(36), 37 => $expand!(37), 38 => $expand!(38), 39 => $expand!(39), 40 => $expand!(40), 41 => $expand!(41), 42 => $expand!(42), 43 => $expand!(43), 44 => $expand!(44), 45 => $expand!(45), 46 => $expand!(46), 47 => $expand!(47), 48 => $expand!(48), 49 => $expand!(49), 50 => $expand!(50), 51 => $expand!(51), 52 => $expand!(52), 53 => $expand!(53), 54 => $expand!(54), 55 => $expand!(55), 56 => $expand!(56), 57 => $expand!(57), 58 => $expand!(58), 59 => $expand!(59), 60 => $expand!(60), 61 => $expand!(61), 62 => $expand!(62), 63 => $expand!(63), 64 => $expand!(64), 65 => $expand!(65), 66 => $expand!(66), 67 => $expand!(67), 68 => $expand!(68), 69 => $expand!(69), 70 => $expand!(70), 71 => $expand!(71), 72 => $expand!(72), 73 => $expand!(73), 74 => $expand!(74), 75 => $expand!(75), 76 => $expand!(76), 77 => $expand!(77), 78 => $expand!(78), 79 => $expand!(79), 80 => $expand!(80), 81 => $expand!(81), 82 => $expand!(82), 83 => $expand!(83), 84 => $expand!(84), 85 => $expand!(85), 86 => $expand!(86), 87 => $expand!(87), 88 => $expand!(88), 89 => $expand!(89), 90 => $expand!(90), 91 => $expand!(91), 92 => $expand!(92), 93 => $expand!(93), 94 => $expand!(94), 95 => $expand!(95), 96 => $expand!(96), 97 => $expand!(97), 98 => $expand!(98), 99 => $expand!(99), 100 => $expand!(100), 101 => $expand!(101), 102 => $expand!(102), 103 => $expand!(103), 104 => $expand!(104), 105 => $expand!(105), 106 => $expand!(106), 107 => $expand!(107), 108 => $expand!(108), 109 => $expand!(109), 110 => $expand!(110), 111 => $expand!(111), 112 => $expand!(112), 113 => $expand!(113), 114 => $expand!(114), 115 => $expand!(115), 116 => $expand!(116), 117 => $expand!(117), 118 => $expand!(118), 119 => $expand!(119), 120 => $expand!(120), 121 => $expand!(121), 122 => $expand!(122), 123 => $expand!(123), 124 => $expand!(124), 125 => $expand!(125), 126 => $expand!(126), 127 => $expand!(127), 128 => $expand!(128), 129 => $expand!(129), 130 => $expand!(130), 131 => $expand!(131), 132 => $expand!(132), 133 => $expand!(133), 134 => 
$expand!(134), 135 => $expand!(135), 136 => $expand!(136), 137 => $expand!(137), 138 => $expand!(138), 139 => $expand!(139), 140 => $expand!(140), 141 => $expand!(141), 142 => $expand!(142), 143 => $expand!(143), 144 => $expand!(144), 145 => $expand!(145), 146 => $expand!(146), 147 => $expand!(147), 148 => $expand!(148), 149 => $expand!(149), 150 => $expand!(150), 151 => $expand!(151), 152 => $expand!(152), 153 => $expand!(153), 154 => $expand!(154), 155 => $expand!(155), 156 => $expand!(156), 157 => $expand!(157), 158 => $expand!(158), 159 => $expand!(159), 160 => $expand!(160), 161 => $expand!(161), 162 => $expand!(162), 163 => $expand!(163), 164 => $expand!(164), 165 => $expand!(165), 166 => $expand!(166), 167 => $expand!(167), 168 => $expand!(168), 169 => $expand!(169), 170 => $expand!(170), 171 => $expand!(171), 172 => $expand!(172), 173 => $expand!(173), 174 => $expand!(174), 175 => $expand!(175), 176 => $expand!(176), 177 => $expand!(177), 178 => $expand!(178), 179 => $expand!(179), 180 => $expand!(180), 181 => $expand!(181), 182 => $expand!(182), 183 => $expand!(183), 184 => $expand!(184), 185 => $expand!(185), 186 => $expand!(186), 187 => $expand!(187), 188 => $expand!(188), 189 => $expand!(189), 190 => $expand!(190), 191 => $expand!(191), 192 => $expand!(192), 193 => $expand!(193), 194 => $expand!(194), 195 => $expand!(195), 196 => $expand!(196), 197 => $expand!(197), 198 => $expand!(198), 199 => $expand!(199), 200 => $expand!(200), 201 => $expand!(201), 202 => $expand!(202), 203 => $expand!(203), 204 => $expand!(204), 205 => $expand!(205), 206 => $expand!(206), 207 => $expand!(207), 208 => $expand!(208), 209 => $expand!(209), 210 => $expand!(210), 211 => $expand!(211), 212 => $expand!(212), 213 => $expand!(213), 214 => $expand!(214), 215 => $expand!(215), 216 => $expand!(216), 217 => $expand!(217), 218 => $expand!(218), 219 => $expand!(219), 220 => $expand!(220), 221 => $expand!(221), 222 => $expand!(222), 223 => $expand!(223), 224 => $expand!(224), 225 => $expand!(225), 226 => $expand!(226), 227 => $expand!(227), 228 => $expand!(228), 229 => $expand!(229), 230 => $expand!(230), 231 => $expand!(231), 232 => $expand!(232), 233 => $expand!(233), 234 => $expand!(234), 235 => $expand!(235), 236 => $expand!(236), 237 => $expand!(237), 238 => $expand!(238), 239 => $expand!(239), 240 => $expand!(240), 241 => $expand!(241), 242 => $expand!(242), 243 => $expand!(243), 244 => $expand!(244), 245 => $expand!(245), 246 => $expand!(246), 247 => $expand!(247), 248 => $expand!(248), 249 => $expand!(249), 250 => $expand!(250), 251 => $expand!(251), 252 => $expand!(252), 253 => $expand!(253), 254 => $expand!(254), _ => $expand!(255), } }; } #[allow(unused)] macro_rules! types { ($( $(#[$doc:meta])* pub struct $name:ident($($fields:tt)*); )*) => ($( $(#[$doc])* #[derive(Copy, Clone, Debug)] #[allow(non_camel_case_types)] #[repr(simd)] #[allow(clippy::missing_inline_in_public_items)] pub struct $name($($fields)*); )*) } core_arch-0.1.5/src/mips/mod.rs010064400007650000024000000003631343447103600145540ustar0000000000000000//! MIPS mod msa; pub use self::msa::*; #[cfg(test)] use stdsimd_test::assert_instr; /// Generates the trap instruction `BREAK` #[cfg_attr(test, assert_instr(break))] #[inline] pub unsafe fn break_() -> ! { crate::intrinsics::abort() } core_arch-0.1.5/src/mips/msa.rs010064400007650000024000021774661345531200000145660ustar0000000000000000//! MIPS SIMD Architecture intrinsics //! //! The reference is [MIPS Architecture for Programmers Volume IV-j: The //! 
MIPS32 SIMD Architecture Module Revision 1.12][msa_ref]. //! //! [msa_ref]: http://cdn2.imgtec.com/documentation/MD00866-2B-MSA32-AFP-01.12.pdf #[cfg(test)] use stdsimd_test::assert_instr; #[macro_use] mod macros; types! { // / MIPS-specific 128-bit wide vector of 16 packed `i8`. pub struct v16i8( i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, ); // / MIPS-specific 128-bit wide vector of 8 packed `i16`. pub struct v8i16( i16, i16, i16, i16, i16, i16, i16, i16, ); // / MIPS-specific 128-bit wide vector of 4 packed `i32`. pub struct v4i32( i32, i32, i32, i32, ); // / MIPS-specific 128-bit wide vector of 2 packed `i64`. pub struct v2i64( i64, i64, ); // / MIPS-specific 128-bit wide vector of 16 packed `u8`. pub struct v16u8( u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, ); // / MIPS-specific 128-bit wide vector of 8 packed `u16`. pub struct v8u16( u16, u16, u16, u16, u16, u16, u16, u16, ); // / MIPS-specific 128-bit wide vector of 4 packed `u32`. pub struct v4u32( u32, u32, u32, u32, ); // / MIPS-specific 128-bit wide vector of 2 packed `u64`. pub struct v2u64( u64, u64, ); // / MIPS-specific 128-bit wide vector of 4 packed `f32`. pub struct v4f32( f32, f32, f32, f32, ); // / MIPS-specific 128-bit wide vector of 2 packed `f64`. pub struct v2f64( f64, f64, ); } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.mips.add.a.b"] fn msa_add_a_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.add.a.h"] fn msa_add_a_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.add.a.w"] fn msa_add_a_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.add.a.d"] fn msa_add_a_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.adds.a.b"] fn msa_adds_a_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.adds.a.h"] fn msa_adds_a_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.adds.a.w"] fn msa_adds_a_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.adds.a.d"] fn msa_adds_a_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.adds.s.b"] fn msa_adds_s_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.adds.s.h"] fn msa_adds_s_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.adds.s.w"] fn msa_adds_s_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.adds.s.d"] fn msa_adds_s_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.adds.u.b"] fn msa_adds_u_b(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.adds.u.h"] fn msa_adds_u_h(a: v8u16, b: v8u16) -> v8u16; #[link_name = "llvm.mips.adds.u.w"] fn msa_adds_u_w(a: v4u32, b: v4u32) -> v4u32; #[link_name = "llvm.mips.adds.u.d"] fn msa_adds_u_d(a: v2u64, b: v2u64) -> v2u64; #[link_name = "llvm.mips.addv.b"] fn msa_addv_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.addv.h"] fn msa_addv_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.addv.w"] fn msa_addv_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.addv.d"] fn msa_addv_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.addvi.b"] fn msa_addvi_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.addvi.h"] fn msa_addvi_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.addvi.w"] fn msa_addvi_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.addvi.d"] fn msa_addvi_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.and.v"] fn msa_and_v(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.andi.b"] fn msa_andi_b(a: v16u8, b: i32) -> v16u8; #[link_name = "llvm.mips.asub.s.b"] fn msa_asub_s_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.asub.s.h"] fn 
msa_asub_s_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.asub.s.w"] fn msa_asub_s_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.asub.s.d"] fn msa_asub_s_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.asub.u.b"] fn msa_asub_u_b(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.asub.u.h"] fn msa_asub_u_h(a: v8u16, b: v8u16) -> v8u16; #[link_name = "llvm.mips.asub.u.w"] fn msa_asub_u_w(a: v4u32, b: v4u32) -> v4u32; #[link_name = "llvm.mips.asub.u.d"] fn msa_asub_u_d(a: v2u64, b: v2u64) -> v2u64; #[link_name = "llvm.mips.ave.s.b"] fn msa_ave_s_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.ave.s.h"] fn msa_ave_s_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.ave.s.w"] fn msa_ave_s_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.ave.s.d"] fn msa_ave_s_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.ave.u.b"] fn msa_ave_u_b(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.ave.u.h"] fn msa_ave_u_h(a: v8u16, b: v8u16) -> v8u16; #[link_name = "llvm.mips.ave.u.w"] fn msa_ave_u_w(a: v4u32, b: v4u32) -> v4u32; #[link_name = "llvm.mips.ave.u.d"] fn msa_ave_u_d(a: v2u64, b: v2u64) -> v2u64; #[link_name = "llvm.mips.aver.s.b"] fn msa_aver_s_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.aver.s.h"] fn msa_aver_s_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.aver.s.w"] fn msa_aver_s_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.aver.s.d"] fn msa_aver_s_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.aver.s.b"] fn msa_aver_u_b(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.aver.s.h"] fn msa_aver_u_h(a: v8u16, b: v8u16) -> v8u16; #[link_name = "llvm.mips.aver.s.w"] fn msa_aver_u_w(a: v4u32, b: v4u32) -> v4u32; #[link_name = "llvm.mips.aver.s.d"] fn msa_aver_u_d(a: v2u64, b: v2u64) -> v2u64; #[link_name = "llvm.mips.bclr.b"] fn msa_bclr_b(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.bclr.h"] fn msa_bclr_h(a: v8u16, b: v8u16) -> v8u16; #[link_name = "llvm.mips.bclr.w"] fn msa_bclr_w(a: v4u32, b: v4u32) -> v4u32; #[link_name = "llvm.mips.bclr.d"] fn msa_bclr_d(a: v2u64, b: v2u64) -> v2u64; #[link_name = "llvm.mips.bclri.b"] fn msa_bclri_b(a: v16u8, b: i32) -> v16u8; #[link_name = "llvm.mips.bclri.h"] fn msa_bclri_h(a: v8u16, b: i32) -> v8u16; #[link_name = "llvm.mips.bclri.w"] fn msa_bclri_w(a: v4u32, b: i32) -> v4u32; #[link_name = "llvm.mips.bclri.d"] fn msa_bclri_d(a: v2u64, b: i32) -> v2u64; #[link_name = "llvm.mips.binsl.b"] fn msa_binsl_b(a: v16u8, b: v16u8, c: v16u8) -> v16u8; #[link_name = "llvm.mips.binsl.h"] fn msa_binsl_h(a: v8u16, b: v8u16, c: v8u16) -> v8u16; #[link_name = "llvm.mips.binsl.w"] fn msa_binsl_w(a: v4u32, b: v4u32, c: v4u32) -> v4u32; #[link_name = "llvm.mips.binsl.d"] fn msa_binsl_d(a: v2u64, b: v2u64, c: v2u64) -> v2u64; #[link_name = "llvm.mips.binsli.b"] fn msa_binsli_b(a: v16u8, b: v16u8, c: i32) -> v16u8; #[link_name = "llvm.mips.binsli.h"] fn msa_binsli_h(a: v8u16, b: v8u16, c: i32) -> v8u16; #[link_name = "llvm.mips.binsli.w"] fn msa_binsli_w(a: v4u32, b: v4u32, c: i32) -> v4u32; #[link_name = "llvm.mips.binsli.d"] fn msa_binsli_d(a: v2u64, b: v2u64, c: i32) -> v2u64; #[link_name = "llvm.mips.binsr.b"] fn msa_binsr_b(a: v16u8, b: v16u8, c: v16u8) -> v16u8; #[link_name = "llvm.mips.binsr.h"] fn msa_binsr_h(a: v8u16, b: v8u16, c: v8u16) -> v8u16; #[link_name = "llvm.mips.binsr.w"] fn msa_binsr_w(a: v4u32, b: v4u32, c: v4u32) -> v4u32; #[link_name = "llvm.mips.binsr.d"] fn msa_binsr_d(a: v2u64, b: v2u64, c: v2u64) -> v2u64; #[link_name = 
"llvm.mips.binsri.b"] fn msa_binsri_b(a: v16u8, b: v16u8, c: i32) -> v16u8; #[link_name = "llvm.mips.binsri.h"] fn msa_binsri_h(a: v8u16, b: v8u16, c: i32) -> v8u16; #[link_name = "llvm.mips.binsri.w"] fn msa_binsri_w(a: v4u32, b: v4u32, c: i32) -> v4u32; #[link_name = "llvm.mips.binsri.d"] fn msa_binsri_d(a: v2u64, b: v2u64, c: i32) -> v2u64; #[link_name = "llvm.mips.bmnz.v"] fn msa_bmnz_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8; #[link_name = "llvm.mips.bmnzi.b"] fn msa_bmnzi_b(a: v16u8, b: v16u8, c: i32) -> v16u8; #[link_name = "llvm.mips.bmz.v"] fn msa_bmz_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8; #[link_name = "llvm.mips.bmzi.b"] fn msa_bmzi_b(a: v16u8, b: v16u8, c: i32) -> v16u8; #[link_name = "llvm.mips.bneg.b"] fn msa_bneg_b(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.bneg.h"] fn msa_bneg_h(a: v8u16, b: v8u16) -> v8u16; #[link_name = "llvm.mips.bneg.w"] fn msa_bneg_w(a: v4u32, b: v4u32) -> v4u32; #[link_name = "llvm.mips.bneg.d"] fn msa_bneg_d(a: v2u64, b: v2u64) -> v2u64; #[link_name = "llvm.mips.bnegi.b"] fn msa_bnegi_b(a: v16u8, b: i32) -> v16u8; #[link_name = "llvm.mips.bnegi.h"] fn msa_bnegi_h(a: v8u16, b: i32) -> v8u16; #[link_name = "llvm.mips.bnegi.w"] fn msa_bnegi_w(a: v4u32, b: i32) -> v4u32; #[link_name = "llvm.mips.bnegi.d"] fn msa_bnegi_d(a: v2u64, b: i32) -> v2u64; #[link_name = "llvm.mips.bnz.b"] fn msa_bnz_b(a: v16u8) -> i32; #[link_name = "llvm.mips.bnz.h"] fn msa_bnz_h(a: v8u16) -> i32; #[link_name = "llvm.mips.bnz.w"] fn msa_bnz_w(a: v4u32) -> i32; #[link_name = "llvm.mips.bnz.d"] fn msa_bnz_d(a: v2u64) -> i32; #[link_name = "llvm.mips.bnz.v"] fn msa_bnz_v(a: v16u8) -> i32; #[link_name = "llvm.mips.bsel.v"] fn msa_bsel_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8; #[link_name = "llvm.mips.bseli.b"] fn msa_bseli_b(a: v16u8, b: v16u8, c: i32) -> v16u8; #[link_name = "llvm.mips.bset.b"] fn msa_bset_b(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.bset.h"] fn msa_bset_h(a: v8u16, b: v8u16) -> v8u16; #[link_name = "llvm.mips.bset.w"] fn msa_bset_w(a: v4u32, b: v4u32) -> v4u32; #[link_name = "llvm.mips.bset.d"] fn msa_bset_d(a: v2u64, b: v2u64) -> v2u64; #[link_name = "llvm.mips.bseti.b"] fn msa_bseti_b(a: v16u8, b: i32) -> v16u8; #[link_name = "llvm.mips.bseti.h"] fn msa_bseti_h(a: v8u16, b: i32) -> v8u16; #[link_name = "llvm.mips.bseti.w"] fn msa_bseti_w(a: v4u32, b: i32) -> v4u32; #[link_name = "llvm.mips.bseti.d"] fn msa_bseti_d(a: v2u64, b: i32) -> v2u64; #[link_name = "llvm.mips.bz.b"] fn msa_bz_b(a: v16u8) -> i32; #[link_name = "llvm.mips.bz.h"] fn msa_bz_h(a: v8u16) -> i32; #[link_name = "llvm.mips.bz.w"] fn msa_bz_w(a: v4u32) -> i32; #[link_name = "llvm.mips.bz.d"] fn msa_bz_d(a: v2u64) -> i32; #[link_name = "llvm.mips.bz.v"] fn msa_bz_v(a: v16u8) -> i32; #[link_name = "llvm.mips.ceq.b"] fn msa_ceq_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.ceq.h"] fn msa_ceq_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.ceq.w"] fn msa_ceq_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.ceq.d"] fn msa_ceq_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.ceqi.b"] fn msa_ceqi_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.ceqi.h"] fn msa_ceqi_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.ceqi.w"] fn msa_ceqi_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.ceqi.d"] fn msa_ceqi_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.cfcmsa"] fn msa_cfcmsa(a: i32) -> i32; #[link_name = "llvm.mips.cle.s.b"] fn msa_cle_s_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.cle.s.h"] fn msa_cle_s_h(a: 
v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.cle.s.w"] fn msa_cle_s_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.cle.s.d"] fn msa_cle_s_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.cle.u.b"] fn msa_cle_u_b(a: v16u8, b: v16u8) -> v16i8; #[link_name = "llvm.mips.cle.u.h"] fn msa_cle_u_h(a: v8u16, b: v8u16) -> v8i16; #[link_name = "llvm.mips.cle.u.w"] fn msa_cle_u_w(a: v4u32, b: v4u32) -> v4i32; #[link_name = "llvm.mips.cle.u.d"] fn msa_cle_u_d(a: v2u64, b: v2u64) -> v2i64; #[link_name = "llvm.mips.clei.s.b"] fn msa_clei_s_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.clei.s.h"] fn msa_clei_s_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.clei.s.w"] fn msa_clei_s_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.clei.s.d"] fn msa_clei_s_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.clei.u.b"] fn msa_clei_u_b(a: v16u8, b: i32) -> v16i8; #[link_name = "llvm.mips.clei.u.h"] fn msa_clei_u_h(a: v8u16, b: i32) -> v8i16; #[link_name = "llvm.mips.clei.u.w"] fn msa_clei_u_w(a: v4u32, b: i32) -> v4i32; #[link_name = "llvm.mips.clei.u.d"] fn msa_clei_u_d(a: v2u64, b: i32) -> v2i64; #[link_name = "llvm.mips.clt.s.b"] fn msa_clt_s_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.clt.s.h"] fn msa_clt_s_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.clt.s.w"] fn msa_clt_s_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.clt.s.d"] fn msa_clt_s_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.clt.u.b"] fn msa_clt_u_b(a: v16u8, b: v16u8) -> v16i8; #[link_name = "llvm.mips.clt.u.h"] fn msa_clt_u_h(a: v8u16, b: v8u16) -> v8i16; #[link_name = "llvm.mips.clt.u.w"] fn msa_clt_u_w(a: v4u32, b: v4u32) -> v4i32; #[link_name = "llvm.mips.clt.u.d"] fn msa_clt_u_d(a: v2u64, b: v2u64) -> v2i64; #[link_name = "llvm.mips.clti.s.b"] fn msa_clti_s_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.clti.s.h"] fn msa_clti_s_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.clti.s.w"] fn msa_clti_s_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.clti.s.d"] fn msa_clti_s_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.clti.u.b"] fn msa_clti_u_b(a: v16u8, b: i32) -> v16i8; #[link_name = "llvm.mips.clti.u.h"] fn msa_clti_u_h(a: v8u16, b: i32) -> v8i16; #[link_name = "llvm.mips.clti.u.w"] fn msa_clti_u_w(a: v4u32, b: i32) -> v4i32; #[link_name = "llvm.mips.clti.u.d"] fn msa_clti_u_d(a: v2u64, b: i32) -> v2i64; #[link_name = "llvm.mips.copy.s.b"] fn msa_copy_s_b(a: v16i8, b: i32) -> i32; #[link_name = "llvm.mips.copy.s.h"] fn msa_copy_s_h(a: v8i16, b: i32) -> i32; #[link_name = "llvm.mips.copy.s.w"] fn msa_copy_s_w(a: v4i32, b: i32) -> i32; #[link_name = "llvm.mips.copy.s.d"] fn msa_copy_s_d(a: v2i64, b: i32) -> i64; #[link_name = "llvm.mips.copy.u.b"] fn msa_copy_u_b(a: v16i8, b: i32) -> u32; #[link_name = "llvm.mips.copy.u.h"] fn msa_copy_u_h(a: v8i16, b: i32) -> u32; #[link_name = "llvm.mips.copy.u.w"] fn msa_copy_u_w(a: v4i32, b: i32) -> u32; #[link_name = "llvm.mips.copy.u.d"] fn msa_copy_u_d(a: v2i64, b: i32) -> u64; #[link_name = "llvm.mips.ctcmsa"] fn msa_ctcmsa(imm5: i32, a: i32) -> (); #[link_name = "llvm.mips.div.s.b"] fn msa_div_s_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.div.s.h"] fn msa_div_s_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.div.s.w"] fn msa_div_s_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.div.s.d"] fn msa_div_s_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.div.u.b"] fn msa_div_u_b(a: v16u8, b: v16u8) -> v16u8; #[link_name = 
"llvm.mips.div.u.h"] fn msa_div_u_h(a: v8u16, b: v8u16) -> v8u16; #[link_name = "llvm.mips.div.u.w"] fn msa_div_u_w(a: v4u32, b: v4u32) -> v4u32; #[link_name = "llvm.mips.div.u.d"] fn msa_div_u_d(a: v2u64, b: v2u64) -> v2u64; #[link_name = "llvm.mips.dotp.s.h"] fn msa_dotp_s_h(a: v16i8, b: v16i8) -> v8i16; #[link_name = "llvm.mips.dotp.s.w"] fn msa_dotp_s_w(a: v8i16, b: v8i16) -> v4i32; #[link_name = "llvm.mips.dotp.s.d"] fn msa_dotp_s_d(a: v4i32, b: v4i32) -> v2i64; #[link_name = "llvm.mips.dotp.u.h"] fn msa_dotp_u_h(a: v16u8, b: v16u8) -> v8u16; #[link_name = "llvm.mips.dotp.u.w"] fn msa_dotp_u_w(a: v8u16, b: v8u16) -> v4u32; #[link_name = "llvm.mips.dotp.u.d"] fn msa_dotp_u_d(a: v4u32, b: v4u32) -> v2u64; #[link_name = "llvm.mips.dpadd.s.h"] fn msa_dpadd_s_h(a: v8i16, b: v16i8, c: v16i8) -> v8i16; #[link_name = "llvm.mips.dpadd.s.w"] fn msa_dpadd_s_w(a: v4i32, b: v8i16, c: v8i16) -> v4i32; #[link_name = "llvm.mips.dpadd.s.d"] fn msa_dpadd_s_d(a: v2i64, b: v4i32, c: v4i32) -> v2i64; #[link_name = "llvm.mips.dpadd.s.h"] fn msa_dpadd_u_h(a: v8u16, b: v16u8, c: v16u8) -> v8u16; #[link_name = "llvm.mips.dpadd.u.w"] fn msa_dpadd_u_w(a: v4u32, b: v8u16, c: v8u16) -> v4u32; #[link_name = "llvm.mips.dpadd.u.d"] fn msa_dpadd_u_d(a: v2u64, b: v4u32, c: v4u32) -> v2u64; #[link_name = "llvm.mips.dpsub.s.h"] fn msa_dpsub_s_h(a: v8i16, b: v16i8, c: v16i8) -> v8i16; #[link_name = "llvm.mips.dpsub.s.w"] fn msa_dpsub_s_w(a: v4i32, b: v8i16, c: v8i16) -> v4i32; #[link_name = "llvm.mips.dpsub.s.d"] fn msa_dpsub_s_d(a: v2i64, b: v4i32, c: v4i32) -> v2i64; #[link_name = "llvm.mips.dpsub.u.h"] fn msa_dpsub_u_h(a: v8i16, b: v16u8, c: v16u8) -> v8i16; #[link_name = "llvm.mips.dpsub.u.w"] fn msa_dpsub_u_w(a: v4i32, b: v8u16, c: v8u16) -> v4i32; #[link_name = "llvm.mips.dpsub.u.d"] fn msa_dpsub_u_d(a: v2i64, b: v4u32, c: v4u32) -> v2i64; #[link_name = "llvm.mips.fadd.w"] fn msa_fadd_w(a: v4f32, b: v4f32) -> v4f32; #[link_name = "llvm.mips.fadd.d"] fn msa_fadd_d(a: v2f64, b: v2f64) -> v2f64; #[link_name = "llvm.mips.fcaf.w"] fn msa_fcaf_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fcaf.d"] fn msa_fcaf_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fceq.w"] fn msa_fceq_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fceq.d"] fn msa_fceq_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fclass.w"] fn msa_fclass_w(a: v4f32) -> v4i32; #[link_name = "llvm.mips.fclass.d"] fn msa_fclass_d(a: v2f64) -> v2i64; #[link_name = "llvm.mips.fcle.w"] fn msa_fcle_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fcle.d"] fn msa_fcle_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fclt.w"] fn msa_fclt_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fclt.d"] fn msa_fclt_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fcne.w"] fn msa_fcne_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fcne.d"] fn msa_fcne_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fcor.w"] fn msa_fcor_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fcor.d"] fn msa_fcor_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fcueq.w"] fn msa_fcueq_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fcueq.d"] fn msa_fcueq_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fcule.w"] fn msa_fcule_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fcule.d"] fn msa_fcule_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fcult.w"] fn msa_fcult_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fcult.d"] fn msa_fcult_d(a: v2f64, b: v2f64) -> 
v2i64; #[link_name = "llvm.mips.fcun.w"] fn msa_fcun_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fcun.d"] fn msa_fcun_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fcune.w"] fn msa_fcune_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fcune.d"] fn msa_fcune_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fdiv.w"] fn msa_fdiv_w(a: v4f32, b: v4f32) -> v4f32; #[link_name = "llvm.mips.fdiv.d"] fn msa_fdiv_d(a: v2f64, b: v2f64) -> v2f64; // FIXME: 16-bit floats // #[link_name = "llvm.mips.fexdo.h"] // fn msa_fexdo_h(a: v4f32, b: v4f32) -> f16x8; #[link_name = "llvm.mips.fexdo.w"] fn msa_fexdo_w(a: v2f64, b: v2f64) -> v4f32; #[link_name = "llvm.mips.fexp2.w"] fn msa_fexp2_w(a: v4f32, b: v4i32) -> v4f32; #[link_name = "llvm.mips.fexp2.d"] fn msa_fexp2_d(a: v2f64, b: v2i64) -> v2f64; #[link_name = "llvm.mips.fexupl.w"] // FIXME: 16-bit floats // fn msa_fexupl_w(a: f16x8) -> v4f32; #[link_name = "llvm.mips.fexupl.d"] fn msa_fexupl_d(a: v4f32) -> v2f64; // FIXME: 16-bit floats // #[link_name = "llvm.mips.fexupr.w"] // fn msa_fexupr_w(a: f16x8) -> v4f32; #[link_name = "llvm.mips.fexupr.d"] fn msa_fexupr_d(a: v4f32) -> v2f64; #[link_name = "llvm.mips.ffint.s.w"] fn msa_ffint_s_w(a: v4i32) -> v4f32; #[link_name = "llvm.mips.ffint.s.d"] fn msa_ffint_s_d(a: v2i64) -> v2f64; #[link_name = "llvm.mips.ffint.u.w"] fn msa_ffint_u_w(a: v4u32) -> v4f32; #[link_name = "llvm.mips.ffint.u.d"] fn msa_ffint_u_d(a: v2u64) -> v2f64; #[link_name = "llvm.mips.ffql.w"] fn msa_ffql_w(a: v8i16) -> v4f32; #[link_name = "llvm.mips.ffql.d"] fn msa_ffql_d(a: v4i32) -> v2f64; #[link_name = "llvm.mips.ffqr.w"] fn msa_ffqr_w(a: v8i16) -> v4f32; #[link_name = "llvm.mips.ffqr.d"] fn msa_ffqr_d(a: v4i32) -> v2f64; #[link_name = "llvm.mips.fill.b"] fn msa_fill_b(a: i32) -> v16i8; #[link_name = "llvm.mips.fill.h"] fn msa_fill_h(a: i32) -> v8i16; #[link_name = "llvm.mips.fill.w"] fn msa_fill_w(a: i32) -> v4i32; #[link_name = "llvm.mips.fill.d"] fn msa_fill_d(a: i64) -> v2i64; #[link_name = "llvm.mips.flog2.w"] fn msa_flog2_w(a: v4f32) -> v4f32; #[link_name = "llvm.mips.flog2.d"] fn msa_flog2_d(a: v2f64) -> v2f64; #[link_name = "llvm.mips.fmadd.w"] fn msa_fmadd_w(a: v4f32, b: v4f32, c: v4f32) -> v4f32; #[link_name = "llvm.mips.fmadd.d"] fn msa_fmadd_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64; #[link_name = "llvm.mips.fmax.w"] fn msa_fmax_w(a: v4f32, b: v4f32) -> v4f32; #[link_name = "llvm.mips.fmax.d"] fn msa_fmax_d(a: v2f64, b: v2f64) -> v2f64; #[link_name = "llvm.mips.fmax.a.w"] fn msa_fmax_a_w(a: v4f32, b: v4f32) -> v4f32; #[link_name = "llvm.mips.fmax.a.d"] fn msa_fmax_a_d(a: v2f64, b: v2f64) -> v2f64; #[link_name = "llvm.mips.fmin.w"] fn msa_fmin_w(a: v4f32, b: v4f32) -> v4f32; #[link_name = "llvm.mips.fmin.d"] fn msa_fmin_d(a: v2f64, b: v2f64) -> v2f64; #[link_name = "llvm.mips.fmin.a.w"] fn msa_fmin_a_w(a: v4f32, b: v4f32) -> v4f32; #[link_name = "llvm.mips.fmin.a.d"] fn msa_fmin_a_d(a: v2f64, b: v2f64) -> v2f64; #[link_name = "llvm.mips.fmsub.w"] fn msa_fmsub_w(a: v4f32, b: v4f32, c: v4f32) -> v4f32; #[link_name = "llvm.mips.fmsub.d"] fn msa_fmsub_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64; #[link_name = "llvm.mips.fmul.w"] fn msa_fmul_w(a: v4f32, b: v4f32) -> v4f32; #[link_name = "llvm.mips.fmul.d"] fn msa_fmul_d(a: v2f64, b: v2f64) -> v2f64; #[link_name = "llvm.mips.frint.w"] fn msa_frint_w(a: v4f32) -> v4f32; #[link_name = "llvm.mips.frint.d"] fn msa_frint_d(a: v2f64) -> v2f64; #[link_name = "llvm.mips.frcp.w"] fn msa_frcp_w(a: v4f32) -> v4f32; #[link_name = "llvm.mips.frcp.d"] fn 
msa_frcp_d(a: v2f64) -> v2f64; #[link_name = "llvm.mips.frsqrt.w"] fn msa_frsqrt_w(a: v4f32) -> v4f32; #[link_name = "llvm.mips.frsqrt.d"] fn msa_frsqrt_d(a: v2f64) -> v2f64; #[link_name = "llvm.mips.fsaf.w"] fn msa_fsaf_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fsaf.d"] fn msa_fsaf_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fseq.w"] fn msa_fseq_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fseq.d"] fn msa_fseq_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fsle.w"] fn msa_fsle_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fsle.d"] fn msa_fsle_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fslt.w"] fn msa_fslt_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fslt.d"] fn msa_fslt_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fsne.w"] fn msa_fsne_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fsne.d"] fn msa_fsne_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fsor.w"] fn msa_fsor_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fsor.d"] fn msa_fsor_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fsqrt.w"] fn msa_fsqrt_w(a: v4f32) -> v4f32; #[link_name = "llvm.mips.fsqrt.d"] fn msa_fsqrt_d(a: v2f64) -> v2f64; #[link_name = "llvm.mips.fsub.w"] fn msa_fsub_w(a: v4f32, b: v4f32) -> v4f32; #[link_name = "llvm.mips.fsub.d"] fn msa_fsub_d(a: v2f64, b: v2f64) -> v2f64; #[link_name = "llvm.mips.fsueq.w"] fn msa_fsueq_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fsueq.d"] fn msa_fsueq_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fsule.w"] fn msa_fsule_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fsule.d"] fn msa_fsule_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fsult.w"] fn msa_fsult_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fsult.d"] fn msa_fsult_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fsun.w"] fn msa_fsun_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fsun.d"] fn msa_fsun_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.fsune.w"] fn msa_fsune_w(a: v4f32, b: v4f32) -> v4i32; #[link_name = "llvm.mips.fsune.d"] fn msa_fsune_d(a: v2f64, b: v2f64) -> v2i64; #[link_name = "llvm.mips.ftint.s.w"] fn msa_ftint_s_w(a: v4f32) -> v4i32; #[link_name = "llvm.mips.ftint.s.d"] fn msa_ftint_s_d(a: v2f64) -> v2i64; #[link_name = "llvm.mips.ftint.u.w"] fn msa_ftint_u_w(a: v4f32) -> v4u32; #[link_name = "llvm.mips.ftint.u.d"] fn msa_ftint_u_d(a: v2f64) -> v2u64; #[link_name = "llvm.mips.ftq.h"] fn msa_ftq_h(a: v4f32, b: v4f32) -> v8i16; #[link_name = "llvm.mips.ftq.w"] fn msa_ftq_w(a: v2f64, b: v2f64) -> v4i32; #[link_name = "llvm.mips.ftrunc.s.w"] fn msa_ftrunc_s_w(a: v4f32) -> v4i32; #[link_name = "llvm.mips.ftrunc.s.d"] fn msa_ftrunc_s_d(a: v2f64) -> v2i64; #[link_name = "llvm.mips.ftrunc.u.w"] fn msa_ftrunc_u_w(a: v4f32) -> v4u32; #[link_name = "llvm.mips.ftrunc.u.d"] fn msa_ftrunc_u_d(a: v2f64) -> v2u64; #[link_name = "llvm.mips.hadd.s.h"] fn msa_hadd_s_h(a: v16i8, b: v16i8) -> v8i16; #[link_name = "llvm.mips.hadd.s.w"] fn msa_hadd_s_w(a: v8i16, b: v8i16) -> v4i32; #[link_name = "llvm.mips.hadd.s.d"] fn msa_hadd_s_d(a: v4i32, b: v4i32) -> v2i64; #[link_name = "llvm.mips.hadd.u.h"] fn msa_hadd_u_h(a: v16u8, b: v16u8) -> v8u16; #[link_name = "llvm.mips.hadd.u.w"] fn msa_hadd_u_w(a: v8u16, b: v8u16) -> v4u32; #[link_name = "llvm.mips.hadd.u.d"] fn msa_hadd_u_d(a: v4u32, b: v4u32) -> v2u64; #[link_name = "llvm.mips.hsub.s.h"] fn msa_hsub_s_h(a: v16i8, b: v16i8) -> v8i16; #[link_name = 
"llvm.mips.hsub.s.w"] fn msa_hsub_s_w(a: v8i16, b: v8i16) -> v4i32; #[link_name = "llvm.mips.hsub.s.d"] fn msa_hsub_s_d(a: v4i32, b: v4i32) -> v2i64; #[link_name = "llvm.mips.hsub.u.h"] fn msa_hsub_u_h(a: v16u8, b: v16u8) -> v8i16; #[link_name = "llvm.mips.hsub.u.w"] fn msa_hsub_u_w(a: v8u16, b: v8u16) -> v4i32; #[link_name = "llvm.mips.hsub.u.d"] fn msa_hsub_u_d(a: v4u32, b: v4u32) -> v2i64; #[link_name = "llvm.mips.ilvev.b"] fn msa_ilvev_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.ilvev.h"] fn msa_ilvev_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.ilvev.w"] fn msa_ilvev_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.ilvev.d"] fn msa_ilvev_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.ilvl.b"] fn msa_ilvl_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.ilvl.h"] fn msa_ilvl_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.ilvl.w"] fn msa_ilvl_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.ilvl.d"] fn msa_ilvl_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.ilvod.b"] fn msa_ilvod_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.ilvod.h"] fn msa_ilvod_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.ilvod.w"] fn msa_ilvod_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.ilvod.d"] fn msa_ilvod_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.ilvr.b"] fn msa_ilvr_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.ilvr.h"] fn msa_ilvr_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.ilvr.w"] fn msa_ilvr_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.ilvr.d"] fn msa_ilvr_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.insert.b"] fn msa_insert_b(a: v16i8, b: i32, c: i32) -> v16i8; #[link_name = "llvm.mips.insert.h"] fn msa_insert_h(a: v8i16, b: i32, c: i32) -> v8i16; #[link_name = "llvm.mips.insert.w"] fn msa_insert_w(a: v4i32, b: i32, c: i32) -> v4i32; #[link_name = "llvm.mips.insert.d"] fn msa_insert_d(a: v2i64, b: i32, c: i64) -> v2i64; #[link_name = "llvm.mips.insve.b"] fn msa_insve_b(a: v16i8, b: i32, c: v16i8) -> v16i8; #[link_name = "llvm.mips.insve.h"] fn msa_insve_h(a: v8i16, b: i32, c: v8i16) -> v8i16; #[link_name = "llvm.mips.insve.w"] fn msa_insve_w(a: v4i32, b: i32, c: v4i32) -> v4i32; #[link_name = "llvm.mips.insve.d"] fn msa_insve_d(a: v2i64, b: i32, c: v2i64) -> v2i64; #[link_name = "llvm.mips.ld.b"] fn msa_ld_b(mem_addr: *mut u8, b: i32) -> v16i8; #[link_name = "llvm.mips.ld.h"] fn msa_ld_h(mem_addr: *mut u8, b: i32) -> v8i16; #[link_name = "llvm.mips.ld.w"] fn msa_ld_w(mem_addr: *mut u8, b: i32) -> v4i32; #[link_name = "llvm.mips.ld.d"] fn msa_ld_d(mem_addr: *mut u8, b: i32) -> v2i64; #[link_name = "llvm.mips.ldi.b"] fn msa_ldi_b(a: i32) -> v16i8; #[link_name = "llvm.mips.ldi.h"] fn msa_ldi_h(a: i32) -> v8i16; #[link_name = "llvm.mips.ldi.w"] fn msa_ldi_w(a: i32) -> v4i32; #[link_name = "llvm.mips.ldi.d"] fn msa_ldi_d(a: i32) -> v2i64; #[link_name = "llvm.mips.madd.q.h"] fn msa_madd_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; #[link_name = "llvm.mips.madd.q.w"] fn msa_madd_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; #[link_name = "llvm.mips.maddr.q.h"] fn msa_maddr_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; #[link_name = "llvm.mips.maddr.q.w"] fn msa_maddr_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; #[link_name = "llvm.mips.maddv.b"] fn msa_maddv_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8; #[link_name = "llvm.mips.maddv.h"] fn msa_maddv_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; #[link_name = "llvm.mips.maddv.w"] fn msa_maddv_w(a: 
v4i32, b: v4i32, c: v4i32) -> v4i32; #[link_name = "llvm.mips.maddv.d"] fn msa_maddv_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64; #[link_name = "llvm.mips.max.a.b"] fn msa_max_a_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.max.a.h"] fn msa_max_a_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.max.a.w"] fn msa_max_a_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.max.a.d"] fn msa_max_a_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.max.s.b"] fn msa_max_s_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.max.s.h"] fn msa_max_s_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.max.s.w"] fn msa_max_s_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.max.s.d"] fn msa_max_s_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.max.u.b"] fn msa_max_u_b(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.max.u.h"] fn msa_max_u_h(a: v8u16, b: v8u16) -> v8u16; #[link_name = "llvm.mips.max.u.w"] fn msa_max_u_w(a: v4u32, b: v4u32) -> v4u32; #[link_name = "llvm.mips.max.u.d"] fn msa_max_u_d(a: v2u64, b: v2u64) -> v2u64; #[link_name = "llvm.mips.maxi.s.b"] fn msa_maxi_s_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.maxi.s.h"] fn msa_maxi_s_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.maxi.s.w"] fn msa_maxi_s_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.maxi.s.d"] fn msa_maxi_s_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.maxi.u.b"] fn msa_maxi_u_b(a: v16u8, b: i32) -> v16u8; #[link_name = "llvm.mips.maxi.u.h"] fn msa_maxi_u_h(a: v8u16, b: i32) -> v8u16; #[link_name = "llvm.mips.maxi.u.w"] fn msa_maxi_u_w(a: v4u32, b: i32) -> v4u32; #[link_name = "llvm.mips.maxi.u.d"] fn msa_maxi_u_d(a: v2u64, b: i32) -> v2u64; #[link_name = "llvm.mips.min.a.b"] fn msa_min_a_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.min.a.h"] fn msa_min_a_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.min.a.w"] fn msa_min_a_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.min.a.d"] fn msa_min_a_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.min.s.b"] fn msa_min_s_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.min.s.h"] fn msa_min_s_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.min.s.w"] fn msa_min_s_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.min.s.d"] fn msa_min_s_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.min.u.b"] fn msa_min_u_b(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.min.u.h"] fn msa_min_u_h(a: v8u16, b: v8u16) -> v8u16; #[link_name = "llvm.mips.min.u.w"] fn msa_min_u_w(a: v4u32, b: v4u32) -> v4u32; #[link_name = "llvm.mips.min.u.d"] fn msa_min_u_d(a: v2u64, b: v2u64) -> v2u64; #[link_name = "llvm.mips.mini.s.b"] fn msa_mini_s_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.mini.s.h"] fn msa_mini_s_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.mini.s.w"] fn msa_mini_s_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.mini.s.d"] fn msa_mini_s_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.mini.u.b"] fn msa_mini_u_b(a: v16u8, b: i32) -> v16u8; #[link_name = "llvm.mips.mini.u.h"] fn msa_mini_u_h(a: v8u16, b: i32) -> v8u16; #[link_name = "llvm.mips.mini.u.w"] fn msa_mini_u_w(a: v4u32, b: i32) -> v4u32; #[link_name = "llvm.mips.mini.u.d"] fn msa_mini_u_d(a: v2u64, b: i32) -> v2u64; #[link_name = "llvm.mips.mod.s.b"] fn msa_mod_s_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.mod.s.h"] fn msa_mod_s_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.mod.s.w"] fn msa_mod_s_w(a: v4i32, b: 
v4i32) -> v4i32; #[link_name = "llvm.mips.mod.s.d"] fn msa_mod_s_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.mod.u.b"] fn msa_mod_u_b(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.mod.u.h"] fn msa_mod_u_h(a: v8u16, b: v8u16) -> v8u16; #[link_name = "llvm.mips.mod.u.w"] fn msa_mod_u_w(a: v4u32, b: v4u32) -> v4u32; #[link_name = "llvm.mips.mod.u.d"] fn msa_mod_u_d(a: v2u64, b: v2u64) -> v2u64; #[link_name = "llvm.mips.move.v"] fn msa_move_v(a: v16i8) -> v16i8; #[link_name = "llvm.mips.msub.q.h"] fn msa_msub_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; #[link_name = "llvm.mips.msub.q.w"] fn msa_msub_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; #[link_name = "llvm.mips.msubr.q.h"] fn msa_msubr_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; #[link_name = "llvm.mips.msubr.q.w"] fn msa_msubr_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; #[link_name = "llvm.mips.msubv.b"] fn msa_msubv_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8; #[link_name = "llvm.mips.msubv.h"] fn msa_msubv_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; #[link_name = "llvm.mips.msubv.w"] fn msa_msubv_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; #[link_name = "llvm.mips.msubv.d"] fn msa_msubv_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64; #[link_name = "llvm.mips.mul.q.h"] fn msa_mul_q_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.mul.q.w"] fn msa_mul_q_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.mulr.q.h"] fn msa_mulr_q_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.mulr.q.w"] fn msa_mulr_q_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.mulv.b"] fn msa_mulv_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.mulv.h"] fn msa_mulv_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.mulv.w"] fn msa_mulv_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.mulv.d"] fn msa_mulv_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.nloc.b"] fn msa_nloc_b(a: v16i8) -> v16i8; #[link_name = "llvm.mips.nloc.h"] fn msa_nloc_h(a: v8i16) -> v8i16; #[link_name = "llvm.mips.nloc.w"] fn msa_nloc_w(a: v4i32) -> v4i32; #[link_name = "llvm.mips.nloc.d"] fn msa_nloc_d(a: v2i64) -> v2i64; #[link_name = "llvm.mips.nlzc.b"] fn msa_nlzc_b(a: v16i8) -> v16i8; #[link_name = "llvm.mips.nlzc.h"] fn msa_nlzc_h(a: v8i16) -> v8i16; #[link_name = "llvm.mips.nlzc.w"] fn msa_nlzc_w(a: v4i32) -> v4i32; #[link_name = "llvm.mips.nlzc.d"] fn msa_nlzc_d(a: v2i64) -> v2i64; #[link_name = "llvm.mips.nor.v"] fn msa_nor_v(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.nori.b"] fn msa_nori_b(a: v16u8, b: i32) -> v16u8; #[link_name = "llvm.mips.or.v"] fn msa_or_v(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.ori.b"] fn msa_ori_b(a: v16u8, b: i32) -> v16u8; #[link_name = "llvm.mips.pckev.b"] fn msa_pckev_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.pckev.h"] fn msa_pckev_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.pckev.w"] fn msa_pckev_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.pckev.d"] fn msa_pckev_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.pckod.b"] fn msa_pckod_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.pckod.h"] fn msa_pckod_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.pckod.w"] fn msa_pckod_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.pckod.d"] fn msa_pckod_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.pcnt.b"] fn msa_pcnt_b(a: v16i8) -> v16i8; #[link_name = "llvm.mips.pcnt.h"] fn msa_pcnt_h(a: v8i16) -> v8i16; #[link_name = "llvm.mips.pcnt.w"] fn msa_pcnt_w(a: v4i32) -> v4i32; #[link_name = 
"llvm.mips.pcnt.d"] fn msa_pcnt_d(a: v2i64) -> v2i64; #[link_name = "llvm.mips.sat.s.b"] fn msa_sat_s_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.sat.s.h"] fn msa_sat_s_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.sat.s.w"] fn msa_sat_s_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.sat.s.d"] fn msa_sat_s_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.sat.u.b"] fn msa_sat_u_b(a: v16u8, b: i32) -> v16u8; #[link_name = "llvm.mips.sat.u.h"] fn msa_sat_u_h(a: v8u16, b: i32) -> v8u16; #[link_name = "llvm.mips.sat.u.w"] fn msa_sat_u_w(a: v4u32, b: i32) -> v4u32; #[link_name = "llvm.mips.sat.u.d"] fn msa_sat_u_d(a: v2u64, b: i32) -> v2u64; #[link_name = "llvm.mips.shf.b"] fn msa_shf_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.shf.h"] fn msa_shf_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.shf.w"] fn msa_shf_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.sld.b"] fn msa_sld_b(a: v16i8, b: v16i8, c: i32) -> v16i8; #[link_name = "llvm.mips.sld.h"] fn msa_sld_h(a: v8i16, b: v8i16, c: i32) -> v8i16; #[link_name = "llvm.mips.sld.w"] fn msa_sld_w(a: v4i32, b: v4i32, c: i32) -> v4i32; #[link_name = "llvm.mips.sld.d"] fn msa_sld_d(a: v2i64, b: v2i64, c: i32) -> v2i64; #[link_name = "llvm.mips.sldi.b"] fn msa_sldi_b(a: v16i8, b: v16i8, c: i32) -> v16i8; #[link_name = "llvm.mips.sldi.h"] fn msa_sldi_h(a: v8i16, b: v8i16, c: i32) -> v8i16; #[link_name = "llvm.mips.sldi.w"] fn msa_sldi_w(a: v4i32, b: v4i32, c: i32) -> v4i32; #[link_name = "llvm.mips.sldi.d"] fn msa_sldi_d(a: v2i64, b: v2i64, c: i32) -> v2i64; #[link_name = "llvm.mips.sll.b"] fn msa_sll_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.sll.h"] fn msa_sll_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.sll.w"] fn msa_sll_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.sll.d"] fn msa_sll_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.slli.b"] fn msa_slli_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.slli.h"] fn msa_slli_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.slli.w"] fn msa_slli_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.slli.d"] fn msa_slli_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.splat.b"] fn msa_splat_b(a: v16i8, c: i32) -> v16i8; #[link_name = "llvm.mips.splat.h"] fn msa_splat_h(a: v8i16, c: i32) -> v8i16; #[link_name = "llvm.mips.splat.w"] fn msa_splat_w(a: v4i32, w: i32) -> v4i32; #[link_name = "llvm.mips.splat.d"] fn msa_splat_d(a: v2i64, c: i32) -> v2i64; #[link_name = "llvm.mips.splati.b"] fn msa_splati_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.splati.h"] fn msa_splati_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.splati.w"] fn msa_splati_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.splati.d"] fn msa_splati_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.sra.b"] fn msa_sra_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.sra.h"] fn msa_sra_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.sra.w"] fn msa_sra_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.sra.d"] fn msa_sra_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.srai.b"] fn msa_srai_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.srai.h"] fn msa_srai_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.srai.w"] fn msa_srai_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.srai.d"] fn msa_srai_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.srar.b"] fn msa_srar_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.srar.h"] fn 
msa_srar_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.srar.w"] fn msa_srar_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.srar.d"] fn msa_srar_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.srari.b"] fn msa_srari_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.srari.h"] fn msa_srari_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.srari.w"] fn msa_srari_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.srari.d"] fn msa_srari_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.srl.b"] fn msa_srl_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.srl.h"] fn msa_srl_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.srl.w"] fn msa_srl_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.srl.d"] fn msa_srl_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.srli.b"] fn msa_srli_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.srli.h"] fn msa_srli_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.srli.w"] fn msa_srli_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.srli.d"] fn msa_srli_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.srlr.b"] fn msa_srlr_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.srlr.h"] fn msa_srlr_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.srlr.w"] fn msa_srlr_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.srlr.d"] fn msa_srlr_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.srlri.b"] fn msa_srlri_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.srlri.h"] fn msa_srlri_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.srlri.w"] fn msa_srlri_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.srlri.d"] fn msa_srlri_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.st.b"] fn msa_st_b(a: v16i8, mem_addr: *mut u8, imm_s10: i32) -> (); #[link_name = "llvm.mips.st.h"] fn msa_st_h(a: v8i16, mem_addr: *mut u8, imm_s11: i32) -> (); #[link_name = "llvm.mips.st.w"] fn msa_st_w(a: v4i32, mem_addr: *mut u8, imm_s12: i32) -> (); #[link_name = "llvm.mips.st.d"] fn msa_st_d(a: v2i64, mem_addr: *mut u8, imm_s13: i32) -> (); #[link_name = "llvm.mips.subs.s.b"] fn msa_subs_s_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = "llvm.mips.subs.s.h"] fn msa_subs_s_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.subs.s.w"] fn msa_subs_s_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.subs.s.d"] fn msa_subs_s_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.subs.u.b"] fn msa_subs_u_b(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.subs.u.h"] fn msa_subs_u_h(a: v8u16, b: v8u16) -> v8u16; #[link_name = "llvm.mips.subs.u.w"] fn msa_subs_u_w(a: v4u32, b: v4u32) -> v4u32; #[link_name = "llvm.mips.subs.u.d"] fn msa_subs_u_d(a: v2u64, b: v2u64) -> v2u64; #[link_name = "llvm.mips.subsus.u.b"] fn msa_subsus_u_b(a: v16u8, b: v16i8) -> v16u8; #[link_name = "llvm.mips.subsus.u.h"] fn msa_subsus_u_h(a: v8u16, b: v8i16) -> v8u16; #[link_name = "llvm.mips.subsus.u.w"] fn msa_subsus_u_w(a: v4u32, b: v4i32) -> v4u32; #[link_name = "llvm.mips.subsus.u.d"] fn msa_subsus_u_d(a: v2u64, b: v2i64) -> v2u64; #[link_name = "llvm.mips.subsuu.s.b"] fn msa_subsuu_s_b(a: v16u8, b: v16u8) -> v16i8; #[link_name = "llvm.mips.subsuu.s.h"] fn msa_subsuu_s_h(a: v8u16, b: v8u16) -> v8i16; #[link_name = "llvm.mips.subsuu.s.w"] fn msa_subsuu_s_w(a: v4u32, b: v4u32) -> v4i32; #[link_name = "llvm.mips.subsuu.s.d"] fn msa_subsuu_s_d(a: v2u64, b: v2u64) -> v2i64; #[link_name = "llvm.mips.subv.b"] fn msa_subv_b(a: v16i8, b: v16i8) -> v16i8; #[link_name = 
"llvm.mips.subv.h"] fn msa_subv_h(a: v8i16, b: v8i16) -> v8i16; #[link_name = "llvm.mips.subv.w"] fn msa_subv_w(a: v4i32, b: v4i32) -> v4i32; #[link_name = "llvm.mips.subv.d"] fn msa_subv_d(a: v2i64, b: v2i64) -> v2i64; #[link_name = "llvm.mips.subvi.b"] fn msa_subvi_b(a: v16i8, b: i32) -> v16i8; #[link_name = "llvm.mips.subvi.h"] fn msa_subvi_h(a: v8i16, b: i32) -> v8i16; #[link_name = "llvm.mips.subvi.w"] fn msa_subvi_w(a: v4i32, b: i32) -> v4i32; #[link_name = "llvm.mips.subvi.d"] fn msa_subvi_d(a: v2i64, b: i32) -> v2i64; #[link_name = "llvm.mips.vshf.b"] fn msa_vshf_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8; #[link_name = "llvm.mips.vshf.h"] fn msa_vshf_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16; #[link_name = "llvm.mips.vshf.w"] fn msa_vshf_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32; #[link_name = "llvm.mips.vshf.d"] fn msa_vshf_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64; #[link_name = "llvm.mips.xor.v"] fn msa_xor_v(a: v16u8, b: v16u8) -> v16u8; #[link_name = "llvm.mips.xori.b"] fn msa_xori_b(a: v16u8, b: i32) -> v16u8; } /// Vector Add Absolute Values. /// /// The absolute values of the elements in vector in `a` (sixteen signed 8-bit integer numbers) /// are added to the absolute values of the elements in vector `b` (sixteen signed 8-bit integer numbers) /// The result is written to vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(add_a.b))] pub unsafe fn __msa_add_a_b(a: v16i8, b: v16i8) -> v16i8 { msa_add_a_b(a, ::mem::transmute(b)) } /// Vector Add Absolute Values /// /// The absolute values of the elements in vector in `a` (eight signed 16-bit integer numbers) /// are added to the absolute values of the elements in vector `b` (eight signed 16-bit integer numbers) /// The result is written to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(add_a.h))] pub unsafe fn __msa_add_a_h(a: v8i16, b: v8i16) -> v8i16 { msa_add_a_h(a, ::mem::transmute(b)) } /// Vector Add Absolute Values /// /// The absolute values of the elements in vector in `a` (four signed 32-bit integer numbers) /// are added to the absolute values of the elements in vector `b` (four signed 32-bit integer numbers) /// The result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(add_a.w))] pub unsafe fn __msa_add_a_w(a: v4i32, b: v4i32) -> v4i32 { msa_add_a_w(a, ::mem::transmute(b)) } /// Vector Add Absolute Values /// /// The absolute values of the elements in vector in `a` (two signed 64-bit integer numbers) /// are added to the absolute values of the elements in vector `b` (two signed 64-bit integer numbers) // The result is written to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(add_a.d))] pub unsafe fn __msa_add_a_d(a: v2i64, b: v2i64) -> v2i64 { msa_add_a_d(a, ::mem::transmute(b)) } /// Signed Saturated Vector Saturated Add of Absolute Values /// /// The absolute values of the elements in vector in `a` (sixteen signed 8-bit integer numbers) /// are added to the absolute values of the elements in vector `b` (sixteen signed 8-bit integer numbers) /// The saturated signed result is written to vector (sixteen signed 8-bit integer numbers). 
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(adds_a.b))] pub unsafe fn __msa_adds_a_b(a: v16i8, b: v16i8) -> v16i8 { msa_adds_a_b(a, ::mem::transmute(b)) } /// Vector Saturated Add of Absolute Values /// /// The absolute values of the elements in vector `a` (eight signed 16-bit integer numbers) /// are added to the absolute values of the elements in vector `b` (eight signed 16-bit integer numbers). /// The saturated signed result is written to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(adds_a.h))] pub unsafe fn __msa_adds_a_h(a: v8i16, b: v8i16) -> v8i16 { msa_adds_a_h(a, ::mem::transmute(b)) } /// Vector Saturated Add of Absolute Values /// /// The absolute values of the elements in vector `a` (four signed 32-bit integer numbers) /// are added to the absolute values of the elements in vector `b` (four signed 32-bit integer numbers). /// The saturated signed result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(adds_a.w))] pub unsafe fn __msa_adds_a_w(a: v4i32, b: v4i32) -> v4i32 { msa_adds_a_w(a, ::mem::transmute(b)) } /// Vector Saturated Add of Absolute Values /// /// The absolute values of the elements in vector `a` (two signed 64-bit integer numbers) /// are added to the absolute values of the elements in vector `b` (two signed 64-bit integer numbers). /// The saturated signed result is written to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(adds_a.d))] pub unsafe fn __msa_adds_a_d(a: v2i64, b: v2i64) -> v2i64 { msa_adds_a_d(a, ::mem::transmute(b)) } /// Vector Signed Saturated Add of Signed Values /// /// The elements in vector `a` (sixteen signed 8-bit integer numbers) /// are added to the elements in vector `b` (sixteen signed 8-bit integer numbers). /// Signed arithmetic is performed and overflows clamp to the largest and/or smallest /// representable signed values before writing the result to vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(adds_s.b))] pub unsafe fn __msa_adds_s_b(a: v16i8, b: v16i8) -> v16i8 { msa_adds_s_b(a, ::mem::transmute(b)) } /// Vector Signed Saturated Add of Signed Values /// /// The elements in vector `a` (eight signed 16-bit integer numbers) /// are added to the elements in vector `b` (eight signed 16-bit integer numbers). /// Signed arithmetic is performed and overflows clamp to the largest and/or smallest /// representable signed values before writing the result to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(adds_s.h))] pub unsafe fn __msa_adds_s_h(a: v8i16, b: v8i16) -> v8i16 { msa_adds_s_h(a, ::mem::transmute(b)) } /// Vector Signed Saturated Add of Signed Values /// /// The elements in vector `a` (four signed 32-bit integer numbers) /// are added to the elements in vector `b` (four signed 32-bit integer numbers). /// Signed arithmetic is performed and overflows clamp to the largest and/or smallest /// representable signed values before writing the result to vector (four signed 32-bit integer numbers).
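///
/// For example, with 32-bit elements `i32::MAX` and `1` the mathematical sum
/// overflows, so the result element is clamped to `i32::MAX` (`2147483647`);
/// likewise `i32::MIN` and `-1` clamp to `i32::MIN`.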
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(adds_s.w))] pub unsafe fn __msa_adds_s_w(a: v4i32, b: v4i32) -> v4i32 { msa_adds_s_w(a, ::mem::transmute(b)) } /// Vector Signed Saturated Add of Signed Values /// /// The elements in vector `a` (two signed 64-bit integer numbers) /// are added to the elements in vector `b` (two signed 64-bit integer numbers). /// Signed arithmetic is performed and overflows clamp to the largest and/or smallest /// representable signed values before writing the result to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(adds_s.d))] pub unsafe fn __msa_adds_s_d(a: v2i64, b: v2i64) -> v2i64 { msa_adds_s_d(a, ::mem::transmute(b)) } /// Vector Unsigned Saturated Add of Unsigned Values /// /// The elements in vector `a` (sixteen unsigned 8-bit integer numbers) /// are added to the elements in vector `b` (sixteen unsigned 8-bit integer numbers). /// Unsigned arithmetic is performed and overflows clamp to the largest /// representable unsigned value before writing the result to vector (sixteen unsigned 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(adds_u.b))] pub unsafe fn __msa_adds_u_b(a: v16u8, b: v16u8) -> v16u8 { msa_adds_u_b(a, ::mem::transmute(b)) } /// Vector Unsigned Saturated Add of Unsigned Values /// /// The elements in vector `a` (eight unsigned 16-bit integer numbers) /// are added to the elements in vector `b` (eight unsigned 16-bit integer numbers). /// Unsigned arithmetic is performed and overflows clamp to the largest /// representable unsigned value before writing the result to vector (eight unsigned 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(adds_u.h))] pub unsafe fn __msa_adds_u_h(a: v8u16, b: v8u16) -> v8u16 { msa_adds_u_h(a, ::mem::transmute(b)) } /// Vector Unsigned Saturated Add of Unsigned Values /// /// The elements in vector `a` (four unsigned 32-bit integer numbers) /// are added to the elements in vector `b` (four unsigned 32-bit integer numbers). /// Unsigned arithmetic is performed and overflows clamp to the largest /// representable unsigned value before writing the result to vector (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(adds_u.w))] pub unsafe fn __msa_adds_u_w(a: v4u32, b: v4u32) -> v4u32 { msa_adds_u_w(a, ::mem::transmute(b)) } /// Vector Unsigned Saturated Add of Unsigned Values /// /// The elements in vector `a` (two unsigned 64-bit integer numbers) /// are added to the elements in vector `b` (two unsigned 64-bit integer numbers). /// Unsigned arithmetic is performed and overflows clamp to the largest /// representable unsigned value before writing the result to vector (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(adds_u.d))] pub unsafe fn __msa_adds_u_d(a: v2u64, b: v2u64) -> v2u64 { msa_adds_u_d(a, ::mem::transmute(b)) } /// Vector Add /// /// The elements in vector `a` (sixteen signed 8-bit integer numbers) /// are added to the elements in vector `b` (sixteen signed 8-bit integer numbers). /// The result is written to vector (sixteen signed 8-bit integer numbers).
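///
/// Note that this addition is not saturating: with 8-bit elements `100` and
/// `100` the result element is `-56` (the sum wraps modulo 2^8); compare the
/// saturating `__msa_adds_*` intrinsics above.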
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(addv.b))] pub unsafe fn __msa_addv_b(a: v16i8, b: v16i8) -> v16i8 { msa_addv_b(a, ::mem::transmute(b)) } /// Vector Add /// /// The elements in vector in `a` (eight signed 16-bit integer numbers) /// are added to the elements in vector `b` (eight signed 16-bit integer numbers) /// The result is written to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(addv.h))] pub unsafe fn __msa_addv_h(a: v8i16, b: v8i16) -> v8i16 { msa_addv_h(a, ::mem::transmute(b)) } /// Vector Add /// /// The elements in vector in `a` (four signed 32-bit integer numbers) /// are added to the elements in vector `b` (four signed 32-bit integer numbers) /// The result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(addv.w))] pub unsafe fn __msa_addv_w(a: v4i32, b: v4i32) -> v4i32 { msa_addv_w(a, ::mem::transmute(b)) } /// Vector Add /// /// The elements in vector in `a` (two signed 64-bit integer numbers) /// are added to the elements in vector `b` (two signed 64-bit integer numbers) /// The result is written to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(addv.d))] pub unsafe fn __msa_addv_d(a: v2i64, b: v2i64) -> v2i64 { msa_addv_d(a, ::mem::transmute(b)) } /// Immediate Add /// /// The 5-bit immediate unsigned value u5 is added to the elements /// vector in `a` (sixteen signed 8-bit integer numbers) /// The result is written to vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(addvi.b, imm5 = 0b10111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_addvi_b(a: v16i8, imm5: i32) -> v16i8 { macro_rules! call { ($imm5:expr) => { msa_addvi_b(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Add /// /// The 5-bit immediate unsigned value u5 is added to the elements /// vector in `a` (eight signed 16-bit integer numbers) /// The result is written to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(addvi.h, imm5 = 0b10111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_addvi_h(a: v8i16, imm5: i32) -> v8i16 { macro_rules! call { ($imm5:expr) => { msa_addvi_h(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Add /// /// The 5-bit immediate unsigned value u5 is added to the elements /// vector in `a` (four signed 32-bit integer numbers) /// The result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(addvi.w, imm5 = 0b10111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_addvi_w(a: v4i32, imm5: i32) -> v4i32 { macro_rules! call { ($imm5:expr) => { msa_addvi_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Add /// /// The 5-bit immediate unsigned value u5 is added to the elements /// vector in `a` (two signed 64-bit integer numbers) /// The result is written to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(addvi.d, imm5 = 0b10111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_addvi_d(a: v2i64, imm5: i32) -> v2i64 { macro_rules! 
call { ($imm5:expr) => { msa_addvi_d(a, $imm5) }; } constify_imm5!(imm5, call) } /// Vector Logical And /// /// Each bit of vector `a` (sixteen unsigned 8-bit integer numbers) /// is combined with the corresponding bit of vector 'b' (sixteen unsigned 8-bit integer numbers). /// in a bitwise logical AND operation. /// The result is written to vector (sixteen unsigned 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(and.v))] pub unsafe fn __msa_and_v(a: v16u8, b: v16u8) -> v16u8 { msa_and_v(a, ::mem::transmute(b)) } /// Immediate Logical And /// /// Each byte element of vector `a` (sixteen unsigned 8-bit integer numbers) /// is combined with the 8-bit immediate i8 (signed 8-bit integer number) in a bitwise logical AND operation. /// The result is written to vector (sixteen unsigned 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(andi.b, imm8 = 0b10010111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_andi_b(a: v16u8, imm8: i32) -> v16u8 { macro_rules! call { ($imm8:expr) => { msa_andi_b(a, $imm8) }; } constify_imm8!(imm8, call) } /// Vector Absolute Values of Signed Subtract /// /// The signed elements in vector `a` (sixteen signed 8-bit integer numbers) /// are subtracted from the signed elements in vector `b` (sixteen signed 8-bit integer numbers) /// The absolute value of the signed result is written to vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(asub_s.b))] pub unsafe fn __msa_asub_s_b(a: v16i8, b: v16i8) -> v16i8 { msa_asub_s_b(a, ::mem::transmute(b)) } /// Vector Absolute Values of Signed Subtract /// /// The signed elements in vector `a` (eight signed 16-bit integer numbers) /// are subtracted from the signed elements in vector `b` (eight signed 16-bit integer numbers) /// The absolute value of the signed result is written to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(asub_s.h))] pub unsafe fn __msa_asub_s_h(a: v8i16, b: v8i16) -> v8i16 { msa_asub_s_h(a, ::mem::transmute(b)) } /// Vector Absolute Values of Signed Subtract /// /// The signed elements in vector `a` (four signed 32-bit integer numbers) /// are subtracted from the signed elements in vector `b` (four signed 32-bit integer numbers) /// The absolute value of the signed result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(asub_s.w))] pub unsafe fn __msa_asub_s_w(a: v4i32, b: v4i32) -> v4i32 { msa_asub_s_w(a, ::mem::transmute(b)) } /// Vector Absolute Values of Signed Subtract /// /// The signed elements in vector `a` (two signed 64-bit integer numbers) /// are subtracted from the signed elements in vector `b` (two signed 64-bit integer numbers) /// The absolute value of the signed result is written to vector (two signed 64-bit integer numbers). 
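///
/// A minimal usage sketch (illustrative only, not part of this crate's API):
/// with MSA available, the absolute-difference intrinsic computes |a - b| per
/// 64-bit lane without the branchy scalar form; the wrapper name is hypothetical.
///
/// ```ignore
/// unsafe fn abs_diff_i64x2(a: [i64; 2], b: [i64; 2]) -> [i64; 2] {
///     let va: v2i64 = core::mem::transmute(a);
///     let vb: v2i64 = core::mem::transmute(b);
///     core::mem::transmute(__msa_asub_s_d(va, vb))
/// }
/// ```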
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(asub_s.d))] pub unsafe fn __msa_asub_s_d(a: v2i64, b: v2i64) -> v2i64 { msa_asub_s_d(a, ::mem::transmute(b)) } /// Vector Absolute Values of Unsigned Subtract /// /// The unsigned elements in vector `a` (sixteen unsigned 8-bit integer numbers) /// are subtracted from the unsigned elements in vector `b` (sixteen unsigned 8-bit integer numbers) /// The absolute value of the unsigned result is written to vector (sixteen unsigned 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(asub_u.b))] pub unsafe fn __msa_asub_u_b(a: v16u8, b: v16u8) -> v16u8 { msa_asub_u_b(a, ::mem::transmute(b)) } /// Vector Absolute Values of Unsigned Subtract /// /// The unsigned elements in vector `a` (eight unsigned 16-bit integer numbers) /// are subtracted from the unsigned elements in vector `b` (eight unsigned 16-bit integer numbers) /// The absolute value of the unsigned result is written to vector (eight unsigned 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(asub_u.h))] pub unsafe fn __msa_asub_u_h(a: v8u16, b: v8u16) -> v8u16 { msa_asub_u_h(a, ::mem::transmute(b)) } /// Vector Absolute Values of Unsigned Subtract /// /// The unsigned elements in vector `a` (four unsigned 32-bit integer numbers) /// are subtracted from the unsigned elements in vector `b` (four unsigned 32-bit integer numbers) /// The absolute value of the unsigned result is written to vector (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(asub_u.w))] pub unsafe fn __msa_asub_u_w(a: v4u32, b: v4u32) -> v4u32 { msa_asub_u_w(a, ::mem::transmute(b)) } /// Vector Absolute Values of Unsigned Subtract /// /// The unsigned elements in vector `a` (two unsigned 64-bit integer numbers) /// are subtracted from the unsigned elements in vector `b` (two unsigned 64-bit integer numbers) /// The absolute value of the unsigned result is written to vector (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(asub_u.d))] pub unsafe fn __msa_asub_u_d(a: v2u64, b: v2u64) -> v2u64 { msa_asub_u_d(a, ::mem::transmute(b)) } /// Vector Signed Average /// /// The elements in vector `a` (sixteen signed 8-bit integer numbers) /// are added to the elements in vector `b` (sixteen signed 8-bit integer numbers) /// The addition is done signed with full precision, i.e.the result has one extra bit /// Signed division by 2 (or arithmetic shift right by one bit) is performed before /// writing the result to vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ave_s.b))] pub unsafe fn __msa_ave_s_b(a: v16i8, b: v16i8) -> v16i8 { msa_ave_s_b(a, ::mem::transmute(b)) } /// Vector Signed Average /// /// The elements in vector `a` (eight signed 16-bit integer numbers) /// are added to the elements in vector `b` (eight signed 16-bit integer numbers) /// The addition is done signed with full precision, i.e.the result has one extra bit /// Signed division by 2 (or arithmetic shift right by one bit) is performed before /// writing the result to vector (eight signed 16-bit integer numbers). 
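///
/// A minimal usage sketch (illustrative only, not part of this crate's API):
/// because the average is computed with one extra internal bit, averaging two
/// `i16::MAX` lanes yields `i16::MAX` rather than a wrapped negative value;
/// the wrapper name is hypothetical.
///
/// ```ignore
/// unsafe fn average_i16x8(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let va: v8i16 = core::mem::transmute(a);
///     let vb: v8i16 = core::mem::transmute(b);
///     // Truncating average: (a + b) >> 1 per lane, computed without overflow.
///     core::mem::transmute(__msa_ave_s_h(va, vb))
/// }
/// ```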
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ave_s.h))] pub unsafe fn __msa_ave_s_h(a: v8i16, b: v8i16) -> v8i16 { msa_ave_s_h(a, ::mem::transmute(b)) } /// Vector Signed Average /// /// The elements in vector `a` (four signed 32-bit integer numbers) /// are added to the elements in vector `b` (four signed 32-bit integer numbers) /// The addition is done signed with full precision, i.e.the result has one extra bit /// Signed division by 2 (or arithmetic shift right by one bit) is performed before /// writing the result to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ave_s.w))] pub unsafe fn __msa_ave_s_w(a: v4i32, b: v4i32) -> v4i32 { msa_ave_s_w(a, ::mem::transmute(b)) } /// Vector Signed Average /// /// The elements in vector `a` (two signed 64-bit integer numbers) /// are added to the elements in vector `b` (two signed 64-bit integer numbers) /// The addition is done signed with full precision, i.e.the result has one extra bit /// Signed division by 2 (or arithmetic shift right by one bit) is performed before /// writing the result to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ave_s.d))] pub unsafe fn __msa_ave_s_d(a: v2i64, b: v2i64) -> v2i64 { msa_ave_s_d(a, ::mem::transmute(b)) } /// Vector Unsigned Average /// /// The elements in vector `a` (sixteen unsigned 8-bit integer numbers) /// are added to the elements in vector `b` (sixteen unsigned 8-bit integer numbers) /// The addition is done unsigned with full precision, i.e.the result has one extra bit /// Unsigned division by 2 (or logical shift right by one bit) is performed before /// writing the result to vector (sixteen unsigned 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ave_u.b))] pub unsafe fn __msa_ave_u_b(a: v16u8, b: v16u8) -> v16u8 { msa_ave_u_b(a, ::mem::transmute(b)) } /// Vector Unsigned Average /// /// The elements in vector `a` (eight unsigned 16-bit integer numbers) /// are added to the elements in vector `b` (eight unsigned 16-bit integer numbers) /// The addition is done unsigned with full precision, i.e.the result has one extra bit /// Unsigned division by 2 (or logical shift right by one bit) is performed before /// writing the result to vector (eight unsigned 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ave_u.h))] pub unsafe fn __msa_ave_u_h(a: v8u16, b: v8u16) -> v8u16 { msa_ave_u_h(a, ::mem::transmute(b)) } /// Vector Unsigned Average /// /// The elements in vector `a` (four unsigned 32-bit integer numbers) /// are added to the elements in vector `b` (four unsigned 32-bit integer numbers) /// The addition is done unsigned with full precision, i.e.the result has one extra bit /// Unsigned division by 2 (or logical shift right by one bit) is performed before /// writing the result to vector (four unsigned 32-bit integer numbers). 
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ave_u.w))] pub unsafe fn __msa_ave_u_w(a: v4u32, b: v4u32) -> v4u32 { msa_ave_u_w(a, ::mem::transmute(b)) } /// Vector Unsigned Average /// /// The elements in vector `a` (two unsigned 64-bit integer numbers) /// are added to the elements in vector `b` (two unsigned 64-bit integer numbers) /// The addition is done unsigned with full precision, i.e.the result has one extra bit /// Unsigned division by 2 (or logical shift right by one bit) is performed before /// writing the result to vector (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ave_u.d))] pub unsafe fn __msa_ave_u_d(a: v2u64, b: v2u64) -> v2u64 { msa_ave_u_d(a, ::mem::transmute(b)) } /// Vector Signed Average Rounded /// /// The elements in vector `a` (sixteen signed 8-bit integer numbers) /// are added to the elements in vector `b` (sixteen signed 8-bit integer numbers) /// The addition of the elements plus 1 (for rounding) is done signed with full precision, /// i.e. the result has one extra bit. /// Signed division by 2 (or arithmetic shift right by one bit) is performed before /// writing the result to vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(aver_s.b))] pub unsafe fn __msa_aver_s_b(a: v16i8, b: v16i8) -> v16i8 { msa_aver_s_b(a, ::mem::transmute(b)) } /// Vector Signed Average Rounded /// /// The elements in vector `a` (eight signed 16-bit integer numbers) /// are added to the elements in vector `b` (eight signed 16-bit integer numbers) /// The addition of the elements plus 1 (for rounding) is done signed with full precision, /// i.e. the result has one extra bit. /// Signed division by 2 (or arithmetic shift right by one bit) is performed before /// writing the result to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(aver_s.h))] pub unsafe fn __msa_aver_s_h(a: v8i16, b: v8i16) -> v8i16 { msa_aver_s_h(a, ::mem::transmute(b)) } /// Vector Signed Average Rounded /// /// The elements in vector `a` (four signed 32-bit integer numbers) /// are added to the elements in vector `b` (four signed 32-bit integer numbers) /// The addition of the elements plus 1 (for rounding) is done signed with full precision, /// i.e. the result has one extra bit. /// Signed division by 2 (or arithmetic shift right by one bit) is performed before /// writing the result to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(aver_s.w))] pub unsafe fn __msa_aver_s_w(a: v4i32, b: v4i32) -> v4i32 { msa_aver_s_w(a, ::mem::transmute(b)) } /// Vector Signed Average Rounded /// /// The elements in vector `a` (two signed 64-bit integer numbers) /// are added to the elements in vector `b` (two signed 64-bit integer numbers) /// The addition of the elements plus 1 (for rounding) is done signed with full precision, /// i.e. the result has one extra bit. /// Signed division by 2 (or arithmetic shift right by one bit) is performed before /// writing the result to vector (two signed 64-bit integer numbers). 
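///
/// A minimal usage sketch (illustrative only, not part of this crate's API):
/// the rounded variant adds 1 before the shift, so `aver_s.d` of 1 and 2 yields 2
/// where the non-rounded `ave_s.d` would yield 1; the wrapper name is hypothetical.
///
/// ```ignore
/// unsafe fn rounded_average_i64x2(a: [i64; 2], b: [i64; 2]) -> [i64; 2] {
///     let va: v2i64 = core::mem::transmute(a);
///     let vb: v2i64 = core::mem::transmute(b);
///     // (a + b + 1) >> 1 per lane, computed with full precision.
///     core::mem::transmute(__msa_aver_s_d(va, vb))
/// }
/// ```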
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(aver_s.d))] pub unsafe fn __msa_aver_s_d(a: v2i64, b: v2i64) -> v2i64 { msa_aver_s_d(a, ::mem::transmute(b)) } /// Vector Unsigned Average Rounded /// /// The elements in vector `a` (sixteen unsigned 8-bit integer numbers) /// are added to the elements in vector `b` (sixteen unsigned 8-bit integer numbers) /// The addition of the elements plus 1 (for rounding) is done unsigned with full precision, /// i.e. the result has one extra bit. /// Unsigned division by 2 (or logical shift right by one bit) is performed before /// writing the result to vector (sixteen unsigned 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(aver_u.b))] pub unsafe fn __msa_aver_u_b(a: v16u8, b: v16u8) -> v16u8 { msa_aver_u_b(a, ::mem::transmute(b)) } /// Vector Unsigned Average Rounded /// /// The elements in vector `a` (eight unsigned 16-bit integer numbers) /// are added to the elements in vector `b` (eight unsigned 16-bit integer numbers) /// The addition of the elements plus 1 (for rounding) is done unsigned with full precision, /// i.e. the result has one extra bit. /// Unsigned division by 2 (or logical shift right by one bit) is performed before /// writing the result to vector (eight unsigned 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(aver_u.h))] pub unsafe fn __msa_aver_u_h(a: v8u16, b: v8u16) -> v8u16 { msa_aver_u_h(a, ::mem::transmute(b)) } /// Vector Unsigned Average Rounded /// /// The elements in vector `a` (four unsigned 32-bit integer numbers) /// are added to the elements in vector `b` (four unsigned 32-bit integer numbers) /// The addition of the elements plus 1 (for rounding) is done unsigned with full precision, /// i.e. the result has one extra bit. /// Unsigned division by 2 (or logical shift right by one bit) is performed before /// writing the result to vector (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(aver_u.w))] pub unsafe fn __msa_aver_u_w(a: v4u32, b: v4u32) -> v4u32 { msa_aver_u_w(a, ::mem::transmute(b)) } /// Vector Unsigned Average Rounded /// /// The elements in vector `a` (two unsigned 64-bit integer numbers) /// are added to the elements in vector `b` (two unsigned 64-bit integer numbers) /// The addition of the elements plus 1 (for rounding) is done unsigned with full precision, /// i.e. the result has one extra bit. /// Unsigned division by 2 (or logical shift right by one bit) is performed before /// writing the result to vector (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(aver_u.d))] pub unsafe fn __msa_aver_u_d(a: v2u64, b: v2u64) -> v2u64 { msa_aver_u_d(a, ::mem::transmute(b)) } /// Vector Bit Clear /// /// Clear (set to 0) one bit in each element of vector `a` (sixteen unsigned 8-bit integer numbers) /// The bit position is given by the elements in `b` (sixteen unsigned 8-bit integer numbers) /// modulo the size of the element in bits. /// The result is written to vector (sixteen unsigned 8-bit integer numbers). 
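///
/// A minimal usage sketch (illustrative only, not part of this crate's API):
/// each lane of the second operand selects which bit (taken modulo 8 for byte
/// lanes) to clear in the corresponding lane of the first; the wrapper name is
/// hypothetical.
///
/// ```ignore
/// unsafe fn clear_bits_u8x16(a: [u8; 16], bit: [u8; 16]) -> [u8; 16] {
///     let va: v16u8 = core::mem::transmute(a);
///     let vbit: v16u8 = core::mem::transmute(bit);
///     // Lane i of the result is a[i] & !(1 << (bit[i] % 8)).
///     core::mem::transmute(__msa_bclr_b(va, vbit))
/// }
/// ```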
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bclr.b))] pub unsafe fn __msa_bclr_b(a: v16u8, b: v16u8) -> v16u8 { msa_bclr_b(a, ::mem::transmute(b)) } /// Vector Bit Clear /// /// Clear (set to 0) one bit in each element of vector `a` (eight unsigned 16-bit integer numbers) /// The bit position is given by the elements in `b` (eight unsigned 16-bit integer numbers) /// modulo the size of the element in bits. /// The result is written to vector (eight unsigned 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bclr.h))] pub unsafe fn __msa_bclr_h(a: v8u16, b: v8u16) -> v8u16 { msa_bclr_h(a, ::mem::transmute(b)) } /// Vector Bit Clear /// /// Clear (set to 0) one bit in each element of vector `a` (four unsigned 32-bit integer numbers) /// The bit position is given by the elements in `b` (four unsigned 32-bit integer numbers) /// modulo the size of the element in bits. /// The result is written to vector (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bclr.w))] pub unsafe fn __msa_bclr_w(a: v4u32, b: v4u32) -> v4u32 { msa_bclr_w(a, ::mem::transmute(b)) } /// Vector Bit Clear /// /// Clear (set to 0) one bit in each element of vector `a` (two unsigned 64-bit integer numbers) /// The bit position is given by the elements in `b` (two unsigned 64-bit integer numbers) /// modulo the size of the element in bits. /// The result is written to vector (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bclr.d))] pub unsafe fn __msa_bclr_d(a: v2u64, b: v2u64) -> v2u64 { msa_bclr_d(a, ::mem::transmute(b)) } /// Immediate Bit Clear /// /// Clear (set to 0) one bit in each element of vector `a` (sixteen unsigned 8-bit integer numbers) /// The bit position is given by the immediate 'm' modulo the size of the element in bits. /// The result is written to vector (sixteen unsigned 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bclri.b, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_bclri_b(a: v16u8, imm3: i32) -> v16u8 { macro_rules! call { ($imm3:expr) => { msa_bclri_b(a, $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Bit Clear /// /// Clear (set to 0) one bit in each element of vector `a` (eight unsigned 16-bit integer numbers) /// The bit position is given by the immediate 'm' modulo the size of the element in bits. /// The result is written to vector (eight unsigned 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bclri.h, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_bclri_h(a: v8u16, imm4: i32) -> v8u16 { macro_rules! call { ($imm4:expr) => { msa_bclri_h(a, $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Bit Clear /// /// Clear (set to 0) one bit in each element of vector `a` (four unsigned 32-bit integer numbers) /// The bit position is given by the immediate 'm' modulo the size of the element in bits. /// The result is written to vector (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bclri.w, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_bclri_w(a: v4u32, imm5: i32) -> v4u32 { macro_rules! 
call { ($imm5:expr) => { msa_bclri_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Bit Clear /// /// Clear (set to 0) one bit in each element of vector `a` (two unsigned 64-bit integer numbers) /// The bit position is given by the immediate 'm' modulo the size of the element in bits. /// The result is written to vector (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bclri.d, imm6 = 0b111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_bclri_d(a: v2u64, imm6: i32) -> v2u64 { macro_rules! call { ($imm6:expr) => { msa_bclri_d(a, $imm6) }; } constify_imm6!(imm6, call) } /// Vector Bit Insert Left /// /// Copy most significant (left) bits in each element of vector `b` (sixteen unsigned 8-bit integer numbers) /// to elements in vector 'a' (sixteen unsigned 8-bit integer numbers) while preserving the least sig-nificant (right) bits. /// The number of bits to copy is given by the elements in vector 'c' (sixteen unsigned 8-bit integer numbers) /// modulo the size of the element inbits plus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsl.b))] pub unsafe fn __msa_binsl_b(a: v16u8, b: v16u8, c: v16u8) -> v16u8 { msa_binsl_b(a, ::mem::transmute(b), c) } /// Vector Bit Insert Left /// /// Copy most significant (left) bits in each element of vector `b` (eight unsigned 16-bit integer numbers) /// to elements in vector 'a' (eight unsigned 16-bit integer numbers) while preserving the least sig-nificant (right) bits. /// The number of bits to copy is given by the elements in vector 'c' (eight unsigned 16-bit integer numbers) /// modulo the size of the element inbits plus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsl.h))] pub unsafe fn __msa_binsl_h(a: v8u16, b: v8u16, c: v8u16) -> v8u16 { msa_binsl_h(a, ::mem::transmute(b), c) } /// Vector Bit Insert Left /// /// Copy most significant (left) bits in each element of vector `b` (four unsigned 32-bit integer numbers) /// to elements in vector 'a' (four unsigned 32-bit integer numbers) while preserving the least sig-nificant (right) bits. /// The number of bits to copy is given by the elements in vector 'c' (four unsigned 32-bit integer numbers) /// modulo the size of the element inbits plus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsl.w))] pub unsafe fn __msa_binsl_w(a: v4u32, b: v4u32, c: v4u32) -> v4u32 { msa_binsl_w(a, ::mem::transmute(b), c) } /// Vector Bit Insert Left /// /// Copy most significant (left) bits in each element of vector `b` (two unsigned 64-bit integer numbers) /// to elements in vector 'a' (two unsigned 64-bit integer numbers) while preserving the least sig-nificant (right) bits. /// The number of bits to copy is given by the elements in vector 'c' (two unsigned 64-bit integer numbers) /// modulo the size of the element inbits plus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsl.d))] pub unsafe fn __msa_binsl_d(a: v2u64, b: v2u64, c: v2u64) -> v2u64 { msa_binsl_d(a, ::mem::transmute(b), c) } /// Immediate Bit Insert Left /// /// Copy most significant (left) bits in each element of vector `b` (sixteen unsigned 8-bit integer numbers) /// to elements in vector 'a' (sixteen unsigned 8-bit integer numbers) while preserving the least sig-nificant (right) bits. /// The number of bits to copy is given by the immediate imm3 modulo the size of the element in bitsplus 1. 
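///
/// A minimal usage sketch (illustrative only, not part of this crate's API):
/// because of `#[rustc_args_required_const(2)]`, the bit count must be a literal
/// at the call site; with imm3 = 2, the 3 most significant bits of every byte of
/// `b` are inserted into `a`; the wrapper name is hypothetical.
///
/// ```ignore
/// unsafe fn insert_top_bits(a: v16u8, b: v16u8) -> v16u8 {
///     // Copies the 3 most significant bits of each byte of `b` into `a`,
///     // keeping the low 5 bits of `a` unchanged.
///     __msa_binsli_b(a, b, 2)
/// }
/// ```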
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsli.b, imm3 = 0b111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_binsli_b(a: v16u8, b: v16u8, imm3: i32) -> v16u8 { macro_rules! call { ($imm3:expr) => { msa_binsli_b(a, ::mem::transmute(b), $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Bit Insert Left /// /// Copy most significant (left) bits in each element of vector `b` (eight unsigned 16-bit integer numbers) /// to elements in vector `a` (eight unsigned 16-bit integer numbers) while preserving the least significant (right) bits. /// The number of bits to copy is given by the immediate imm4 modulo the size of the element in bits plus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsli.h, imm4 = 0b1111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_binsli_h(a: v8u16, b: v8u16, imm4: i32) -> v8u16 { macro_rules! call { ($imm4:expr) => { msa_binsli_h(a, ::mem::transmute(b), $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Bit Insert Left /// /// Copy most significant (left) bits in each element of vector `b` (four unsigned 32-bit integer numbers) /// to elements in vector `a` (four unsigned 32-bit integer numbers) while preserving the least significant (right) bits. /// The number of bits to copy is given by the immediate imm5 modulo the size of the element in bits plus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsli.w, imm5 = 0b11111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_binsli_w(a: v4u32, b: v4u32, imm5: i32) -> v4u32 { macro_rules! call { ($imm5:expr) => { msa_binsli_w(a, ::mem::transmute(b), $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Bit Insert Left /// /// Copy most significant (left) bits in each element of vector `b` (two unsigned 64-bit integer numbers) /// to elements in vector `a` (two unsigned 64-bit integer numbers) while preserving the least significant (right) bits. /// The number of bits to copy is given by the immediate imm6 modulo the size of the element in bits plus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsli.d, imm6 = 0b111111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_binsli_d(a: v2u64, b: v2u64, imm6: i32) -> v2u64 { macro_rules! call { ($imm6:expr) => { msa_binsli_d(a, ::mem::transmute(b), $imm6) }; } constify_imm6!(imm6, call) } /// Vector Bit Insert Right /// /// Copy least significant (right) bits in each element of vector `b` (sixteen unsigned 8-bit integer numbers) /// to elements in vector `a` (sixteen unsigned 8-bit integer numbers) while preserving the most significant (left) bits. /// The number of bits to copy is given by the elements in vector `c` (sixteen unsigned 8-bit integer numbers) /// modulo the size of the element in bits plus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsr.b))] pub unsafe fn __msa_binsr_b(a: v16u8, b: v16u8, c: v16u8) -> v16u8 { msa_binsr_b(a, ::mem::transmute(b), c) } /// Vector Bit Insert Right /// /// Copy least significant (right) bits in each element of vector `b` (eight unsigned 16-bit integer numbers) /// to elements in vector `a` (eight unsigned 16-bit integer numbers) while preserving the most significant (left) bits. /// The number of bits to copy is given by the elements in vector `c` (eight unsigned 16-bit integer numbers) /// modulo the size of the element in bits plus 1.
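///
/// A minimal usage sketch (illustrative only, not part of this crate's API):
/// the per-lane count in `c` (modulo 16, plus 1) selects how many low bits of
/// `b` replace the low bits of `a`; the wrapper name is hypothetical.
///
/// ```ignore
/// unsafe fn insert_low_bits_u16x8(a: v8u16, b: v8u16, count: v8u16) -> v8u16 {
///     // For each lane, the (count % 16) + 1 least significant bits come from
///     // `b`; the remaining high bits are kept from `a`.
///     __msa_binsr_h(a, b, count)
/// }
/// ```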
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsr.h))] pub unsafe fn __msa_binsr_h(a: v8u16, b: v8u16, c: v8u16) -> v8u16 { msa_binsr_h(a, ::mem::transmute(b), c) } /// Vector Bit Insert Right /// /// Copy least significant (right) bits in each element of vector `b` (four unsigned 32-bit integer numbers) /// to elements in vector `a` (four unsigned 32-bit integer numbers) while preserving the most significant (left) bits. /// The number of bits to copy is given by the elements in vector `c` (four unsigned 32-bit integer numbers) /// modulo the size of the element in bits plus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsr.w))] pub unsafe fn __msa_binsr_w(a: v4u32, b: v4u32, c: v4u32) -> v4u32 { msa_binsr_w(a, ::mem::transmute(b), c) } /// Vector Bit Insert Right /// /// Copy least significant (right) bits in each element of vector `b` (two unsigned 64-bit integer numbers) /// to elements in vector `a` (two unsigned 64-bit integer numbers) while preserving the most significant (left) bits. /// The number of bits to copy is given by the elements in vector `c` (two unsigned 64-bit integer numbers) /// modulo the size of the element in bits plus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsr.d))] pub unsafe fn __msa_binsr_d(a: v2u64, b: v2u64, c: v2u64) -> v2u64 { msa_binsr_d(a, ::mem::transmute(b), c) } /// Immediate Bit Insert Right /// /// Copy least significant (right) bits in each element of vector `b` (sixteen unsigned 8-bit integer numbers) /// to elements in vector `a` (sixteen unsigned 8-bit integer numbers) while preserving the most significant (left) bits. /// The number of bits to copy is given by the immediate imm3 modulo the size of the element in bits plus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsri.b, imm3 = 0b111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_binsri_b(a: v16u8, b: v16u8, imm3: i32) -> v16u8 { macro_rules! call { ($imm3:expr) => { msa_binsri_b(a, ::mem::transmute(b), $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Bit Insert Right /// /// Copy least significant (right) bits in each element of vector `b` (eight unsigned 16-bit integer numbers) /// to elements in vector `a` (eight unsigned 16-bit integer numbers) while preserving the most significant (left) bits. /// The number of bits to copy is given by the immediate imm4 modulo the size of the element in bits plus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsri.h, imm4 = 0b1111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_binsri_h(a: v8u16, b: v8u16, imm4: i32) -> v8u16 { macro_rules! call { ($imm4:expr) => { msa_binsri_h(a, ::mem::transmute(b), $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Bit Insert Right /// /// Copy least significant (right) bits in each element of vector `b` (four unsigned 32-bit integer numbers) /// to elements in vector `a` (four unsigned 32-bit integer numbers) while preserving the most significant (left) bits. /// The number of bits to copy is given by the immediate imm5 modulo the size of the element in bits plus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsri.w, imm5 = 0b11111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_binsri_w(a: v4u32, b: v4u32, imm5: i32) -> v4u32 { macro_rules!
call { ($imm5:expr) => { msa_binsri_w(a, ::mem::transmute(b), $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Bit Insert Right /// /// Copy most significant (right) bits in each element of vector `b` (two unsigned 64-bit integer numbers) /// to elements in vector 'a' (two unsigned 64-bit integer numbers) while preserving the least sig-nificant (left) bits. /// The number of bits to copy is given by the immediate imm6 modulo the size of the element in bitsplus 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(binsri.d, imm6 = 0b111111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_binsri_d(a: v2u64, b: v2u64, imm6: i32) -> v2u64 { macro_rules! call { ($imm6:expr) => { msa_binsri_d(a, ::mem::transmute(b), $imm6) }; } constify_imm6!(imm6, call) } /// Vector Bit Move If Not Zero /// /// Copy to destination vector 'a' (sixteen unsigned 8-bit integer numbers) all bits from source vector /// 'b' (sixteen unsigned 8-bit integer numbers) for which the corresponding bits from target vector 'c' /// (sixteen unsigned 8-bit integer numbers) are 1 and leaves unchanged all destination bits /// for which the corresponding target bits are 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bmnz.v))] pub unsafe fn __msa_bmnz_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8 { msa_bmnz_v(a, ::mem::transmute(b), c) } /// Immediate Bit Move If Not Zero /// /// Copy to destination vector 'a' (sixteen unsigned 8-bit integer numbers) all bits from source vector /// 'b' (sixteen unsigned 8-bit integer numbers) for which the corresponding bits from from immediate imm8 /// are 1 and leaves unchanged all destination bits for which the corresponding target bits are 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bmnzi.b, imm8 = 0b11111111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_bmnzi_b(a: v16u8, b: v16u8, imm8: i32) -> v16u8 { macro_rules! call { ($imm8:expr) => { msa_bmnzi_b(a, ::mem::transmute(b), $imm8) }; } constify_imm8!(imm8, call) } /// Vector Bit Move If Zero /// /// Copy to destination vector 'a' (sixteen unsigned 8-bit integer numbers) all bits from source vector /// 'b' (sixteen unsigned 8-bit integer numbers) for which the corresponding bits from target vector 'c' /// (sixteen unsigned 8-bit integer numbers) are 0 and leaves unchanged all destination bits /// for which the corresponding target bits are 1 /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bmz.v))] pub unsafe fn __msa_bmz_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8 { msa_bmz_v(a, ::mem::transmute(b), c) } /// Immediate Bit Move If Zero /// /// Copy to destination vector 'a' (sixteen unsigned 8-bit integer numbers) all bits from source vector /// 'b' (sixteen unsigned 8-bit integer numbers) for which the corresponding bits from from immediate imm8 /// are 0 and leaves unchanged all destination bits for which the corresponding immediate bits are 1. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bmzi.b, imm8 = 0b11111111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_bmzi_b(a: v16u8, b: v16u8, imm8: i32) -> v16u8 { macro_rules! 
call { ($imm8:expr) => { msa_bmzi_b(a, ::mem::transmute(b), $imm8) }; } constify_imm8!(imm8, call) } /// Vector Bit Negate /// /// Negate (complement) one bit in each element of vector `a` (sixteen unsigned 8-bit integer numbers) /// The bit position is given by the elements in vector 'b' (sixteen unsigned 8-bit integer numbers) /// modulo thesize of the element in bits. /// The result is written to vector (sixteen unsigned 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bneg.b))] pub unsafe fn __msa_bneg_b(a: v16u8, b: v16u8) -> v16u8 { msa_bneg_b(a, ::mem::transmute(b)) } /// Vector Bit Negate /// /// Negate (complement) one bit in each element of vector `a` (eight unsigned 16-bit integer numbers) /// The bit position is given by the elements in vector 'b' (eight unsigned 16-bit integer numbers) /// modulo thesize of the element in bits. /// The result is written to vector (eight unsigned 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bneg.h))] pub unsafe fn __msa_bneg_h(a: v8u16, b: v8u16) -> v8u16 { msa_bneg_h(a, ::mem::transmute(b)) } /// Vector Bit Negate /// /// Negate (complement) one bit in each element of vector `a` (four unsigned 32-bit integer numbers) /// The bit position is given by the elements in vector 'b' (four unsigned 32-bit integer numbers) /// modulo thesize of the element in bits. /// The result is written to vector (four unsigned 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bneg.w))] pub unsafe fn __msa_bneg_w(a: v4u32, b: v4u32) -> v4u32 { msa_bneg_w(a, ::mem::transmute(b)) } /// Vector Bit Negate /// /// Negate (complement) one bit in each element of vector `a` (two unsigned 64-bit integer numbers) /// The bit position is given by the elements in vector 'b' (two unsigned 64-bit integer numbers) /// modulo thesize of the element in bits. /// The result is written to vector (two unsigned 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bneg.d))] pub unsafe fn __msa_bneg_d(a: v2u64, b: v2u64) -> v2u64 { msa_bneg_d(a, ::mem::transmute(b)) } /// Immediate Bit Negate /// /// Negate (complement) one bit in each element of vector `a` (sixteen unsigned 8-bit integer numbers) /// The bit position is given by immediate imm3 modulo thesize of the element in bits. /// The result is written to vector (sixteen unsigned 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bnegi.b, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_bnegi_b(a: v16u8, imm3: i32) -> v16u8 { macro_rules! call { ($imm3:expr) => { msa_bnegi_b(a, $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Bit Negate /// /// Negate (complement) one bit in each element of vector `a` (eight unsigned 16-bit integer numbers) /// The bit position is given by immediate imm4 modulo thesize of the element in bits. /// The result is written to vector (eight unsigned 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bnegi.h, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_bnegi_h(a: v8u16, imm4: i32) -> v8u16 { macro_rules! 
call { ($imm4:expr) => { msa_bnegi_h(a, $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Bit Negate /// /// Negate (complement) one bit in each element of vector `a` (four unsigned 32-bit integer numbers). /// The bit position is given by immediate imm5 modulo the size of the element in bits. /// The result is written to vector (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bnegi.w, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_bnegi_w(a: v4u32, imm5: i32) -> v4u32 { macro_rules! call { ($imm5:expr) => { msa_bnegi_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Bit Negate /// /// Negate (complement) one bit in each element of vector `a` (two unsigned 64-bit integer numbers). /// The bit position is given by immediate imm6 modulo the size of the element in bits. /// The result is written to vector (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bnegi.d, imm6 = 0b111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_bnegi_d(a: v2u64, imm6: i32) -> v2u64 { macro_rules! call { ($imm6:expr) => { msa_bnegi_d(a, $imm6) }; } constify_imm6!(imm6, call) } /// Immediate Branch If All Elements Are Not Zero /// /// PC-relative branch if all elements in `a` (sixteen unsigned 8-bit integer numbers) are not zero. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bnz.b))] pub unsafe fn __msa_bnz_b(a: v16u8) -> i32 { msa_bnz_b(a) } /// Immediate Branch If All Elements Are Not Zero /// /// PC-relative branch if all elements in `a` (eight unsigned 16-bit integer numbers) are not zero. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bnz.h))] pub unsafe fn __msa_bnz_h(a: v8u16) -> i32 { msa_bnz_h(a) } /// Immediate Branch If All Elements Are Not Zero /// /// PC-relative branch if all elements in `a` (four unsigned 32-bit integer numbers) are not zero. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bnz.w))] pub unsafe fn __msa_bnz_w(a: v4u32) -> i32 { msa_bnz_w(a) } /// Immediate Branch If All Elements Are Not Zero /// /// PC-relative branch if all elements in `a` (two unsigned 64-bit integer numbers) are not zero. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bnz.d))] pub unsafe fn __msa_bnz_d(a: v2u64) -> i32 { msa_bnz_d(a) } /// Immediate Branch If Not Zero (At Least One Element of Any Format Is Not Zero) /// /// PC-relative branch if at least one bit in `a` (sixteen unsigned 8-bit integer numbers) is not zero, /// i.e., at least one element is not zero regardless of the data format. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bnz.v))] pub unsafe fn __msa_bnz_v(a: v16u8) -> i32 { msa_bnz_v(a) } /// Vector Bit Select /// /// Selectively copy bits from the source vectors `b` (sixteen unsigned 8-bit integer numbers) /// and `c` (sixteen unsigned 8-bit integer numbers) /// into destination vector `a` (sixteen unsigned 8-bit integer numbers) based on the corresponding bit in `a`: /// if 0 copies the bit from `b`, if 1 copies the bit from `c`.
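///
/// A minimal usage sketch (illustrative only, not part of this crate's API):
/// `a` acts as a bit mask choosing between the other two operands, which makes
/// this a building block for branchless blends; the wrapper name is hypothetical.
///
/// ```ignore
/// unsafe fn blend_bits(mask: [u8; 16], if_zero: [u8; 16], if_one: [u8; 16]) -> [u8; 16] {
///     let m: v16u8 = core::mem::transmute(mask);
///     let b: v16u8 = core::mem::transmute(if_zero);
///     let c: v16u8 = core::mem::transmute(if_one);
///     // Result bit = bit of `if_zero` where the mask bit is 0, bit of `if_one` where it is 1.
///     core::mem::transmute(__msa_bsel_v(m, b, c))
/// }
/// ```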
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bsel.v))] pub unsafe fn __msa_bsel_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8 { msa_bsel_v(a, ::mem::transmute(b), c) } /// Immediate Bit Select /// /// Selectively copy bits from the source vector `b` (sixteen unsigned 8-bit integer numbers) /// and from the 8-bit immediate imm8 /// into destination vector `a` (sixteen unsigned 8-bit integer numbers) based on the corresponding bit in `a`: /// if 0 copies the bit from `b`, if 1 copies the bit from imm8. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bseli.b, imm8 = 0b11111111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_bseli_b(a: v16u8, b: v16u8, imm8: i32) -> v16u8 { macro_rules! call { ($imm8:expr) => { msa_bseli_b(a, ::mem::transmute(b), $imm8) }; } constify_imm8!(imm8, call) } /// Vector Bit Set /// /// Set to 1 one bit in each element of vector `a` (sixteen unsigned 8-bit integer numbers). /// The bit position is given by the elements in vector `b` (sixteen unsigned 8-bit integer numbers) /// modulo the size of the element in bits. /// The result is written to vector (sixteen unsigned 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bset.b))] pub unsafe fn __msa_bset_b(a: v16u8, b: v16u8) -> v16u8 { msa_bset_b(a, ::mem::transmute(b)) } /// Vector Bit Set /// /// Set to 1 one bit in each element of vector `a` (eight unsigned 16-bit integer numbers). /// The bit position is given by the elements in vector `b` (eight unsigned 16-bit integer numbers) /// modulo the size of the element in bits. /// The result is written to vector (eight unsigned 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bset.h))] pub unsafe fn __msa_bset_h(a: v8u16, b: v8u16) -> v8u16 { msa_bset_h(a, ::mem::transmute(b)) } /// Vector Bit Set /// /// Set to 1 one bit in each element of vector `a` (four unsigned 32-bit integer numbers). /// The bit position is given by the elements in vector `b` (four unsigned 32-bit integer numbers) /// modulo the size of the element in bits. /// The result is written to vector (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bset.w))] pub unsafe fn __msa_bset_w(a: v4u32, b: v4u32) -> v4u32 { msa_bset_w(a, ::mem::transmute(b)) } /// Vector Bit Set /// /// Set to 1 one bit in each element of vector `a` (two unsigned 64-bit integer numbers). /// The bit position is given by the elements in vector `b` (two unsigned 64-bit integer numbers) /// modulo the size of the element in bits. /// The result is written to vector (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bset.d))] pub unsafe fn __msa_bset_d(a: v2u64, b: v2u64) -> v2u64 { msa_bset_d(a, ::mem::transmute(b)) } /// Immediate Bit Set /// /// Set to 1 one bit in each element of vector `a` (sixteen unsigned 8-bit integer numbers). /// The bit position is given by immediate imm3. /// The result is written to vector `a` (sixteen unsigned 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bseti.b, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_bseti_b(a: v16u8, imm3: i32) -> v16u8 { macro_rules!
call { ($imm3:expr) => { msa_bseti_b(a, $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Bit Set /// /// Set to 1 one bit in each element of vector `a` (eight unsigned 16-bit integer numbers) /// The bit position is given by immediate imm4. /// The result is written to vector 'a'(eight unsigned 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bseti.h, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_bseti_h(a: v8u16, imm4: i32) -> v8u16 { macro_rules! call { ($imm4:expr) => { msa_bseti_h(a, $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Bit Set /// /// Set to 1 one bit in each element of vector `a` (four unsigned 32-bit integer numbers) /// The bit position is given by immediate imm5. /// The result is written to vector 'a'(four unsigned 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bseti.w, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_bseti_w(a: v4u32, imm5: i32) -> v4u32 { macro_rules! call { ($imm5:expr) => { msa_bseti_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Bit Set /// /// Set to 1 one bit in each element of vector `a` (two unsigned 64-bit integer numbers) /// The bit position is given by immediate imm6. /// The result is written to vector 'a'(two unsigned 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bseti.d, imm6 = 0b111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_bseti_d(a: v2u64, imm6: i32) -> v2u64 { macro_rules! call { ($imm6:expr) => { msa_bseti_d(a, $imm6) }; } constify_imm6!(imm6, call) } /// Immediate Branch If At Least One Element Is Zero /// /// PC-relative branch if at least one element in 'a' (sixteen unsigned 8-bit integer numbers) is zero. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bz.b))] pub unsafe fn __msa_bz_b(a: v16u8) -> i32 { msa_bz_b(a) } /// Immediate Branch If At Least One Element Is Zero /// /// PC-relative branch if at least one element in 'a' (eight unsigned 16-bit integer numbers) is zero. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bz.h))] pub unsafe fn __msa_bz_h(a: v8u16) -> i32 { msa_bz_h(a) } /// Immediate Branch If At Least One Element Is Zero /// /// PC-relative branch if at least one element in 'a' (four unsigned 32-bit integer numbers) is zero. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bz.w))] pub unsafe fn __msa_bz_w(a: v4u32) -> i32 { msa_bz_w(a) } /// Immediate Branch If At Least One Element Is Zero /// /// PC-relative branch if at least one element in 'a' (two unsigned 64-bit integer numbers) is zero. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bz.d))] pub unsafe fn __msa_bz_d(a: v2u64) -> i32 { msa_bz_d(a) } /// Immediate Branch If Zero (All Elements of Any Format Are Zero) /// /// PC-relative branch if all elements in 'a' (sixteen unsigned 8-bit integer numbers) bits are zero, /// i.e. all elements are zero regardless of the data format /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(bz.v))] pub unsafe fn __msa_bz_v(a: v16u8) -> i32 { msa_bz_v(a) } /// Vector Compare Equal /// /// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements /// if the corresponding 'a' (sixteen signed 8-bit integer numbers) and 'b' (sixteen signed 8-bit integer numbers) /// elements are equal, otherwise set all bits to 0. 
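///
/// A minimal usage sketch (illustrative only, not part of this crate's API):
/// the comparison produces a per-lane mask of all ones (-1) or all zeros (0),
/// which can then be fed to the bit-select and bit-move intrinsics above; the
/// wrapper name is hypothetical.
///
/// ```ignore
/// unsafe fn equal_mask_i8x16(a: [i8; 16], b: [i8; 16]) -> [i8; 16] {
///     let va: v16i8 = core::mem::transmute(a);
///     let vb: v16i8 = core::mem::transmute(b);
///     // Each result lane is -1 where a[i] == b[i], and 0 otherwise.
///     core::mem::transmute(__msa_ceq_b(va, vb))
/// }
/// ```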
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ceq.b))] pub unsafe fn __msa_ceq_b(a: v16i8, b: v16i8) -> v16i8 { msa_ceq_b(a, ::mem::transmute(b)) } /// Vector Compare Equal /// /// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements /// if the corresponding 'a' (eight signed 16-bit integer numbers) and 'b' (eight signed 16-bit integer numbers) /// elements are equal, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ceq.h))] pub unsafe fn __msa_ceq_h(a: v8i16, b: v8i16) -> v8i16 { msa_ceq_h(a, ::mem::transmute(b)) } /// Vector Compare Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four signed 32-bit integer numbers) and 'b' (four signed 32-bit integer numbers) /// elements are equal, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ceq.w))] pub unsafe fn __msa_ceq_w(a: v4i32, b: v4i32) -> v4i32 { msa_ceq_w(a, ::mem::transmute(b)) } /// Vector Compare Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two signed 64-bit integer numbers) and 'b' (two signed 64-bit integer numbers) /// elements are equal, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ceq.d))] pub unsafe fn __msa_ceq_d(a: v2i64, b: v2i64) -> v2i64 { msa_ceq_d(a, ::mem::transmute(b)) } /// Immediate Compare Equal /// /// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements /// if the corresponding 'a' (sixteen signed 8-bit integer numbers) the 5-bit signed immediate imm_s5 /// are equal, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ceqi.b, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_ceqi_b(a: v16i8, imm_s5: i32) -> v16i8 { macro_rules! call { ($imm_s5:expr) => { msa_ceqi_b(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Compare Equal /// /// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements /// if the corresponding 'a' (eight signed 16-bit integer numbers) the 5-bit signed immediate imm_s5 /// are equal, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ceqi.h, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_ceqi_h(a: v8i16, imm_s5: i32) -> v8i16 { macro_rules! call { ($imm_s5:expr) => { msa_ceqi_h(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Compare Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four signed 32-bit integer numbers) the 5-bit signed immediate imm_s5 /// are equal, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ceqi.w, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_ceqi_w(a: v4i32, imm_s5: i32) -> v4i32 { macro_rules! call { ($imm_s5:expr) => { msa_ceqi_w(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Compare Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two signed 64-bit integer numbers) the 5-bit signed immediate imm_s5 /// are equal, otherwise set all bits to 0. 
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ceqi.d, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_ceqi_d(a: v2i64, imm_s5: i32) -> v2i64 { macro_rules! call { ($imm_s5:expr) => { msa_ceqi_d(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// GPR Copy from MSA Control Register /// /// The sign extended content of MSA control register cs is copied to GPRrd. /// /// Can not be tested in user mode #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(cfcmsa, imm5 = 0b11111))] #[rustc_args_required_const(0)] pub unsafe fn __msa_cfcmsa(imm5: i32) -> i32 { macro_rules! call { ($imm5:expr) => { msa_cfcmsa($imm5) }; } constify_imm5!(imm5, call) } /// Vector Compare Signed Less Than or Equal /// /// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements /// if the corresponding 'a' (sixteen signed 8-bit integer numbers) element /// are signed less than or equal to 'b' (sixteen signed 8-bit integer numbers) element. /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(cle_s.b))] pub unsafe fn __msa_cle_s_b(a: v16i8, b: v16i8) -> v16i8 { msa_cle_s_b(a, ::mem::transmute(b)) } /// Vector Compare Signed Less Than or Equal /// /// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements /// if the corresponding 'a' (eight signed 16-bit integer numbers) element /// are signed less than or equal to 'b' (eight signed 16-bit integer numbers) element. /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(cle_s.h))] pub unsafe fn __msa_cle_s_h(a: v8i16, b: v8i16) -> v8i16 { msa_cle_s_h(a, ::mem::transmute(b)) } /// Vector Compare Signed Less Than or Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four signed 32-bit integer numbers) element /// are signed less than or equal to 'b' (four signed 32-bit integer numbers) element. /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(cle_s.w))] pub unsafe fn __msa_cle_s_w(a: v4i32, b: v4i32) -> v4i32 { msa_cle_s_w(a, ::mem::transmute(b)) } /// Vector Compare Signed Less Than or Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two signed 64-bit integer numbers) element /// are signed less than or equal to 'b' (two signed 64-bit integer numbers) element. /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(cle_s.d))] pub unsafe fn __msa_cle_s_d(a: v2i64, b: v2i64) -> v2i64 { msa_cle_s_d(a, ::mem::transmute(b)) } /// Vector Compare Unsigned Less Than or Equal /// /// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements /// if the corresponding 'a' (sixteen unsigned 8-bit integer numbers) element /// are unsigned less than or equal to 'b' (sixteen unsigned 8-bit integer numbers) element. /// otherwise set all bits to 0. 
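///
/// A minimal usage sketch (illustrative only, not part of this crate's API):
/// note that the unsigned comparison still returns a signed mask vector
/// (`v16i8`), with -1 marking the lanes where `a[i] <= b[i]` holds; the wrapper
/// name is hypothetical.
///
/// ```ignore
/// unsafe fn le_mask_u8x16(a: [u8; 16], b: [u8; 16]) -> [i8; 16] {
///     let va: v16u8 = core::mem::transmute(a);
///     let vb: v16u8 = core::mem::transmute(b);
///     core::mem::transmute(__msa_cle_u_b(va, vb))
/// }
/// ```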
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(cle_u.b))] pub unsafe fn __msa_cle_u_b(a: v16u8, b: v16u8) -> v16i8 { msa_cle_u_b(a, ::mem::transmute(b)) } /// Vector Compare Unsigned Less Than or Equal /// /// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements /// if the corresponding 'a' (eight unsigned 16-bit integer numbers) element /// are unsigned less than or equal to 'b' (eight unsigned 16-bit integer numbers) element. /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(cle_u.h))] pub unsafe fn __msa_cle_u_h(a: v8u16, b: v8u16) -> v8i16 { msa_cle_u_h(a, ::mem::transmute(b)) } /// Vector Compare Unsigned Less Than or Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four unsigned 32-bit integer numbers) element /// are unsigned less than or equal to 'b' (four unsigned 32-bit integer numbers) element. /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(cle_u.w))] pub unsafe fn __msa_cle_u_w(a: v4u32, b: v4u32) -> v4i32 { msa_cle_u_w(a, ::mem::transmute(b)) } /// Vector Compare Unsigned Less Than or Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two unsigned 64-bit integer numbers) element /// are unsigned less than or equal to 'b' (two unsigned 64-bit integer numbers) element. /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(cle_u.d))] pub unsafe fn __msa_cle_u_d(a: v2u64, b: v2u64) -> v2i64 { msa_cle_u_d(a, ::mem::transmute(b)) } /// Immediate Compare Signed Less Than or Equal /// /// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements /// if the corresponding 'a' (sixteen signed 8-bit integer numbers) element /// is less than or equal to the 5-bit signed immediate imm_s5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clei_s.b, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clei_s_b(a: v16i8, imm_s5: i32) -> v16i8 { macro_rules! call { ($imm_s5:expr) => { msa_clei_s_b(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Compare Signed Less Than or Equal /// /// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements /// if the corresponding 'a' (eight signed 16-bit integer numbers) element /// is less than or equal to the 5-bit signed immediate imm_s5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clei_s.h, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clei_s_h(a: v8i16, imm_s5: i32) -> v8i16 { macro_rules! call { ($imm_s5:expr) => { msa_clei_s_h(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Compare Signed Less Than or Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four signed 32-bit integer numbers) element /// is less than or equal to the 5-bit signed immediate imm_s5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clei_s.w, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clei_s_w(a: v4i32, imm_s5: i32) -> v4i32 { macro_rules! 
call { ($imm_s5:expr) => { msa_clei_s_w(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Compare Signed Less Than or Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two signed 64-bit integer numbers) element /// is less than or equal to the 5-bit signed immediate imm_s5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clei_s.d, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clei_s_d(a: v2i64, imm_s5: i32) -> v2i64 { macro_rules! call { ($imm_s5:expr) => { msa_clei_s_d(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Compare Unsigned Less Than or Equal /// /// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements /// if the corresponding 'a' (sixteen unsigned 8-bit integer numbers) element /// is unsigned less than or equal to the 5-bit unsigned immediate imm5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clei_u.b, imm5 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clei_u_b(a: v16u8, imm5: i32) -> v16i8 { macro_rules! call { ($imm5:expr) => { msa_clei_u_b(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Compare Unsigned Less Than or Equal /// /// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements /// if the corresponding 'a' (eight unsigned 16-bit integer numbers) element /// is unsigned less than or equal to the 5-bit unsigned immediate imm5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clei_u.h, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clei_u_h(a: v8u16, imm5: i32) -> v8i16 { macro_rules! call { ($imm5:expr) => { msa_clei_u_h(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Compare Unsigned Less Than or Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four unsigned 32-bit integer numbers) element /// is unsigned less than or equal to the 5-bit unsigned immediate imm5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clei_u.w, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clei_u_w(a: v4u32, imm5: i32) -> v4i32 { macro_rules! call { ($imm5:expr) => { msa_clei_u_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Compare Unsigned Less Than or Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two unsigned 64-bit integer numbers) element /// is unsigned less than or equal to the 5-bit unsigned immediate imm5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clei_u.d, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clei_u_d(a: v2u64, imm5: i32) -> v2i64 { macro_rules! call { ($imm5:expr) => { msa_clei_u_d(a, $imm5) }; } constify_imm5!(imm5, call) } /// Vector Compare Signed Less Than /// /// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements /// if the corresponding 'a' (sixteen signed 8-bit integer numbers) element /// are signed less than 'b' (sixteen signed 8-bit integer numbers) element. /// otherwise set all bits to 0. 
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clt_s.b))] pub unsafe fn __msa_clt_s_b(a: v16i8, b: v16i8) -> v16i8 { msa_clt_s_b(a, ::mem::transmute(b)) } /// Vector Compare Signed Less Than /// /// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements /// if the corresponding 'a' (eight signed 16-bit integer numbers) element /// are signed less than 'b' (eight signed 16-bit integer numbers) element. /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clt_s.h))] pub unsafe fn __msa_clt_s_h(a: v8i16, b: v8i16) -> v8i16 { msa_clt_s_h(a, ::mem::transmute(b)) } /// Vector Compare Signed Less Than /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four signed 32-bit integer numbers) element /// are signed less than 'b' (four signed 32-bit integer numbers) element. /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clt_s.w))] pub unsafe fn __msa_clt_s_w(a: v4i32, b: v4i32) -> v4i32 { msa_clt_s_w(a, ::mem::transmute(b)) } /// Vector Compare Signed Less Than /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two signed 64-bit integer numbers) element /// are signed less than 'b' (two signed 64-bit integer numbers) element. /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clt_s.d))] pub unsafe fn __msa_clt_s_d(a: v2i64, b: v2i64) -> v2i64 { msa_clt_s_d(a, ::mem::transmute(b)) } /// Vector Compare Unsigned Less Than /// /// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements /// if the corresponding 'a' (sixteen unsigned 8-bit integer numbers) element /// are unsigned less than 'b' (sixteen unsigned 8-bit integer numbers) element. /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clt_u.b))] pub unsafe fn __msa_clt_u_b(a: v16u8, b: v16u8) -> v16i8 { msa_clt_u_b(a, ::mem::transmute(b)) } /// Vector Compare Unsigned Less Than /// /// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements /// if the corresponding 'a' (eight unsigned 16-bit integer numbers) element /// are unsigned less than 'b' (eight unsigned 16-bit integer numbers) element. /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clt_u.h))] pub unsafe fn __msa_clt_u_h(a: v8u16, b: v8u16) -> v8i16 { msa_clt_u_h(a, ::mem::transmute(b)) } /// Vector Compare Unsigned Less Than /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four unsigned 32-bit integer numbers) element /// are unsigned less than 'b' (four unsigned 32-bit integer numbers) element. /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clt_u.w))] pub unsafe fn __msa_clt_u_w(a: v4u32, b: v4u32) -> v4i32 { msa_clt_u_w(a, ::mem::transmute(b)) } /// Vector Compare Unsigned Less Than /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two unsigned 64-bit integer numbers) element /// are unsigned less than 'b' (two unsigned 64-bit integer numbers) element. /// otherwise set all bits to 0. 
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clt_u.d))] pub unsafe fn __msa_clt_u_d(a: v2u64, b: v2u64) -> v2i64 { msa_clt_u_d(a, ::mem::transmute(b)) } /// Immediate Compare Signed Less Than /// /// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements /// if the corresponding 'a' (sixteen signed 8-bit integer numbers) element /// is less than the 5-bit signed immediate imm_s5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clti_s.b, imm_s5 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clti_s_b(a: v16i8, imm_s5: i32) -> v16i8 { macro_rules! call { ($imm_s5:expr) => { msa_clti_s_b(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Compare Signed Less Than /// /// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements /// if the corresponding 'a' (eight signed 16-bit integer numbers) element /// is less than the 5-bit signed immediate imm_s5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clti_s.h, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clti_s_h(a: v8i16, imm_s5: i32) -> v8i16 { macro_rules! call { ($imm_s5:expr) => { msa_clti_s_h(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Compare Signed Less Than /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four signed 32-bit integer numbers) element /// is less than the 5-bit signed immediate imm_s5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clti_s.w, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clti_s_w(a: v4i32, imm_s5: i32) -> v4i32 { macro_rules! call { ($imm_s5:expr) => { msa_clti_s_w(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Compare Signed Less Than /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two signed 64-bit integer numbers) element /// is less than the 5-bit signed immediate imm_s5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clti_s.d, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clti_s_d(a: v2i64, imm_s5: i32) -> v2i64 { macro_rules! call { ($imm_s5:expr) => { msa_clti_s_d(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Compare Unsigned Less Than /// /// Set all bits to 1 in vector (sixteen signed 8-bit integer numbers) elements /// if the corresponding 'a' (sixteen unsigned 8-bit integer numbers) element /// is unsigned less than the 5-bit unsigned immediate imm5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clti_u.b, imm5 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clti_u_b(a: v16u8, imm5: i32) -> v16i8 { macro_rules! call { ($imm5:expr) => { msa_clti_u_b(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Compare Unsigned Less Than /// /// Set all bits to 1 in vector (eight signed 16-bit integer numbers) elements /// if the corresponding 'a' (eight unsigned 16-bit integer numbers) element /// is unsigned less than the 5-bit unsigned immediate imm5, /// otherwise set all bits to 0. 
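///
/// # Examples
///
/// A rough sketch (illustrative values; assumes a MIPS target with MSA
/// enabled). Because of `#[rustc_args_required_const(1)]`, the immediate must
/// be a literal constant at the call site:
///
/// ```ignore
/// unsafe {
///     let a: v8u16 = core::mem::transmute([3u16; 8]);
///     // no lane of `a` (all 3) is less than 2, so every result lane is 0
///     let r: v8i16 = __msa_clti_u_h(a, 2);
/// }
/// ```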
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clti_u.h, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clti_u_h(a: v8u16, imm5: i32) -> v8i16 { macro_rules! call { ($imm5:expr) => { msa_clti_u_h(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Compare Unsigned Less Than /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four unsigned 32-bit integer numbers) element /// is unsigned less than the 5-bit unsigned immediate imm5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clti_u.w, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clti_u_w(a: v4u32, imm5: i32) -> v4i32 { macro_rules! call { ($imm5:expr) => { msa_clti_u_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Compare Unsigned Less Than /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two unsigned 64-bit integer numbers) element /// is unsigned less than the 5-bit unsigned immediate imm5, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(clti_u.d, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_clti_u_d(a: v2u64, imm5: i32) -> v2i64 { macro_rules! call { ($imm5:expr) => { msa_clti_u_d(a, $imm5) }; } constify_imm5!(imm5, call) } /// Element Copy to GPR Signed /// /// Sign-extend element imm4 of vector 'a' (sixteen signed 8-bit integer numbers) /// and copy the result to GPR rd /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(copy_s.b, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_copy_s_b(a: v16i8, imm4: i32) -> i32 { macro_rules! call { ($imm4:expr) => { msa_copy_s_b(a, $imm4) }; } constify_imm4!(imm4, call) } /// Element Copy to GPR Signed /// /// Sign-extend element imm3 of vector 'a' (eight signed 16-bit integer numbers) /// and copy the result to GPR rd /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(copy_s.h, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_copy_s_h(a: v8i16, imm3: i32) -> i32 { macro_rules! call { ($imm3:expr) => { msa_copy_s_h(a, $imm3) }; } constify_imm3!(imm3, call) } /// Element Copy to GPR Signed /// /// Sign-extend element imm2 of vector 'a' (four signed 32-bit integer numbers) /// and copy the result to GPR rd /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(copy_s.w, imm2 = 0b11))] #[rustc_args_required_const(1)] pub unsafe fn __msa_copy_s_w(a: v4i32, imm2: i32) -> i32 { macro_rules! call { ($imm2:expr) => { msa_copy_s_w(a, $imm2) }; } constify_imm2!(imm2, call) } /// Element Copy to GPR Signed /// /// Sign-extend element imm1 of vector 'a' (two signed 64-bit integer numbers) /// and copy the result to GPR rd /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(copy_s.d, imm1 = 0b1))] #[rustc_args_required_const(1)] pub unsafe fn __msa_copy_s_d(a: v2i64, imm1: i32) -> i64 { macro_rules! 
call { ($imm1:expr) => { msa_copy_s_d(a, $imm1) }; } constify_imm1!(imm1, call) } /// Element Copy to GPR Unsigned /// /// Zero-extend element imm4 of vector 'a' (sixteen signed 8-bit integer numbers) /// and copy the result to GPR rd /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(copy_u.b, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_copy_u_b(a: v16i8, imm4: i32) -> u32 { macro_rules! call { ($imm4:expr) => { msa_copy_u_b(a, $imm4) }; } constify_imm4!(imm4, call) } /// Element Copy to GPR Unsigned /// /// Zero-extend element imm3 of vector 'a' (eight signed 16-bit integer numbers) /// and copy the result to GPR rd /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(copy_u.h, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_copy_u_h(a: v8i16, imm3: i32) -> u32 { macro_rules! call { ($imm3:expr) => { msa_copy_u_h(a, $imm3) }; } constify_imm3!(imm3, call) } /// Element Copy to GPR Unsigned /// /// Zero-extend element imm2 of vector 'a' (four signed 32-bit integer numbers) /// and copy the result to GPR rd /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(copy_u.w, imm2 = 0b11))] #[rustc_args_required_const(1)] pub unsafe fn __msa_copy_u_w(a: v4i32, imm2: i32) -> u32 { macro_rules! call { ($imm2:expr) => { msa_copy_u_w(a, $imm2) }; } constify_imm2!(imm2, call) } /// Element Copy to GPR Unsigned /// /// Zero-extend element imm1 of vector 'a' (two signed 64-bit integer numbers) /// and copy the result to GPR rd /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(copy_u.d, imm1 = 0b1))] #[rustc_args_required_const(1)] pub unsafe fn __msa_copy_u_d(a: v2i64, imm1: i32) -> u64 { macro_rules! call { ($imm1:expr) => { msa_copy_u_d(a, $imm1) }; } constify_imm1!(imm1, call) } /// GPR Copy to MSA Control Register /// /// The content of the least significant 31 bits of GPR 'a' is copied to /// MSA control register cd. /// /// Cannot be tested in user mode #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ctcmsa, imm1 = 0b1))] #[rustc_args_required_const(0)] pub unsafe fn __msa_ctcmsa(imm5: i32, a: i32) -> () { macro_rules! call { ($imm5:expr) => { msa_ctcmsa($imm5, a) }; } constify_imm5!(imm5, call) } /// Vector Signed Divide /// /// The signed integer elements in vector 'a' (sixteen signed 8-bit integer numbers) /// are divided by signed integer elements in vector 'b' (sixteen signed 8-bit integer numbers). /// The result is written to vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(div_s.b))] pub unsafe fn __msa_div_s_b(a: v16i8, b: v16i8) -> v16i8 { msa_div_s_b(a, ::mem::transmute(b)) } /// Vector Signed Divide /// /// The signed integer elements in vector 'a' (eight signed 16-bit integer numbers) /// are divided by signed integer elements in vector 'b' (eight signed 16-bit integer numbers). /// The result is written to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(div_s.h))] pub unsafe fn __msa_div_s_h(a: v8i16, b: v8i16) -> v8i16 { msa_div_s_h(a, ::mem::transmute(b)) } /// Vector Signed Divide /// /// The signed integer elements in vector 'a' (four signed 32-bit integer numbers) /// are divided by signed integer elements in vector 'b' (four signed 32-bit integer numbers). /// The result is written to vector (four signed 32-bit integer numbers).
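///
/// # Examples
///
/// A small sketch (illustrative values; assumes a MIPS target with MSA
/// enabled and that transmuting plain arrays is an acceptable way to build
/// the vectors):
///
/// ```ignore
/// unsafe {
///     let a: v4i32 = core::mem::transmute([100i32, -100, 7, 8]);
///     let b: v4i32 = core::mem::transmute([10i32, 10, 7, 2]);
///     // lane-wise signed division: [10, -10, 1, 4]
///     let r: v4i32 = __msa_div_s_w(a, b);
/// }
/// ```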
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(div_s.w))] pub unsafe fn __msa_div_s_w(a: v4i32, b: v4i32) -> v4i32 { msa_div_s_w(a, ::mem::transmute(b)) } /// Vector Signed Divide /// /// The signed integer elements in vector 'a' (two signed 64-bit integer numbers) /// are divided by signed integer elements in vector 'b' (two signed 64-bit integer numbers). /// The result is written to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(div_s.d))] pub unsafe fn __msa_div_s_d(a: v2i64, b: v2i64) -> v2i64 { msa_div_s_d(a, ::mem::transmute(b)) } /// Vector Unsigned Divide /// /// The unsigned integer elements in vector 'a' (sixteen unsigned 8-bit integer numbers) /// are divided by unsigned integer elements in vector 'b' (sixteen unsigned 8-bit integer numbers). /// The result is written to vector (sixteen unsigned 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(div_u.b))] pub unsafe fn __msa_div_u_b(a: v16u8, b: v16u8) -> v16u8 { msa_div_u_b(a, ::mem::transmute(b)) } /// Vector Unsigned Divide /// /// The unsigned integer elements in vector 'a' (eight unsigned 16-bit integer numbers) /// are divided by unsigned integer elements in vector 'b' (eight unsigned 16-bit integer numbers). /// The result is written to vector (eight unsigned 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(div_u.h))] pub unsafe fn __msa_div_u_h(a: v8u16, b: v8u16) -> v8u16 { msa_div_u_h(a, ::mem::transmute(b)) } /// Vector Unsigned Divide /// /// The unsigned integer elements in vector 'a' (four unsigned 32-bit integer numbers) /// are divided by unsigned integer elements in vector 'b' (four unsigned 32-bit integer numbers). /// The result is written to vector (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(div_u.w))] pub unsafe fn __msa_div_u_w(a: v4u32, b: v4u32) -> v4u32 { msa_div_u_w(a, ::mem::transmute(b)) } /// Vector Unsigned Divide /// /// The unsigned integer elements in vector 'a' (two unsigned 64-bit integer numbers) /// are divided by unsigned integer elements in vector 'b' (two unsigned 64-bit integer numbers). /// The result is written to vector (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(div_u.d))] pub unsafe fn __msa_div_u_d(a: v2u64, b: v2u64) -> v2u64 { msa_div_u_d(a, ::mem::transmute(b)) } /// Vector Signed Dot Product /// /// The signed integer elements in vector 'a' (sixteen signed 8-bit integer numbers) /// are multiplied by signed integer elements in vector 'b' (sixteen signed 8-bit integer numbers), /// producing a result the size of the input operands. The multiplication results of /// adjacent odd/even elements are added and stored to the destination /// vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dotp_s.h))] pub unsafe fn __msa_dotp_s_h(a: v16i8, b: v16i8) -> v8i16 { msa_dotp_s_h(a, ::mem::transmute(b)) } /// Vector Signed Dot Product /// /// The signed integer elements in vector 'a' (eight signed 16-bit integer numbers) /// are multiplied by signed integer elements in vector 'b' (eight signed 16-bit integer numbers), /// producing a result the size of the input operands.
The multiplication results of /// adjacent odd/even elements are added and stored to the destination /// vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dotp_s.w))] pub unsafe fn __msa_dotp_s_w(a: v8i16, b: v8i16) -> v4i32 { msa_dotp_s_w(a, ::mem::transmute(b)) } /// Vector Signed Dot Product /// /// The signed integer elements in vector 'a' (four signed 32-bit integer numbers) /// are multiplied by signed integer elements in vector 'b' (four signed 32-bit integer numbers), /// producing a result the size of the input operands. The multiplication results of /// adjacent odd/even elements are added and stored to the destination /// vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dotp_s.d))] pub unsafe fn __msa_dotp_s_d(a: v4i32, b: v4i32) -> v2i64 { msa_dotp_s_d(a, ::mem::transmute(b)) } /// Vector Unsigned Dot Product /// /// The unsigned integer elements in vector 'a' (sixteen unsigned 8-bit integer numbers) /// are multiplied by unsigned integer elements in vector 'b' (sixteen unsigned 8-bit integer numbers), /// producing a result the size of the input operands. The multiplication results of /// adjacent odd/even elements are added and stored to the destination /// vector (eight unsigned 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dotp_u.h))] pub unsafe fn __msa_dotp_u_h(a: v16u8, b: v16u8) -> v8u16 { msa_dotp_u_h(a, ::mem::transmute(b)) } /// Vector Unsigned Dot Product /// /// The unsigned integer elements in vector 'a' (eight unsigned 16-bit integer numbers) /// are multiplied by unsigned integer elements in vector 'b' (eight unsigned 16-bit integer numbers), /// producing a result the size of the input operands. The multiplication results of /// adjacent odd/even elements are added and stored to the destination /// vector (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dotp_u.w))] pub unsafe fn __msa_dotp_u_w(a: v8u16, b: v8u16) -> v4u32 { msa_dotp_u_w(a, ::mem::transmute(b)) } /// Vector Unsigned Dot Product /// /// The unsigned integer elements in vector 'a' (four unsigned 32-bit integer numbers) /// are multiplied by unsigned integer elements in vector 'b' (four unsigned 32-bit integer numbers), /// producing a result the size of the input operands. The multiplication results of /// adjacent odd/even elements are added and stored to the destination /// vector (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dotp_u.d))] pub unsafe fn __msa_dotp_u_d(a: v4u32, b: v4u32) -> v2u64 { msa_dotp_u_d(a, ::mem::transmute(b)) } /// Vector Signed Dot Product and Add /// /// The signed integer elements in vector 'b' (sixteen signed 8-bit integer numbers) /// are multiplied by signed integer elements in vector 'c' (sixteen signed 8-bit integer numbers), /// producing a result twice the size of the input operands. The multiplication results /// of adjacent odd/even elements are added to the vector 'a' (eight signed 16-bit integer numbers).
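///
/// # Examples
///
/// A sketch of the accumulate pattern (illustrative values; assumes MSA is
/// available and that transmuting plain arrays is an acceptable way to build
/// the vectors):
///
/// ```ignore
/// unsafe {
///     let acc: v8i16 = core::mem::transmute([0i16; 8]);
///     let b: v16i8 = core::mem::transmute([2i8; 16]);
///     let c: v16i8 = core::mem::transmute([3i8; 16]);
///     // each 16-bit lane accumulates two 8-bit products: 2*3 + 2*3 = 12
///     let r: v8i16 = __msa_dpadd_s_h(acc, b, c);
/// }
/// ```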
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dpadd_s.h))] pub unsafe fn __msa_dpadd_s_h(a: v8i16, b: v16i8, c: v16i8) -> v8i16 { msa_dpadd_s_h(a, ::mem::transmute(b), c) } /// Vector Signed Dot Product and Add /// /// The signed integer elements in vector 'b' (eight signed 16-bit integer numbers) /// are multiplied by signed integer elements in vector 'c' (eight signed 16-bit integer numbers), /// producing a result twice the size of the input operands. The multiplication results /// of adjacent odd/even elements are added to the vector 'a' (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dpadd_s.w))] pub unsafe fn __msa_dpadd_s_w(a: v4i32, b: v8i16, c: v8i16) -> v4i32 { msa_dpadd_s_w(a, ::mem::transmute(b), c) } /// Vector Signed Dot Product and Add /// /// The signed integer elements in vector 'b' (four signed 32-bit integer numbers) /// are multiplied by signed integer elements in vector 'c' (four signed 32-bit integer numbers), /// producing a result twice the size of the input operands. The multiplication results /// of adjacent odd/even elements are added to the vector 'a' (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dpadd_s.d))] pub unsafe fn __msa_dpadd_s_d(a: v2i64, b: v4i32, c: v4i32) -> v2i64 { msa_dpadd_s_d(a, ::mem::transmute(b), c) } /// Vector Unsigned Dot Product and Add /// /// The unsigned integer elements in vector 'b' (sixteen unsigned 8-bit integer numbers) /// are multiplied by unsigned integer elements in vector 'c' (sixteen unsigned 8-bit integer numbers), /// producing a result twice the size of the input operands. The multiplication results /// of adjacent odd/even elements are added to the vector 'a' (eight unsigned 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dpadd_u.h))] pub unsafe fn __msa_dpadd_u_h(a: v8u16, b: v16u8, c: v16u8) -> v8u16 { msa_dpadd_u_h(a, ::mem::transmute(b), c) } /// Vector Unsigned Dot Product and Add /// /// The unsigned integer elements in vector 'b' (eight unsigned 16-bit integer numbers) /// are multiplied by unsigned integer elements in vector 'c' (eight unsigned 16-bit integer numbers), /// producing a result twice the size of the input operands. The multiplication results /// of adjacent odd/even elements are added to the vector 'a' (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dpadd_u.w))] pub unsafe fn __msa_dpadd_u_w(a: v4u32, b: v8u16, c: v8u16) -> v4u32 { msa_dpadd_u_w(a, ::mem::transmute(b), c) } /// Vector Unsigned Dot Product and Add /// /// The unsigned integer elements in vector 'b' (four unsigned 32-bit integer numbers) /// are multiplied by unsigned integer elements in vector 'c' (four unsigned 32-bit integer numbers), /// producing a result twice the size of the input operands. The multiplication results /// of adjacent odd/even elements are added to the vector 'a' (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dpadd_u.d))] pub unsafe fn __msa_dpadd_u_d(a: v2u64, b: v4u32, c: v4u32) -> v2u64 { msa_dpadd_u_d(a, ::mem::transmute(b), c) } /// Vector Signed Dot Product and Subtract /// /// The signed integer elements in vector 'b' (sixteen signed 8-bit integer numbers) /// are multiplied by signed integer elements in vector 'c' (sixteen signed 8-bit integer numbers),
/// producing a result twice the size of the input operands. The multiplication results /// of adjacent odd/even elements are subtracted from the integer elements in vector 'a' /// (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dpsub_s.h))] pub unsafe fn __msa_dpsub_s_h(a: v8i16, b: v16i8, c: v16i8) -> v8i16 { msa_dpsub_s_h(a, ::mem::transmute(b), c) } /// Vector Signed Dot Product and Subtract /// /// The signed integer elements in vector 'b' (eight signed 16-bit integer numbers) /// are multiplied by signed integer elements in vector 'c' (eight signed 16-bit integer numbers), /// producing a result twice the size of the input operands. The multiplication results /// of adjacent odd/even elements are subtracted from the integer elements in vector 'a' /// (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dpsub_s.w))] pub unsafe fn __msa_dpsub_s_w(a: v4i32, b: v8i16, c: v8i16) -> v4i32 { msa_dpsub_s_w(a, ::mem::transmute(b), c) } /// Vector Signed Dot Product and Subtract /// /// The signed integer elements in vector 'b' (four signed 32-bit integer numbers) /// are multiplied by signed integer elements in vector 'c' (four signed 32-bit integer numbers), /// producing a result twice the size of the input operands. The multiplication results /// of adjacent odd/even elements are subtracted from the integer elements in vector 'a' /// (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dpsub_s.d))] pub unsafe fn __msa_dpsub_s_d(a: v2i64, b: v4i32, c: v4i32) -> v2i64 { msa_dpsub_s_d(a, ::mem::transmute(b), c) } /// Vector Unsigned Dot Product and Subtract /// /// The unsigned integer elements in vector 'b' (sixteen unsigned 8-bit integer numbers) /// are multiplied by unsigned integer elements in vector 'c' (sixteen unsigned 8-bit integer numbers), /// producing a result twice the size of the input operands. The multiplication results /// of adjacent odd/even elements are subtracted from the integer elements in vector 'a' /// (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dpsub_u.h))] pub unsafe fn __msa_dpsub_u_h(a: v8i16, b: v16u8, c: v16u8) -> v8i16 { msa_dpsub_u_h(a, ::mem::transmute(b), c) } /// Vector Unsigned Dot Product and Subtract /// /// The unsigned integer elements in vector 'b' (eight unsigned 16-bit integer numbers) /// are multiplied by unsigned integer elements in vector 'c' (eight unsigned 16-bit integer numbers), /// producing a result twice the size of the input operands. The multiplication results /// of adjacent odd/even elements are subtracted from the integer elements in vector 'a' /// (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dpsub_u.w))] pub unsafe fn __msa_dpsub_u_w(a: v4i32, b: v8u16, c: v8u16) -> v4i32 { msa_dpsub_u_w(a, ::mem::transmute(b), c) } /// Vector Unsigned Dot Product and Subtract /// /// The unsigned integer elements in vector 'b' (four unsigned 32-bit integer numbers) /// are multiplied by unsigned integer elements in vector 'c' (four unsigned 32-bit integer numbers), /// producing a result twice the size of the input operands. The multiplication results /// of adjacent odd/even elements are subtracted from the integer elements in vector 'a' /// (two signed 64-bit integer numbers).
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(dpsub_u.d))] pub unsafe fn __msa_dpsub_u_d(a: v2i64, b: v4u32, c: v4u32) -> v2i64 { msa_dpsub_u_d(a, ::mem::transmute(b), c) } /// Vector Floating-Point Addition /// /// The floating-point elements in vector 'a' (four 32-bit floating point numbers) /// are added to the floating-point elements in 'b' (four 32-bit floating point numbers). /// The result is written to vector (four 32-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fadd.w))] pub unsafe fn __msa_fadd_w(a: v4f32, b: v4f32) -> v4f32 { msa_fadd_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Addition /// /// The floating-point elements in vector 'a' (two 64-bit floating point numbers) /// are added to the floating-point elements in 'b' (two 64-bit floating point numbers). /// The result is written to vector (two 64-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fadd.d))] pub unsafe fn __msa_fadd_d(a: v2f64, b: v2f64) -> v2f64 { msa_fadd_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Always False /// /// Set all bits to 0 in vector (four signed 32-bit integer numbers). /// Signaling NaN elements in 'a' (four 32-bit floating point numbers) /// or 'b' (four 32-bit floating point numbers) signal Invalid Operation exception. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcaf.w))] pub unsafe fn __msa_fcaf_w(a: v4f32, b: v4f32) -> v4i32 { msa_fcaf_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Always False /// /// Set all bits to 0 in vector (two signed 64-bit integer numbers). /// Signaling NaN elements in 'a' (two 64-bit floating point numbers) /// or 'b' (two 64-bit floating point numbers) signal Invalid Operation exception. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcaf.d))] pub unsafe fn __msa_fcaf_d(a: v2f64, b: v2f64) -> v2i64 { msa_fcaf_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) /// elements if the corresponding in 'a' (four 32-bit floating point numbers) /// and 'b' (four 32-bit floating point numbers) elements are ordered and equal, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fceq.w))] pub unsafe fn __msa_fceq_w(a: v4f32, b: v4f32) -> v4i32 { msa_fceq_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) /// elements if the corresponding in 'a' (two 64-bit floating point numbers) /// and 'b' (two 64-bit floating point numbers) elements are ordered and equal, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fceq.d))] pub unsafe fn __msa_fceq_d(a: v2f64, b: v2f64) -> v2i64 { msa_fceq_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Class Mask /// /// Store in each element of vector (four signed 32-bit integer numbers) /// a bit mask reflecting the floating-point class of the corresponding element of vector /// 'a' (four 32-bit floating point numbers). /// The mask has 10 bits as follows. Bits 0 and 1 indicate NaN values: signaling NaN (bit 0) and quiet NaN (bit 1). /// Bits 2, 3, 4, 5 classify negative values: infinity (bit 2), normal (bit 3), subnormal (bit 4), and zero (bit 5).
/// Bits 6, 7, 8, 9 classify positive values: infinity (bit 6), normal (bit 7), subnormal (bit 8), and zero (bit 9). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fclass.w))] pub unsafe fn __msa_fclass_w(a: v4f32) -> v4i32 { msa_fclass_w(a) } /// Vector Floating-Point Class Mask /// /// Store in each element of vector (two signed 64-bit integer numbers) /// a bit mask reflecting the floating-point class of the corresponding element of vector /// 'a' (two 64-bit floating point numbers). /// The mask has 10 bits as follows. Bits 0 and 1 indicate NaN values: signaling NaN (bit 0) and quiet NaN (bit 1). /// Bits 2, 3, 4, 5 classify negative values: infinity (bit 2), normal (bit 3), subnormal (bit 4), and zero (bit 5). /// Bits 6, 7, 8, 9 classify positive values: infinity (bit 6), normal (bit 7), subnormal (bit 8), and zero (bit 9). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fclass.d))] pub unsafe fn __msa_fclass_d(a: v2f64) -> v2i64 { msa_fclass_d(a) } /// Vector Floating-Point Quiet Compare Less or Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) /// elements if the corresponding 'a' (four 32-bit floating point numbers) elements are ordered /// and either less than or equal to 'b' (four 32-bit floating point numbers) elements, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcle.w))] pub unsafe fn __msa_fcle_w(a: v4f32, b: v4f32) -> v4i32 { msa_fcle_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Less or Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) /// elements if the corresponding 'a' (two 64-bit floating point numbers) elements are ordered /// and either less than or equal to 'b' (two 64-bit floating point numbers) elements, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcle.d))] pub unsafe fn __msa_fcle_d(a: v2f64, b: v2f64) -> v2i64 { msa_fcle_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Less Than /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) /// elements if the corresponding 'a' (four 32-bit floating point numbers) elements are ordered /// and less than 'b' (four 32-bit floating point numbers) elements, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fclt.w))] pub unsafe fn __msa_fclt_w(a: v4f32, b: v4f32) -> v4i32 { msa_fclt_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Less Than /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) /// elements if the corresponding 'a' (two 64-bit floating point numbers) elements are ordered /// and less than 'b' (two 64-bit floating point numbers) elements, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fclt.d))] pub unsafe fn __msa_fclt_d(a: v2f64, b: v2f64) -> v2i64 { msa_fclt_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Not Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) /// elements if the corresponding 'a' (four 32-bit floating point numbers) and /// 'b' (four 32-bit floating point numbers) elements are ordered and not equal, /// otherwise set all bits to 0.
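///
/// # Examples
///
/// A sketch of how the resulting lane mask might be used (illustrative values;
/// assumes a MIPS target with MSA enabled and vectors built by transmuting
/// arrays):
///
/// ```ignore
/// unsafe {
///     let a: v4f32 = core::mem::transmute([1.0f32, 2.0, 3.0, 4.0]);
///     let b: v4f32 = core::mem::transmute([1.0f32, 0.0, 3.0, 5.0]);
///     // lanes 1 and 3 differ, so those mask lanes are all ones (-1);
///     // lanes 0 and 2 compare equal, so those mask lanes are 0
///     let mask: v4i32 = __msa_fcne_w(a, b);
/// }
/// ```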
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcne.w))] pub unsafe fn __msa_fcne_w(a: v4f32, b: v4f32) -> v4i32 { msa_fcne_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Not Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) /// elements if the corresponding 'a' (two 64-bit floating point numbers) and /// 'b' (two 64-bit floating point numbers) elements are ordered and not equal, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcne.d))] pub unsafe fn __msa_fcne_d(a: v2f64, b: v2f64) -> v2i64 { msa_fcne_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Ordered /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) /// elements if the corresponding 'a' (four 32-bit floating point numbers) and /// 'b' (four 32-bit floating point numbers) elements are ordered, i.e. both elements are not NaN values, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcor.w))] pub unsafe fn __msa_fcor_w(a: v4f32, b: v4f32) -> v4i32 { msa_fcor_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Ordered /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) /// elements if the corresponding 'a' (two 64-bit floating point numbers) and /// 'b' (two 64-bit floating point numbers) elements are ordered, i.e. both elements are not NaN values, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcor.d))] pub unsafe fn __msa_fcor_d(a: v2f64, b: v2f64) -> v2i64 { msa_fcor_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Unordered or Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) /// elements if the corresponding 'a' (four 32-bit floating point numbers) and /// 'b' (four 32-bit floating point numbers) elements are unordered or equal, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcueq.w))] pub unsafe fn __msa_fcueq_w(a: v4f32, b: v4f32) -> v4i32 { msa_fcueq_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Unordered or Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) /// elements if the corresponding 'a' (two 64-bit floating point numbers) and /// 'b' (two 64-bit floating point numbers) elements are unordered or equal, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcueq.d))] pub unsafe fn __msa_fcueq_d(a: v2f64, b: v2f64) -> v2i64 { msa_fcueq_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Unordered or Less or Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) /// elements if the corresponding elements in 'a' (four 32-bit floating point numbers) /// are unordered or less than or equal to 'b' (four 32-bit floating point numbers) elements, /// otherwise set all bits to 0.
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcule.w))] pub unsafe fn __msa_fcule_w(a: v4f32, b: v4f32) -> v4i32 { msa_fcule_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Unordered or Less or Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) /// elements if the corresponding elements in 'a' (two 64-bit floating point numbers) /// are unordered or less than or equal to 'b' (two 64-bit floating point numbers) elements, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcule.d))] pub unsafe fn __msa_fcule_d(a: v2f64, b: v2f64) -> v2i64 { msa_fcule_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Unordered or Less Than /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) /// elements if the corresponding elements in 'a' (four 32-bit floating point numbers) /// are unordered or less than 'b' (four 32-bit floating point numbers) elements, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcult.w))] pub unsafe fn __msa_fcult_w(a: v4f32, b: v4f32) -> v4i32 { msa_fcult_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Unordered or Less Than /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) /// elements if the corresponding elements in 'a' (two 64-bit floating point numbers) /// are unordered or less than 'b' (two 64-bit floating point numbers) elements, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcult.d))] pub unsafe fn __msa_fcult_d(a: v2f64, b: v2f64) -> v2i64 { msa_fcult_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Unordered /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) /// elements if the corresponding 'a' (four 32-bit floating point numbers) /// and 'b' (four 32-bit floating point numbers) elements are unordered, /// i.e. at least one element is a NaN value, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcun.w))] pub unsafe fn __msa_fcun_w(a: v4f32, b: v4f32) -> v4i32 { msa_fcun_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Unordered /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) /// elements if the corresponding 'a' (two 64-bit floating point numbers) /// and 'b' (two 64-bit floating point numbers) elements are unordered, /// i.e. at least one element is a NaN value, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcun.d))] pub unsafe fn __msa_fcun_d(a: v2f64, b: v2f64) -> v2i64 { msa_fcun_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Unordered or Not Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) /// elements if the corresponding 'a' (four 32-bit floating point numbers) /// and 'b' (four 32-bit floating point numbers) elements are unordered or not equal, /// otherwise set all bits to 0.
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcune.w))] pub unsafe fn __msa_fcune_w(a: v4f32, b: v4f32) -> v4i32 { msa_fcune_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Quiet Compare Unordered or Not Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) /// elements if the corresponding 'a' (two 64-bit floating point numbers) /// and 'b' (two 64-bit floating point numbers) elements are unordered or not equal, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fcune.d))] pub unsafe fn __msa_fcune_d(a: v2f64, b: v2f64) -> v2i64 { msa_fcune_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Division /// /// The floating-point elements in vector 'a' (four 32-bit floating point numbers) /// are divided by the floating-point elements in vector 'b' (four 32-bit floating point numbers). /// The result is written to vector (four 32-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fdiv.w))] pub unsafe fn __msa_fdiv_w(a: v4f32, b: v4f32) -> v4f32 { msa_fdiv_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Division /// /// The floating-point elements in vector 'a' (two 64-bit floating point numbers) /// are divided by the floating-point elements in vector 'b' (two 64-bit floating point numbers). /// The result is written to vector (two 64-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fdiv.d))] pub unsafe fn __msa_fdiv_d(a: v2f64, b: v2f64) -> v2f64 { msa_fdiv_d(a, ::mem::transmute(b)) } /* FIXME: 16-bit float /// Vector Floating-Point Down-Convert Interchange Format /// /// The floating-point elements in vector 'a' (four 64-bit floating point numbers) /// and vector 'b' (four 64-bit floating point numbers) are down-converted /// to a smaller interchange format, i.e. from 64-bit to 32-bit, or from 32-bit to 16-bit. /// The result is written to vector (8 16-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fexdo.h))] pub unsafe fn __msa_fexdo_h(a: v4f32, b: v4f32) -> f16x8 { msa_fexdo_h(a, ::mem::transmute(b)) }*/ /// Vector Floating-Point Down-Convert Interchange Format /// /// The floating-point elements in vector 'a' (two 64-bit floating point numbers) /// and vector 'b' (two 64-bit floating point numbers) are down-converted /// to a smaller interchange format, i.e. from 64-bit to 32-bit, or from 32-bit to 16-bit. /// The result is written to vector (four 32-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fexdo.w))] pub unsafe fn __msa_fexdo_w(a: v2f64, b: v2f64) -> v4f32 { msa_fexdo_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Base 2 Exponentiation /// /// The floating-point elements in vector 'a' (four 32-bit floating point numbers) /// are scaled, i.e. multiplied, by 2 to the power of integer elements in vector 'b' /// (four signed 32-bit integer numbers). /// The result is written to vector (four 32-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fexp2.w))] pub unsafe fn __msa_fexp2_w(a: v4f32, b: v4i32) -> v4f32 { msa_fexp2_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Base 2 Exponentiation /// /// The floating-point elements in vector 'a' (two 64-bit floating point numbers) /// are scaled, i.e.
multiplied, by 2 to the power of integer elements in vector 'b' /// (two signed 64-bit integer numbers). /// The result is written to vector (two 64-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fexp2.d))] pub unsafe fn __msa_fexp2_d(a: v2f64, b: v2i64) -> v2f64 { msa_fexp2_d(a, ::mem::transmute(b)) } /* FIXME: 16-bit float /// Vector Floating-Point Up-Convert Interchange Format Left /// /// The left half floating-point elements in vector 'a' (two 16-bit floating point numbers) /// are up-converted to a larger interchange format, /// i.e. from 16-bit to 32-bit, or from 32-bit to 64-bit. /// The result is written to vector (four 32-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fexupl.w))] pub unsafe fn __msa_fexupl_w(a: f16x8) -> v4f32 { msa_fexupl_w(a) }*/ /// Vector Floating-Point Up-Convert Interchange Format Left /// /// The left half floating-point elements in vector 'a' (four 32-bit floating point numbers) /// are up-converted to a larger interchange format, /// i.e. from 16-bit to 32-bit, or from 32-bit to 64-bit. /// The result is written to vector (two 64-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fexupl.d))] pub unsafe fn __msa_fexupl_d(a: v4f32) -> v2f64 { msa_fexupl_d(a) } /* FIXME: 16-bit float /// Vector Floating-Point Up-Convert Interchange Format Right /// /// The right half floating-point elements in vector 'a' (two 16-bit floating point numbers) /// are up-converted to a larger interchange format, /// i.e. from 16-bit to 32-bit, or from 32-bit to 64-bit. /// The result is written to vector (four 32-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fexupr.w))] pub unsafe fn __msa_fexupr_w(a: f16x8) -> v4f32 { msa_fexupr_w(a) } */ /// Vector Floating-Point Up-Convert Interchange Format Right /// /// The right half floating-point elements in vector 'a' (four 32-bit floating point numbers) /// are up-converted to a larger interchange format, /// i.e. from 16-bit to 32-bit, or from 32-bit to 64-bit. /// The result is written to vector (two 64-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fexupr.d))] pub unsafe fn __msa_fexupr_d(a: v4f32) -> v2f64 { msa_fexupr_d(a) } /// Vector Floating-Point Round and Convert from Signed Integer /// /// The signed integer elements in vector 'a' (four signed 32-bit integer numbers) /// are converted to floating-point values. /// The result is written to vector (four 32-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ffint_s.w))] pub unsafe fn __msa_ffint_s_w(a: v4i32) -> v4f32 { msa_ffint_s_w(a) } /// Vector Floating-Point Round and Convert from Signed Integer /// /// The signed integer elements in vector 'a' (two signed 64-bit integer numbers) /// are converted to floating-point values. /// The result is written to vector (two 64-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ffint_s.d))] pub unsafe fn __msa_ffint_s_d(a: v2i64) -> v2f64 { msa_ffint_s_d(a) } /// Vector Floating-Point Round and Convert from Unsigned Integer /// /// The unsigned integer elements in vector 'a' (four unsigned 32-bit integer numbers) /// are converted to floating-point values. /// The result is written to vector (four 32-bit floating point numbers).
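///
/// # Examples
///
/// A conversion sketch (illustrative values; assumes a MIPS target with MSA
/// enabled and a vector built by transmuting an array):
///
/// ```ignore
/// unsafe {
///     let a: v4u32 = core::mem::transmute([0u32, 1, 2, 3]);
///     // each unsigned lane converted to f32: [0.0, 1.0, 2.0, 3.0]
///     let r: v4f32 = __msa_ffint_u_w(a);
/// }
/// ```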
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ffint_u.w))] pub unsafe fn __msa_ffint_u_w(a: v4u32) -> v4f32 { msa_ffint_u_w(a) } /// Vector Floating-Point Round and Convert from Unsigned Integer /// /// The unsigned integer elements in vector 'a' (two unsigned 64-bit integer numbers) /// are converted to floating-point values. /// The result is written to vector (two 64-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ffint_u.d))] pub unsafe fn __msa_ffint_u_d(a: v2u64) -> v2f64 { msa_ffint_u_d(a) } /// Vector Floating-Point Convert from Fixed-Point Left /// /// The left half fixed-point elements in vector 'a' (eight signed 16-bit integer numbers) /// are up-converted to floating-point data format, /// i.e. from 16-bit Q15 to 32-bit floating-point, or from 32-bit Q31 to 64-bit floating-point. /// The result is written to vector (four 32-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ffql.w))] pub unsafe fn __msa_ffql_w(a: v8i16) -> v4f32 { msa_ffql_w(a) } /// Vector Floating-Point Convert from Fixed-Point Left /// /// The left half fixed-point elements in vector 'a' (four signed 32-bit integer numbers) /// are up-converted to floating-point data format, /// i.e. from 16-bit Q15 to 32-bit floating-point, or from 32-bit Q31 to 64-bit floating-point. /// The result is written to vector (two 64-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ffql.d))] pub unsafe fn __msa_ffql_d(a: v4i32) -> v2f64 { msa_ffql_d(a) } /// Vector Floating-Point Convert from Fixed-Point Right /// /// The right half fixed-point elements in vector 'a' (eight signed 16-bit integer numbers) /// are up-converted to floating-point data format, /// i.e. from 16-bit Q15 to 32-bit floating-point, or from 32-bit Q31 to 64-bit floating-point. /// The result is written to vector (four 32-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ffqr.w))] pub unsafe fn __msa_ffqr_w(a: v8i16) -> v4f32 { msa_ffqr_w(a) } /// Vector Floating-Point Convert from Fixed-Point Right /// /// The right half fixed-point elements in vector 'a' (four signed 32-bit integer numbers) /// are up-converted to floating-point data format, /// i.e. from 16-bit Q15 to 32-bit floating-point, or from 32-bit Q31 to 64-bit floating-point. /// The result is written to vector (two 64-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ffqr.d))] pub unsafe fn __msa_ffqr_d(a: v4i32) -> v2f64 { msa_ffqr_d(a) } /// Vector Fill from GPR /// /// Replicate GPR rs value to all elements in vector (sixteen signed 8-bit integer numbers). /// If the source GPR is wider than the destination data format, the destination's elements /// will be set to the least significant bits of the GPR /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fill.b))] pub unsafe fn __msa_fill_b(a: i32) -> v16i8 { msa_fill_b(a) } /// Vector Fill from GPR /// /// Replicate GPR rs value to all elements in vector (eight signed 16-bit integer numbers).
/// If the source GPR is wider than the destination data format, the destination's elements /// will be set to the least significant bits of the GPR /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fill.h))] pub unsafe fn __msa_fill_h(a: i32) -> v8i16 { msa_fill_h(a) } /// Vector Fill from GPR /// /// Replicate GPR rs value to all elements in vector (four signed 32-bit integer numbers). /// If the source GPR is wider than the destination data format, the destination's elements /// will be set to the least significant bits of the GPR /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fill.w))] pub unsafe fn __msa_fill_w(a: i32) -> v4i32 { msa_fill_w(a) } /// Vector Fill from GPR /// /// Replicate GPR rs value to all elements in vector (two signed 64-bit integer numbers). /// If the source GPR is wider than the destination data format, the destination's elements /// will be set to the least significant bits of the GPR /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fill.d))] pub unsafe fn __msa_fill_d(a: i64) -> v2i64 { msa_fill_d(a) } /// Vector Floating-Point Base 2 Logarithm /// /// The signed integral base 2 exponents of floating-point elements in vector 'a' /// (four 32-bit floating point numbers) are written as floating-point values to vector elements /// (four 32-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(flog2.w))] pub unsafe fn __msa_flog2_w(a: v4f32) -> v4f32 { msa_flog2_w(a) } /// Vector Floating-Point Base 2 Logarithm /// /// The signed integral base 2 exponents of floating-point elements in vector 'a' /// (two 64-bit floating point numbers) are written as floating-point values to vector elements /// (two 64-bit floating point numbers).
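///
/// # Examples
///
/// A sketch showing the exponent extraction (illustrative values; assumes a
/// MIPS target with MSA enabled and a vector built by transmuting an array):
///
/// ```ignore
/// unsafe {
///     let a: v2f64 = core::mem::transmute([8.0f64, 0.5]);
///     // base-2 exponents returned as floating point: [3.0, -1.0]
///     let r: v2f64 = __msa_flog2_d(a);
/// }
/// ```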
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(flog2.d))] pub unsafe fn __msa_flog2_d(a: v2f64) -> v2f64 { msa_flog2_d(a) } /// Vector Floating-Point Multiply-Add /// /// The floating-point elements in vector 'b' (four 32-bit floating point numbers) /// multiplied by floating-point elements in vector 'c' (four 32-bit floating point numbers) /// are added to the floating-point elements in vector 'a' (four 32-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmadd.w))] pub unsafe fn __msa_fmadd_w(a: v4f32, b: v4f32, c: v4f32) -> v4f32 { msa_fmadd_w(a, ::mem::transmute(b), c) } /// Vector Floating-Point Multiply-Add /// /// The floating-point elements in vector 'b' (two 64-bit floating point numbers) /// multiplied by floating-point elements in vector 'c' (two 64-bit floating point numbers) /// are added to the floating-point elements in vector 'a' (two 64-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmadd.d))] pub unsafe fn __msa_fmadd_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64 { msa_fmadd_d(a, ::mem::transmute(b), c) } /// Vector Floating-Point Maximum /// /// The largest values between corresponding floating-point elements in vector 'a' /// (four 32-bit floating point numbers) and vector 'b' (four 32-bit floating point numbers) /// are written to vector (four 32-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmax.w))] pub unsafe fn __msa_fmax_w(a: v4f32, b: v4f32) -> v4f32 { msa_fmax_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Maximum /// /// The largest values between corresponding floating-point elements in vector 'a' /// (two 64-bit floating point numbers) and vector 'b' (two 64-bit floating point numbers) /// are written to vector (two 64-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmax.d))] pub unsafe fn __msa_fmax_d(a: v2f64, b: v2f64) -> v2f64 { msa_fmax_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Maximum Based on Absolute Values /// /// The value with the largest magnitude, i.e. absolute value, between corresponding /// floating-point elements in vector 'a' (four 32-bit floating point numbers) /// and vector 'b' (four 32-bit floating point numbers) /// are written to vector (four 32-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmax_a.w))] pub unsafe fn __msa_fmax_a_w(a: v4f32, b: v4f32) -> v4f32 { msa_fmax_a_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Maximum Based on Absolute Values /// /// The value with the largest magnitude, i.e.
absolute value, between corresponding /// floating-point elements in vector 'a' (two 64-bit floating point numbers) /// and vector 'b' (two 64-bit floating point numbers) /// are written to vector (two 64-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmax_a.d))] pub unsafe fn __msa_fmax_a_d(a: v2f64, b: v2f64) -> v2f64 { msa_fmax_a_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Minimum /// /// The smallest values between corresponding floating-point elements in vector 'a' /// (four 32-bit floating point numbers)andvector 'b' (four 32-bit floating point numbers) /// are written to vector (four 32-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmin.w))] pub unsafe fn __msa_fmin_w(a: v4f32, b: v4f32) -> v4f32 { msa_fmin_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Minimum /// /// The smallest values between corresponding floating-point elements in vector 'a' /// (two 64-bit floating point numbers) and vector 'b' (two 64-bit floating point numbers) /// are written to vector (two 64-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmin.d))] pub unsafe fn __msa_fmin_d(a: v2f64, b: v2f64) -> v2f64 { msa_fmin_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Minimum Based on Absolute Values /// /// The value with the smallest magnitude, i.e. absolute value, between corresponding /// floating-point elements in vector 'a' (four 32-bit floating point numbers) /// and vector 'b' (four 32-bit floating point numbers) /// are written to vector (four 32-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmin_a.w))] pub unsafe fn __msa_fmin_a_w(a: v4f32, b: v4f32) -> v4f32 { msa_fmin_a_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Minimum Based on Absolute Values /// /// The value with the smallest magnitude, i.e. 
absolute value, between corresponding /// floating-point elements in vector 'a' (two 64-bit floating point numbers) /// and vector 'b' (two 64-bit floating point numbers) /// are written to vector (two 64-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmin_a.d))] pub unsafe fn __msa_fmin_a_d(a: v2f64, b: v2f64) -> v2f64 { msa_fmin_a_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Multiply-Sub /// /// The floating-point elements in vector 'b' (four 32-bit floating point numbers) /// multiplied by floating-point elements in vector 'c' (four 32-bit floating point numbers) /// are subtracted from the floating-point elements in vector 'a' (four 32-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmsub.w))] pub unsafe fn __msa_fmsub_w(a: v4f32, b: v4f32, c: v4f32) -> v4f32 { msa_fmsub_w(a, ::mem::transmute(b), c) } /// Vector Floating-Point Multiply-Sub /// /// The floating-point elements in vector 'b' (two 64-bit floating point numbers) /// multiplied by floating-point elements in vector 'c' (two 64-bit floating point numbers) /// are subtracted from the floating-point elements in vector 'a' (two 64-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmsub.d))] pub unsafe fn __msa_fmsub_d(a: v2f64, b: v2f64, c: v2f64) -> v2f64 { msa_fmsub_d(a, ::mem::transmute(b), c) } /// Vector Floating-Point Multiplication /// /// The floating-point elements in vector 'a' (four 32-bit floating point numbers)are /// multiplied by floating-point elements in vector 'b' (four 32-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmul.w))] pub unsafe fn __msa_fmul_w(a: v4f32, b: v4f32) -> v4f32 { msa_fmul_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Multiplication /// /// The floating-point elements in vector 'a' (two 64-bit floating point numbers) are /// multiplied by floating-point elements in vector 'b' (two 64-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fmul.d))] pub unsafe fn __msa_fmul_d(a: v2f64, b: v2f64) -> v2f64 { msa_fmul_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Round to Integer /// /// The floating-point elements in vector 'a' (four 32-bit floating point numbers) /// are rounded to an integral valued floating-point number in the same format based /// on the rounding mode bits RM in MSA Control and Status Register MSACSR. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(frint.w))] pub unsafe fn __msa_frint_w(a: v4f32) -> v4f32 { msa_frint_w(a) } /// Vector Floating-Point Round to Integer /// /// The floating-point elements in vector 'a' (two 64-bit floating point numbers) /// are rounded to an integral valued floating-point number in the same format based /// on the rounding mode bits RM in MSA Control and Status Register MSACSR. 
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(frint.d))] pub unsafe fn __msa_frint_d(a: v2f64) -> v2f64 { msa_frint_d(a) } /// Vector Approximate Floating-Point Reciprocal /// /// The reciprocals of floating-point elements in vector 'a' (four 32-bit floating point numbers) /// are calculated and the result is written to vector (four 32-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(frcp.w))] pub unsafe fn __msa_frcp_w(a: v4f32) -> v4f32 { msa_frcp_w(a) } /// Vector Approximate Floating-Point Reciprocal /// /// The reciprocals of floating-point elements in vector 'a' (two 64-bit floating point numbers) /// are calculated and the result is written to vector (two 64-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(frcp.d))] pub unsafe fn __msa_frcp_d(a: v2f64) -> v2f64 { msa_frcp_d(a) } /// Vector Approximate Floating-Point Reciprocal of Square Root /// /// The reciprocals of the square roots of floating-point elements in vector 'a' (four 32-bit floating point numbers) /// are calculated and the result is written to vector (four 32-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(frsqrt.w))] pub unsafe fn __msa_frsqrt_w(a: v4f32) -> v4f32 { msa_frsqrt_w(a) } /// Vector Approximate Floating-Point Reciprocal of Square Root /// /// The reciprocals of the square roots of floating-point elements in vector 'a' (two 64-bit floating point numbers) /// are calculated and the result is written to vector (two 64-bit floating point numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(frsqrt.d))] pub unsafe fn __msa_frsqrt_d(a: v2f64) -> v2f64 { msa_frsqrt_d(a) } /// Vector Floating-Point Signaling Compare Always False /// /// Set all bits to 0 in vector (four signed 32-bit integer numbers) elements. /// Signaling and quiet NaN elements in vector 'a' (four 32-bit floating point numbers) /// or 'b' (four 32-bit floating point numbers)signal Invalid Operation exception. /// In case of a floating-point exception, the default result has all bits set to 0 /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsaf.w))] pub unsafe fn __msa_fsaf_w(a: v4f32, b: v4f32) -> v4i32 { msa_fsaf_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Always False /// /// Set all bits to 0 in vector (two signed 64-bit integer numbers) elements. /// Signaling and quiet NaN elements in vector 'a' (two 64-bit floating point numbers) /// or 'b' (two 64-bit floating point numbers) signal Invalid Operation exception. /// In case of a floating-point exception, the default result has all bits set to 0 /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsaf.d))] pub unsafe fn __msa_fsaf_d(a: v2f64, b: v2f64) -> v2i64 { msa_fsaf_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four 32-bit floating point numbers) /// and 'b' (four 32-bit floating point numbers)elements are equal, otherwise set all bits to 0. 
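///
/// # Examples
///
/// A hypothetical sketch, not from the original documentation. It assumes a
/// mips/mips64 target with MSA available at run time; the array transmutes are
/// only for illustration. An all-ones 32-bit lane reads back as -1.
///
/// ```ignore
/// unsafe {
///     let a: v4f32 = core::mem::transmute([1.0f32, 2.0, 3.0, 4.0]);
///     let b: v4f32 = core::mem::transmute([1.0f32, 9.0, 3.0, 9.0]);
///     let r: v4i32 = __msa_fseq_w(a, b);
///     let out: [i32; 4] = core::mem::transmute(r); // [-1, 0, -1, 0]
/// }
/// ```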
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fseq.w))] pub unsafe fn __msa_fseq_w(a: v4f32, b: v4f32) -> v4i32 { msa_fseq_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two 64-bit floating point numbers) /// and 'b' (two 64-bit floating point numbers) elementsare equal, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fseq.d))] pub unsafe fn __msa_fseq_d(a: v2f64, b: v2f64) -> v2i64 { msa_fseq_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Less or Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four 32-bit floating point numbers)elements /// are less than or equal to 'b' (four 32-bit floating point numbers)elements, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsle.w))] pub unsafe fn __msa_fsle_w(a: v4f32, b: v4f32) -> v4i32 { msa_fsle_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Less or Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two 64-bit floating point numbers) elements /// are less than or equal to 'b' (two 64-bit floating point numbers) elements, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsle.d))] pub unsafe fn __msa_fsle_d(a: v2f64, b: v2f64) -> v2i64 { msa_fsle_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Less Than /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four 32-bit floating point numbers)elements /// are less than 'b' (four 32-bit floating point numbers)elements, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fslt.w))] pub unsafe fn __msa_fslt_w(a: v4f32, b: v4f32) -> v4i32 { msa_fslt_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Less Than /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two 64-bit floating point numbers) elements /// are less than 'b' (two 64-bit floating point numbers) elements, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fslt.d))] pub unsafe fn __msa_fslt_d(a: v2f64, b: v2f64) -> v2i64 { msa_fslt_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Not Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four 32-bit floating point numbers)and /// 'b' (four 32-bit floating point numbers)elements are not equal, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsne.w))] pub unsafe fn __msa_fsne_w(a: v4f32, b: v4f32) -> v4i32 { msa_fsne_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Not Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two 64-bit floating point numbers) and /// 'b' (two 64-bit floating point numbers) elementsare not equal, otherwise set all bits to 0. 
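///
/// # Examples
///
/// A minimal sketch that is not part of the original docs; it assumes a
/// mips/mips64 target with MSA available at run time, and the array transmutes
/// are only for illustration.
///
/// ```ignore
/// unsafe {
///     let a: v2f64 = core::mem::transmute([1.0f64, 2.0f64]);
///     let b: v2f64 = core::mem::transmute([1.0f64, 3.0f64]);
///     let r: v2i64 = __msa_fsne_d(a, b);
///     let out: [i64; 2] = core::mem::transmute(r); // [0, -1]
/// }
/// ```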
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsne.d))] pub unsafe fn __msa_fsne_d(a: v2f64, b: v2f64) -> v2i64 { msa_fsne_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Ordered /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four 32-bit floating point numbers) and /// 'b' (four 32-bit floating point numbers) elements are ordered, /// i.e. both elements are not NaN values, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsor.w))] pub unsafe fn __msa_fsor_w(a: v4f32, b: v4f32) -> v4i32 { msa_fsor_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Ordered /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two 64-bit floating point numbers) and /// 'b' (two 64-bit floating point numbers) elements are ordered, /// i.e. both elements are not NaN values, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsor.d))] pub unsafe fn __msa_fsor_d(a: v2f64, b: v2f64) -> v2i64 { msa_fsor_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Square Root /// /// The square roots of floating-point elements in vector 'a' /// (four 32-bit floating point numbers) are written to vector /// (four 32-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsqrt.w))] pub unsafe fn __msa_fsqrt_w(a: v4f32) -> v4f32 { msa_fsqrt_w(a) } /// Vector Floating-Point Square Root /// /// The square roots of floating-point elements in vector 'a' /// (two 64-bit floating point numbers) are written to vector /// (two 64-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsqrt.d))] pub unsafe fn __msa_fsqrt_d(a: v2f64) -> v2f64 { msa_fsqrt_d(a) } /// Vector Floating-Point Subtraction /// /// The floating-point elements in vector 'b' (four 32-bit floating point numbers) /// are subtracted from the floating-point elements in vector 'a' /// (four 32-bit floating point numbers). /// The result is written to vector (four 32-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsub.w))] pub unsafe fn __msa_fsub_w(a: v4f32, b: v4f32) -> v4f32 { msa_fsub_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Subtraction /// /// The floating-point elements in vector 'b' (two 64-bit floating point numbers) /// are subtracted from the floating-point elements in vector 'a' /// (two 64-bit floating point numbers). /// The result is written to vector (two 64-bit floating point numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsub.d))] pub unsafe fn __msa_fsub_d(a: v2f64, b: v2f64) -> v2f64 { msa_fsub_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Unordered or Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four 32-bit floating point numbers) and /// 'b' (four 32-bit floating point numbers) elements are unordered or equal, /// otherwise set all bits to 0.
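///
/// # Examples
///
/// A hypothetical sketch, not from the original documentation. It assumes a
/// mips/mips64 target with MSA available at run time; the array transmutes are
/// only for illustration. Per the description above, a lane is also set to all
/// ones when either input element is NaN.
///
/// ```ignore
/// unsafe {
///     let a: v4f32 = core::mem::transmute([1.0f32, 2.0, 3.0, 4.0]);
///     let b: v4f32 = core::mem::transmute([1.0f32, 9.0, 3.0, 4.0]);
///     let r: v4i32 = __msa_fsueq_w(a, b);
///     let out: [i32; 4] = core::mem::transmute(r); // [-1, 0, -1, -1]
/// }
/// ```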
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsueq.w))] pub unsafe fn __msa_fsueq_w(a: v4f32, b: v4f32) -> v4i32 { msa_fsueq_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Unordered or Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two 64-bit floating point numbers) and /// 'b' (two 64-bit floating point numbers) elements are unordered or equal, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsueq.d))] pub unsafe fn __msa_fsueq_d(a: v2f64, b: v2f64) -> v2i64 { msa_fsueq_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Unordered or Less or Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four 32-bit floating point numbers) elements are /// unordered or less than or equal to 'b' (four 32-bit floating point numbers) elements, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsule.w))] pub unsafe fn __msa_fsule_w(a: v4f32, b: v4f32) -> v4i32 { msa_fsule_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Unordered or Less or Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two 64-bit floating point numbers) elements are /// unordered or less than or equal to 'b' (two 64-bit floating point numbers) elements, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsule.d))] pub unsafe fn __msa_fsule_d(a: v2f64, b: v2f64) -> v2i64 { msa_fsule_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Unordered or Less Than /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four 32-bit floating point numbers) elements /// are unordered or less than 'b' (four 32-bit floating point numbers) elements, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsult.w))] pub unsafe fn __msa_fsult_w(a: v4f32, b: v4f32) -> v4i32 { msa_fsult_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Unordered or Less Than /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two 64-bit floating point numbers) elements /// are unordered or less than 'b' (two 64-bit floating point numbers) elements, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsult.d))] pub unsafe fn __msa_fsult_d(a: v2f64, b: v2f64) -> v2i64 { msa_fsult_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Unordered /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four 32-bit floating point numbers) and /// 'b' (four 32-bit floating point numbers) elements are unordered, /// i.e. at least one element is a NaN value, otherwise set all bits to 0.
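///
/// # Examples
///
/// An illustrative sketch that is not part of the original docs; it assumes a
/// mips/mips64 target with MSA available at run time, with the array transmutes
/// used only for illustration. With no NaN operands every lane is ordered, so
/// the whole mask is zero; per the description above, a NaN in either input
/// would set that lane to all ones.
///
/// ```ignore
/// unsafe {
///     let a: v4f32 = core::mem::transmute([1.0f32, 2.0, 3.0, 4.0]);
///     let b: v4f32 = core::mem::transmute([4.0f32, 3.0, 2.0, 1.0]);
///     let r: v4i32 = __msa_fsun_w(a, b);
///     let out: [i32; 4] = core::mem::transmute(r); // [0, 0, 0, 0]
/// }
/// ```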
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsun.w))] pub unsafe fn __msa_fsun_w(a: v4f32, b: v4f32) -> v4i32 { msa_fsun_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Unordered /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two 64-bit floating point numbers) and /// 'b' (two 64-bit floating point numbers) elements are unordered, /// i.e. at least one element is a NaN value, otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsun.d))] pub unsafe fn __msa_fsun_d(a: v2f64, b: v2f64) -> v2i64 { msa_fsun_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Unordered or Not Equal /// /// Set all bits to 1 in vector (four signed 32-bit integer numbers) elements /// if the corresponding 'a' (four 32-bit floating point numbers) and /// 'b' (four 32-bit floating point numbers) elements are unordered or not equal, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsune.w))] pub unsafe fn __msa_fsune_w(a: v4f32, b: v4f32) -> v4i32 { msa_fsune_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Signaling Compare Unordered or Not Equal /// /// Set all bits to 1 in vector (two signed 64-bit integer numbers) elements /// if the corresponding 'a' (two 64-bit floating point numbers) and /// 'b' (two 64-bit floating point numbers) elements are unordered or not equal, /// otherwise set all bits to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(fsune.d))] pub unsafe fn __msa_fsune_d(a: v2f64, b: v2f64) -> v2i64 { msa_fsune_d(a, ::mem::transmute(b)) } /// Vector Floating-Point Convert to Signed Integer /// /// The elements in vector 'a' (four 32-bit floating point numbers) /// are rounded and converted to signed integer values based on the /// rounding mode bits RM in MSA Control and Status Register MSACSR. /// The result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ftint_s.w))] pub unsafe fn __msa_ftint_s_w(a: v4f32) -> v4i32 { msa_ftint_s_w(a) } /// Vector Floating-Point Convert to Signed Integer /// /// The elements in vector 'a' (two 64-bit floating point numbers) /// are rounded and converted to signed integer values based on the /// rounding mode bits RM in MSA Control and Status Register MSACSR. /// The result is written to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ftint_s.d))] pub unsafe fn __msa_ftint_s_d(a: v2f64) -> v2i64 { msa_ftint_s_d(a) } /// Vector Floating-Point Convert to Unsigned Integer /// /// The elements in vector 'a' (four 32-bit floating point numbers) /// are rounded and converted to unsigned integer values based on the /// rounding mode bits RM in MSA Control and Status Register MSACSR. /// The result is written to vector (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ftint_u.w))] pub unsafe fn __msa_ftint_u_w(a: v4f32) -> v4u32 { msa_ftint_u_w(a) } /// Vector Floating-Point Convert to Unsigned Integer /// /// The elements in vector 'a' (two 64-bit floating point numbers) /// are rounded and converted to unsigned integer values based on the /// rounding mode bits RM in MSA Control and Status Register MSACSR.
/// The result is written to vector (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ftint_u.d))] pub unsafe fn __msa_ftint_u_d(a: v2f64) -> v2u64 { msa_ftint_u_d(a) } /// Vector Floating-Point Convert to Fixed-Point /// /// The elements in vector 'a' (four 32-bit floating point numbers) /// and 'b' (four 32-bit floating point numbers)are down-converted to a fixed-point /// representation, i.e. from 64-bit floating-point to 32-bit Q31 fixed-point /// representation, or from 32-bit floating-point to 16-bit Q15 fixed-point representation. /// The result is written to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ftq.h))] pub unsafe fn __msa_ftq_h(a: v4f32, b: v4f32) -> v8i16 { msa_ftq_h(a, ::mem::transmute(b)) } /// Vector Floating-Point Convert to Fixed-Point /// /// The elements in vector 'a' (two 64-bit floating point numbers) /// and 'b' (two 64-bit floating point numbers) are down-converted to a fixed-point /// representation, i.e. from 64-bit floating-point to 32-bit Q31 fixed-point /// representation, or from 32-bit floating-point to 16-bit Q15 fixed-point representation. /// The result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ftq.w))] pub unsafe fn __msa_ftq_w(a: v2f64, b: v2f64) -> v4i32 { msa_ftq_w(a, ::mem::transmute(b)) } /// Vector Floating-Point Truncate and Convert to Signed Integer /// /// The elements in vector 'a' (four 32-bit floating point numbers) /// are truncated, i.e. rounded toward zero, to signed integer values. /// The result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ftrunc_s.w))] pub unsafe fn __msa_ftrunc_s_w(a: v4f32) -> v4i32 { msa_ftrunc_s_w(a) } /// Vector Floating-Point Truncate and Convert to Signed Integer /// /// The elements in vector 'a' (two 64-bit floating point numbers) /// are truncated, i.e. rounded toward zero, to signed integer values. /// The result is written to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ftrunc_s.d))] pub unsafe fn __msa_ftrunc_s_d(a: v2f64) -> v2i64 { msa_ftrunc_s_d(a) } /// Vector Floating-Point Truncate and Convert to Unsigned Integer /// /// The elements in vector 'a' (four 32-bit floating point numbers) /// are truncated, i.e. rounded toward zero, to unsigned integer values. /// The result is written to vector (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ftrunc_u.w))] pub unsafe fn __msa_ftrunc_u_w(a: v4f32) -> v4u32 { msa_ftrunc_u_w(a) } /// Vector Floating-Point Truncate and Convert to Unsigned Integer /// /// The elements in vector 'a' (two 64-bit floating point numbers) /// are truncated, i.e. rounded toward zero, to unsigned integer values. /// The result is written to vector (two unsigned 64-bit integer numbers). 
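///
/// # Examples
///
/// A hypothetical sketch, not from the original documentation. It assumes a
/// mips/mips64 target with MSA available at run time; the array transmutes are
/// only for illustration.
///
/// ```ignore
/// unsafe {
///     let a: v2f64 = core::mem::transmute([3.9f64, 7.1f64]);
///     let r: v2u64 = __msa_ftrunc_u_d(a);
///     let out: [u64; 2] = core::mem::transmute(r); // rounded toward zero: [3, 7]
/// }
/// ```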
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ftrunc_u.d))] pub unsafe fn __msa_ftrunc_u_d(a: v2f64) -> v2u64 { msa_ftrunc_u_d(a) } /// Vector Signed Horizontal Add /// /// The sign-extended odd elements in vector 'a' (sixteen signed 8-bit integer numbers) /// are added to the sign-extended even elements in vector 'b' (sixteen signed 8-bit integer numbers) /// producing aresult twice the size of the input operands. /// The result is written to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(hadd_s.h))] pub unsafe fn __msa_hadd_s_h(a: v16i8, b: v16i8) -> v8i16 { msa_hadd_s_h(a, ::mem::transmute(b)) } /// Vector Signed Horizontal Add /// /// The sign-extended odd elements in vector 'a' (eight signed 16-bit integer numbers) /// are added to the sign-extended even elements in vector 'b' (eight signed 16-bit integer numbers) /// producing aresult twice the size of the input operands. /// The result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(hadd_s.w))] pub unsafe fn __msa_hadd_s_w(a: v8i16, b: v8i16) -> v4i32 { msa_hadd_s_w(a, ::mem::transmute(b)) } /// Vector Signed Horizontal Add /// /// The sign-extended odd elements in vector 'a' (four signed 32-bit integer numbers) /// are added to the sign-extended even elements in vector 'b' (four signed 32-bit integer numbers) /// producing aresult twice the size of the input operands. /// The result is written to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(hadd_s.d))] pub unsafe fn __msa_hadd_s_d(a: v4i32, b: v4i32) -> v2i64 { msa_hadd_s_d(a, ::mem::transmute(b)) } /// Vector Unsigned Horizontal Add /// /// The zero-extended odd elements in vector 'a' (sixteen unsigned 8-bit integer numbers) /// are added to the zero-extended even elements in vector 'b' (sixteen unsigned 8-bit integer numbers) /// producing aresult twice the size of the input operands. /// The result is written to vector (eight unsigned 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(hadd_u.h))] pub unsafe fn __msa_hadd_u_h(a: v16u8, b: v16u8) -> v8u16 { msa_hadd_u_h(a, ::mem::transmute(b)) } /// Vector Unsigned Horizontal Add /// /// The zero-extended odd elements in vector 'a' (eight unsigned 16-bit integer numbers) /// are added to the zero-extended even elements in vector 'b' (eight unsigned 16-bit integer numbers) /// producing aresult twice the size of the input operands. /// The result is written to vector (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(hadd_u.w))] pub unsafe fn __msa_hadd_u_w(a: v8u16, b: v8u16) -> v4u32 { msa_hadd_u_w(a, ::mem::transmute(b)) } /// Vector Unsigned Horizontal Add /// /// The zero-extended odd elements in vector 'a' (four unsigned 32-bit integer numbers) /// are added to the zero-extended even elements in vector 'b' (four unsigned 32-bit integer numbers) /// producing aresult twice the size of the input operands. /// The result is written to vector (two unsigned 64-bit integer numbers). 
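///
/// # Examples
///
/// A non-normative sketch (not from the original docs); it assumes a
/// mips/mips64 target with MSA available at run time, with the array transmutes
/// used only for illustration. Per the description above, each 64-bit result
/// lane is a zero-extended odd element of 'a' plus a zero-extended even
/// element of 'b'.
///
/// ```ignore
/// unsafe {
///     let a: v4u32 = core::mem::transmute([1u32, 2, 3, 4]);
///     let b: v4u32 = core::mem::transmute([10u32, 20, 30, 40]);
///     let r: v2u64 = __msa_hadd_u_d(a, b);
///     // Lanes pair as a[1] + b[0] and a[3] + b[2], i.e. 12 and 34.
///     let out: [u64; 2] = core::mem::transmute(r);
/// }
/// ```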
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(hadd_u.d))] pub unsafe fn __msa_hadd_u_d(a: v4u32, b: v4u32) -> v2u64 { msa_hadd_u_d(a, ::mem::transmute(b)) } /// Vector Signed Horizontal Subtract /// /// The sign-extended odd elements in vector 'b' (sixteen signed 8-bit integer numbers) /// are subtracted from the sign-extended elements in vector 'a' (sixteen signed 8-bit integer numbers) /// producing aresult twice the size of the input operands. /// The result is written to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(hsub_s.h))] pub unsafe fn __msa_hsub_s_h(a: v16i8, b: v16i8) -> v8i16 { msa_hsub_s_h(a, ::mem::transmute(b)) } /// Vector Signed Horizontal Subtract /// /// The sign-extended odd elements in vector 'b' (eight signed 16-bit integer numbers) /// are subtracted from the sign-extended elements in vector 'a' (eight signed 16-bit integer numbers) /// producing aresult twice the size of the input operands. /// The result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(hsub_s.w))] pub unsafe fn __msa_hsub_s_w(a: v8i16, b: v8i16) -> v4i32 { msa_hsub_s_w(a, ::mem::transmute(b)) } /// Vector Signed Horizontal Subtract /// /// The sign-extended odd elements in vector 'b' (four signed 32-bit integer numbers) /// are subtracted from the sign-extended elements in vector 'a' (four signed 32-bit integer numbers) /// producing aresult twice the size of the input operands. /// The result is written to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(hsub_s.d))] pub unsafe fn __msa_hsub_s_d(a: v4i32, b: v4i32) -> v2i64 { msa_hsub_s_d(a, ::mem::transmute(b)) } /// Vector Unsigned Horizontal Subtract /// /// The zero-extended odd elements in vector 'b' (sixteen unsigned 8-bit integer numbers) /// are subtracted from the zero-extended elements in vector 'a' (sixteen unsigned 8-bit integer numbers) /// producing aresult twice the size of the input operands. /// The result is written to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(hsub_u.h))] pub unsafe fn __msa_hsub_u_h(a: v16u8, b: v16u8) -> v8i16 { msa_hsub_u_h(a, ::mem::transmute(b)) } /// Vector Unsigned Horizontal Subtract /// /// The zero-extended odd elements in vector 'b' (eight unsigned 16-bit integer numbers) /// are subtracted from the zero-extended elements in vector 'a' (eight unsigned 16-bit integer numbers) /// producing aresult twice the size of the input operands. /// The result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(hsub_u.w))] pub unsafe fn __msa_hsub_u_w(a: v8u16, b: v8u16) -> v4i32 { msa_hsub_u_w(a, ::mem::transmute(b)) } /// Vector Unsigned Horizontal Subtract /// /// The zero-extended odd elements in vector 'b' (four unsigned 32-bit integer numbers) /// are subtracted from the zero-extended elements in vector 'a' (four unsigned 32-bit integer numbers) /// producing aresult twice the size of the input operands. /// The result is written to vector (two signed 64-bit integer numbers). 
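///
/// # Examples
///
/// A hypothetical call-shape sketch, not from the original documentation. It
/// assumes a mips/mips64 target with MSA available at run time; the array
/// transmutes are only for illustration. Each 64-bit result lane is the
/// difference of one zero-extended 32-bit element of 'a' and one of 'b',
/// paired as described above.
///
/// ```ignore
/// unsafe {
///     let a: v4u32 = core::mem::transmute([100u32, 200, 300, 400]);
///     let b: v4u32 = core::mem::transmute([1u32, 2, 3, 4]);
///     let r: v2i64 = __msa_hsub_u_d(a, b);
/// }
/// ```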
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(hsub_u.d))] pub unsafe fn __msa_hsub_u_d(a: v4u32, b: v4u32) -> v2i64 { msa_hsub_u_d(a, ::mem::transmute(b)) } /// Vector Interleave Even /// /// Even elements in vectors 'a' (sixteen signed 8-bit integer numbers) /// and vector 'b' (sixteen signed 8-bit integer numbers) are copied to the result /// (sixteen signed 8-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvev.b))] pub unsafe fn __msa_ilvev_b(a: v16i8, b: v16i8) -> v16i8 { msa_ilvev_b(a, ::mem::transmute(b)) } /// Vector Interleave Even /// /// Even elements in vectors 'a' (eight signed 16-bit integer numbers) /// and vector 'b' (eight signed 16-bit integer numbers) are copied to the result /// (eight signed 16-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvev.h))] pub unsafe fn __msa_ilvev_h(a: v8i16, b: v8i16) -> v8i16 { msa_ilvev_h(a, ::mem::transmute(b)) } /// Vector Interleave Even /// /// Even elements in vectors 'a' (four signed 32-bit integer numbers) /// and vector 'b' (four signed 32-bit integer numbers) are copied to the result /// (four signed 32-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvev.w))] pub unsafe fn __msa_ilvev_w(a: v4i32, b: v4i32) -> v4i32 { msa_ilvev_w(a, ::mem::transmute(b)) } /// Vector Interleave Even /// /// Even elements in vectors 'a' (two signed 64-bit integer numbers) /// and vector 'b' (two signed 64-bit integer numbers) are copied to the result /// (two signed 64-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvev.d))] pub unsafe fn __msa_ilvev_d(a: v2i64, b: v2i64) -> v2i64 { msa_ilvev_d(a, ::mem::transmute(b)) } /// Vector Interleave Left /// /// The left half elements in vectors 'a' (sixteen signed 8-bit integer numbers) /// and vector 'b' (sixteen signed 8-bit integer numbers) are copied to the result /// (sixteen signed 8-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvl.b))] pub unsafe fn __msa_ilvl_b(a: v16i8, b: v16i8) -> v16i8 { msa_ilvl_b(a, ::mem::transmute(b)) } /// Vector Interleave Left /// /// The left half elements in vectors 'a' (eight signed 16-bit integer numbers) /// and vector 'b' (eight signed 16-bit integer numbers) are copied to the result /// (eight signed 16-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvl.h))] pub unsafe fn __msa_ilvl_h(a: v8i16, b: v8i16) -> v8i16 { msa_ilvl_h(a, ::mem::transmute(b)) } /// Vector Interleave Left /// /// The left half elements in vectors 'a' (four signed 32-bit integer numbers) /// and vector 'b' (four signed 32-bit integer numbers) are copied to the result /// (four signed 32-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. 
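///
/// # Examples
///
/// A hypothetical sketch, not from the original documentation. It assumes a
/// mips/mips64 target with MSA available at run time; the array transmutes are
/// only for illustration. The result interleaves elements drawn from the left
/// halves of 'a' and 'b', as described above; the exact lane order follows the
/// MSA specification.
///
/// ```ignore
/// unsafe {
///     let a: v4i32 = core::mem::transmute([0i32, 1, 2, 3]);
///     let b: v4i32 = core::mem::transmute([10i32, 11, 12, 13]);
///     let r: v4i32 = __msa_ilvl_w(a, b);
/// }
/// ```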
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvl.w))] pub unsafe fn __msa_ilvl_w(a: v4i32, b: v4i32) -> v4i32 { msa_ilvl_w(a, ::mem::transmute(b)) } /// Vector Interleave Left /// /// The left half elements in vectors 'a' (two signed 64-bit integer numbers) /// and vector 'b' (two signed 64-bit integer numbers) are copied to the result /// (two signed 64-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvl.d))] pub unsafe fn __msa_ilvl_d(a: v2i64, b: v2i64) -> v2i64 { msa_ilvl_d(a, ::mem::transmute(b)) } /// Vector Interleave Odd /// /// Odd elements in vectors 'a' (sixteen signed 8-bit integer numbers) /// and vector 'b' (sixteen signed 8-bit integer numbers) are copied to the result /// (sixteen signed 8-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvod.b))] pub unsafe fn __msa_ilvod_b(a: v16i8, b: v16i8) -> v16i8 { msa_ilvod_b(a, ::mem::transmute(b)) } /// Vector Interleave Odd /// /// Odd elements in vectors 'a' (eight signed 16-bit integer numbers) /// and vector 'b' (eight signed 16-bit integer numbers) are copied to the result /// (eight signed 16-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvod.h))] pub unsafe fn __msa_ilvod_h(a: v8i16, b: v8i16) -> v8i16 { msa_ilvod_h(a, ::mem::transmute(b)) } /// Vector Interleave Odd /// /// Odd elements in vectors 'a' (four signed 32-bit integer numbers) /// and vector 'b' (four signed 32-bit integer numbers) are copied to the result /// (four signed 32-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvod.w))] pub unsafe fn __msa_ilvod_w(a: v4i32, b: v4i32) -> v4i32 { msa_ilvod_w(a, ::mem::transmute(b)) } /// Vector Interleave Odd /// /// Odd elements in vectors 'a' (two signed 64-bit integer numbers) /// and vector 'b' (two signed 64-bit integer numbers) are copied to the result /// (two signed 64-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvod.d))] pub unsafe fn __msa_ilvod_d(a: v2i64, b: v2i64) -> v2i64 { msa_ilvod_d(a, ::mem::transmute(b)) } /// Vector Interleave Right /// /// The right half elements in vectors 'a' (sixteen signed 8-bit integer numbers) /// and vector 'b' (sixteen signed 8-bit integer numbers) are copied to the result /// (sixteen signed 8-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvr.b))] pub unsafe fn __msa_ilvr_b(a: v16i8, b: v16i8) -> v16i8 { msa_ilvr_b(a, ::mem::transmute(b)) } /// Vector Interleave Right /// /// The right half elements in vectors 'a' (eight signed 16-bit integer numbers) /// and vector 'b' (eight signed 16-bit integer numbers) are copied to the result /// (eight signed 16-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. 
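///
/// # Examples
///
/// An illustrative sketch that is not part of the original docs; it assumes a
/// mips/mips64 target with MSA available at run time, with the array transmutes
/// used only for illustration. The result interleaves elements drawn from the
/// right halves of 'a' and 'b', as described above.
///
/// ```ignore
/// unsafe {
///     let a: v8i16 = core::mem::transmute([0i16, 1, 2, 3, 4, 5, 6, 7]);
///     let b: v8i16 = core::mem::transmute([10i16, 11, 12, 13, 14, 15, 16, 17]);
///     let r: v8i16 = __msa_ilvr_h(a, b);
/// }
/// ```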
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvr.h))] pub unsafe fn __msa_ilvr_h(a: v8i16, b: v8i16) -> v8i16 { msa_ilvr_h(a, ::mem::transmute(b)) } /// Vector Interleave Right /// /// The right half elements in vectors 'a' (four signed 32-bit integer numbers) /// and vector 'b' (four signed 32-bit integer numbers) are copied to the result /// (four signed 32-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvr.w))] pub unsafe fn __msa_ilvr_w(a: v4i32, b: v4i32) -> v4i32 { msa_ilvr_w(a, ::mem::transmute(b)) } /// Vector Interleave Right /// /// The right half elements in vectors 'a' (two signed 64-bit integer numbers) /// and vector 'b' (two signed 64-bit integer numbers) are copied to the result /// (two signed 64-bit integer numbers) /// alternating one element from 'a' with one element from 'b'. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ilvr.d))] pub unsafe fn __msa_ilvr_d(a: v2i64, b: v2i64) -> v2i64 { msa_ilvr_d(a, ::mem::transmute(b)) } /// GPR Insert Element /// /// Set element imm4 in vector 'a' (sixteen signed 8-bit integer numbers) to GPR 'c' value. /// All other elements in vector 'a' are unchanged. If the source GPR is wider than the /// destination data format, the destination's elements will be set to the least significant bits of the GPR. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(insert.b, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_insert_b(a: v16i8, imm4: i32, c: i32) -> v16i8 { macro_rules! call { ($imm4:expr) => { msa_insert_b(a, $imm4, c) }; } constify_imm4!(imm4, call) } /// GPR Insert Element /// /// Set element imm3 in vector 'a' (eight signed 16-bit integer numbers) to GPR 'c' value. /// All other elements in vector 'a' are unchanged. If the source GPR is wider than the /// destination data format, the destination's elements will be set to the least significant bits of the GPR. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(insert.h, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_insert_h(a: v8i16, imm3: i32, c: i32) -> v8i16 { macro_rules! call { ($imm3:expr) => { msa_insert_h(a, $imm3, c) }; } constify_imm3!(imm3, call) } /// GPR Insert Element /// /// Set element imm2 in vector 'a' (four signed 32-bit integer numbers) to GPR 'c' value. /// All other elements in vector 'a' are unchanged. If the source GPR is wider than the /// destination data format, the destination's elements will be set to the least significant bits of the GPR. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(insert.w, imm2 = 0b11))] #[rustc_args_required_const(1)] pub unsafe fn __msa_insert_w(a: v4i32, imm2: i32, c: i32) -> v4i32 { macro_rules! call { ($imm2:expr) => { msa_insert_w(a, $imm2, c) }; } constify_imm2!(imm2, call) } /// GPR Insert Element /// /// Set element imm1 in vector 'a' (two signed 64-bit integer numbers) to GPR 'c' value. /// All other elements in vector 'a' are unchanged. If the source GPR is wider than the /// destination data format, the destination's elements will be set to the least significant bits of the GPR. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(insert.d, imm1 = 0b1))] #[rustc_args_required_const(1)] pub unsafe fn __msa_insert_d(a: v2i64, imm1: i32, c: i64) -> v2i64 { macro_rules! 
call { ($imm1:expr) => { msa_insert_d(a, $imm1, c) }; } constify_imm1!(imm1, call) } /// Element Insert Element /// /// Set element imm4 in the result vector 'a' (sixteen signed 8-bit integer numbers) to the value of element 0 /// in vector 'c' (sixteen signed 8-bit integer numbers). /// All other elements in vector 'a' are unchanged. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(insve.b, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_insve_b(a: v16i8, imm4: i32, c: v16i8) -> v16i8 { macro_rules! call { ($imm4:expr) => { msa_insve_b(a, $imm4, c) }; } constify_imm4!(imm4, call) } /// Element Insert Element /// /// Set element imm3 in the result vector 'a' (eight signed 16-bit integer numbers) to the value of element 0 /// in vector 'c' (eight signed 16-bit integer numbers). /// All other elements in vector 'a' are unchanged. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(insve.h, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_insve_h(a: v8i16, imm3: i32, c: v8i16) -> v8i16 { macro_rules! call { ($imm3:expr) => { msa_insve_h(a, $imm3, c) }; } constify_imm3!(imm3, call) } /// Element Insert Element /// /// Set element imm2 in the result vector 'a' (four signed 32-bit integer numbers) to the value of element 0 /// in vector 'c' (four signed 32-bit integer numbers). /// All other elements in vector 'a' are unchanged. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(insve.w, imm2 = 0b11))] #[rustc_args_required_const(1)] pub unsafe fn __msa_insve_w(a: v4i32, imm2: i32, c: v4i32) -> v4i32 { macro_rules! call { ($imm2:expr) => { msa_insve_w(a, $imm2, c) }; } constify_imm2!(imm2, call) } /// Element Insert Element /// /// Set element imm1 in the result vector 'a' (two signed 64-bit integer numbers) to the value of element 0 /// in vector 'c' (two signed 64-bit integer numbers). /// All other elements in vector 'a' are unchanged. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(insve.d, imm1 = 0b1))] #[rustc_args_required_const(1)] pub unsafe fn __msa_insve_d(a: v2i64, imm1: i32, c: v2i64) -> v2i64 { macro_rules! call { ($imm1:expr) => { msa_insve_d(a, $imm1, c) }; } constify_imm1!(imm1, call) } /// Vector Load /// /// The WRLEN / 8 bytes at the effective memory location addressed by the base /// mem_addr and the 10-bit signed immediate offset imm_s10 are fetched and placed in /// the vector (sixteen signed 8-bit integer numbers) value. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ld.b, imm_s10 = 0b1111111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_ld_b(mem_addr: *mut u8, imm_s10: i32) -> v16i8 { macro_rules! call { ($imm_s10:expr) => { msa_ld_b(mem_addr, $imm_s10) }; } constify_imm_s10!(imm_s10, call) } /// Vector Load /// /// The WRLEN / 8 bytes at the effective memory location addressed by the base /// mem_addr and the 11-bit signed immediate offset imm_s11 are fetched and placed in /// the vector (eight signed 16-bit integer numbers) value. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ld.h, imm_s11 = 0b11111111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_ld_h(mem_addr: *mut u8, imm_s11: i32) -> v8i16 { macro_rules!
call { ($imm_s11:expr) => { msa_ld_h(mem_addr, $imm_s11) }; } constify_imm_s11!(imm_s11, call) } /// Vector Load /// /// The WRLEN / 8 bytes at the effective memory location addressed by the base /// mem_addr and the 12-bit signed immediate offset imm_s12 are fetched and placed in /// the vector (four signed 32-bit integer numbers) value. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ld.w, imm_s12 = 0b111111111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_ld_w(mem_addr: *mut u8, imm_s12: i32) -> v4i32 { macro_rules! call { ($imm_s12:expr) => { msa_ld_w(mem_addr, $imm_s12) }; } constify_imm_s12!(imm_s12, call) } /// Vector Load /// /// The WRLEN / 8 bytes at the effective memory location addressed by the base /// mem_addr and the 13-bit signed immediate offset imm_s13 are fetched and placed in /// the vector (two signed 64-bit integer numbers) value. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ld.d, imm_s13 = 0b1111111111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_ld_d(mem_addr: *mut u8, imm_s13: i32) -> v2i64 { macro_rules! call { ($imm_s13:expr) => { msa_ld_d(mem_addr, $imm_s13) }; } constify_imm_s13!(imm_s13, call) } /// Immediate Load /// /// The signed immediate imm_s10 is replicated in all vector /// (sixteen signed 8-bit integer numbers) elements. For byte elements, /// only the least significant 8 bits of imm_s10 will be used. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ldi.b, imm_s10 = 0b1111111111))] #[rustc_args_required_const(0)] pub unsafe fn __msa_ldi_b(imm_s10: i32) -> v16i8 { macro_rules! call { ($imm_s10:expr) => { msa_ldi_b($imm_s10) }; } constify_imm_s10!(imm_s10, call) } /// Immediate Load /// /// The signed immediate imm_s10 is replicated in all vector /// (eight signed 16-bit integer numbers) elements. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ldi.h, imm_s10 = 0b1111111111))] #[rustc_args_required_const(0)] pub unsafe fn __msa_ldi_h(imm_s10: i32) -> v8i16 { macro_rules! call { ($imm_s10:expr) => { msa_ldi_h($imm_s10) }; } constify_imm_s10!(imm_s10, call) } /// Immediate Load /// /// The signed immediate imm_s10 is replicated in all vector /// (four signed 32-bit integer numbers) elements. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ldi.w, imm_s10 = 0b1111111111))] #[rustc_args_required_const(0)] pub unsafe fn __msa_ldi_w(imm_s10: i32) -> v4i32 { macro_rules! call { ($imm_s10:expr) => { msa_ldi_w($imm_s10) }; } constify_imm_s10!(imm_s10, call) } /// Immediate Load /// /// The signed immediate imm_s10 is replicated in all vector /// (two signed 64-bit integer numbers) elements. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ldi.d, imm_s10 = 0b1111111111))] #[rustc_args_required_const(0)] pub unsafe fn __msa_ldi_d(imm_s10: i32) -> v2i64 { macro_rules!
call { ($imm_s10:expr) => { msa_ldi_d($imm_s10) }; } constify_imm_s10!(imm_s10, call) } /// Vector Fixed-Point Multiply and Add /// /// The products of fixed-point elements in 'b' (eight signed 16-bit integer numbers) /// by fixed-point elements in vector 'c' (eight signed 16-bit integer numbers) /// are added to the fixed-pointelements in vector 'a' (eight signed 16-bit integer numbers) /// The multiplication result is not saturated, i.e. exact (-1) * (-1) = 1 is added to the destination. /// The saturated fixed-point results are stored to vector 'a' /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(madd_q.h))] pub unsafe fn __msa_madd_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { msa_madd_q_h(a, ::mem::transmute(b), c) } /// Vector Fixed-Point Multiply and Add /// /// The products of fixed-point elements in 'b' (four signed 32-bit integer numbers) /// by fixed-point elements in vector 'c' (four signed 32-bit integer numbers) /// are added to the fixed-pointelements in vector 'a' (four signed 32-bit integer numbers) /// The multiplication result is not saturated, i.e. exact (-1) * (-1) = 1 is added to the destination. /// The saturated fixed-point results are stored to vector 'a' /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(madd_q.w))] pub unsafe fn __msa_madd_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { msa_madd_q_w(a, ::mem::transmute(b), c) } /// Vector Fixed-Point Multiply and Add Rounded /// /// The products of fixed-point elements in 'b' (eight signed 16-bit integer numbers) /// by fixed-point elements in vector 'c' (eight signed 16-bit integer numbers) /// are added to the fixed-pointelements in vector 'a' (eight signed 16-bit integer numbers) /// The multiplication result is not saturated, i.e. exact (-1) * (-1) = 1 is added to the destination. /// The rounded and saturated fixed-point results are stored to vector 'a' /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maddr_q.h))] pub unsafe fn __msa_maddr_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { msa_maddr_q_h(a, ::mem::transmute(b), c) } /// Vector Fixed-Point Multiply and Add Rounded /// /// The products of fixed-point elements in 'b' (four signed 32-bit integer numbers) /// by fixed-point elements in vector 'c' (four signed 32-bit integer numbers) /// are added to the fixed-pointelements in vector 'a' (four signed 32-bit integer numbers) /// The multiplication result is not saturated, i.e. exact (-1) * (-1) = 1 is added to the destination. /// The rounded and saturated fixed-point results are stored to vector 'a' /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maddr_q.w))] pub unsafe fn __msa_maddr_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { msa_maddr_q_w(a, ::mem::transmute(b), c) } /// Vector Multiply and Add /// /// The integer elements in vector 'b' (sixteen signed 8-bit integer numbers) /// are multiplied by integer elements in vector 'c' (sixteen signed 8-bit integer numbers) /// and added to the integer elements in vector 'a' (sixteen signed 8-bit integer numbers) /// The most significant half of the multiplication result is discarded. 
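///
/// # Examples
///
/// A minimal sketch, not from the original documentation; it assumes a
/// mips/mips64 target with MSA available at run time, and the array transmutes
/// are only for illustration.
///
/// ```ignore
/// unsafe {
///     let a: v16i8 = core::mem::transmute([1i8; 16]);
///     let b: v16i8 = core::mem::transmute([2i8; 16]);
///     let c: v16i8 = core::mem::transmute([3i8; 16]);
///     // Every lane becomes 1 + 2 * 3 = 7; only the low 8 bits of each
///     // product are kept, per the description above.
///     let r: v16i8 = __msa_maddv_b(a, b, c);
/// }
/// ```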
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maddv.b))] pub unsafe fn __msa_maddv_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8 { msa_maddv_b(a, ::mem::transmute(b), c) } /// Vector Multiply and Add /// /// The integer elements in vector 'b' (eight signed 16-bit integer numbers) /// are multiplied by integer elements in vector 'c' (eight signed 16-bit integer numbers) /// and added to the integer elements in vector 'a' (eight signed 16-bit integer numbers) /// The most significant half of the multiplication result is discarded. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maddv.h))] pub unsafe fn __msa_maddv_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { msa_maddv_h(a, ::mem::transmute(b), c) } /// Vector Multiply and Add /// /// The integer elements in vector 'b' (four signed 32-bit integer numbers) /// are multiplied by integer elements in vector 'c' (four signed 32-bit integer numbers) /// and added to the integer elements in vector 'a' (four signed 32-bit integer numbers) /// The most significant half of the multiplication result is discarded. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maddv.w))] pub unsafe fn __msa_maddv_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { msa_maddv_w(a, ::mem::transmute(b), c) } /// Vector Multiply and Add /// /// The integer elements in vector 'b' (two signed 64-bit integer numbers) /// are multiplied by integer elements in vector 'c' (two signed 64-bit integer numbers) /// and added to the integer elements in vector 'a' (two signed 64-bit integer numbers) /// The most significant half of the multiplication result is discarded. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maddv.d))] pub unsafe fn __msa_maddv_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64 { msa_maddv_d(a, ::mem::transmute(b), c) } /// Vector Maximum Based on Absolute Values /// /// The value with the largest magnitude, i.e. absolute value, between corresponding /// signed elements in vector 'a'(sixteen signed 8-bit integer numbers) and /// 'b'(sixteen signed 8-bit integer numbers) are written to vector /// (sixteen signed 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(max_a.b))] pub unsafe fn __msa_max_a_b(a: v16i8, b: v16i8) -> v16i8 { msa_max_a_b(a, ::mem::transmute(b)) } /// Vector Maximum Based on Absolute Values /// /// The value with the largest magnitude, i.e. absolute value, between corresponding /// signed elements in vector 'a'(eight signed 16-bit integer numbers) and /// 'b'(eight signed 16-bit integer numbers) are written to vector /// (eight signed 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(max_a.h))] pub unsafe fn __msa_max_a_h(a: v8i16, b: v8i16) -> v8i16 { msa_max_a_h(a, ::mem::transmute(b)) } /// Vector Maximum Based on Absolute Values /// /// The value with the largest magnitude, i.e. absolute value, between corresponding /// signed elements in vector 'a'(four signed 32-bit integer numbers) and /// 'b'(four signed 32-bit integer numbers) are written to vector /// (four signed 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(max_a.w))] pub unsafe fn __msa_max_a_w(a: v4i32, b: v4i32) -> v4i32 { msa_max_a_w(a, ::mem::transmute(b)) } /// Vector Maximum Based on Absolute Values /// /// The value with the largest magnitude, i.e. 
absolute value, between corresponding /// signed elements in vector 'a'(two signed 64-bit integer numbers) and /// 'b'(two signed 64-bit integer numbers) are written to vector /// (two signed 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(max_a.d))] pub unsafe fn __msa_max_a_d(a: v2i64, b: v2i64) -> v2i64 { msa_max_a_d(a, ::mem::transmute(b)) } /// Vector Signed Maximum /// /// Maximum values between signed elements in vector 'a'(sixteen signed 8-bit integer numbers) /// and signed elements in vector 'b'(sixteen signed 8-bit integer numbers) are written to vector /// (sixteen signed 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(max_s.b))] pub unsafe fn __msa_max_s_b(a: v16i8, b: v16i8) -> v16i8 { msa_max_s_b(a, ::mem::transmute(b)) } /// Vector Signed Maximum /// /// Maximum values between signed elements in vector 'a'(eight signed 16-bit integer numbers) /// and signed elements in vector 'b'(eight signed 16-bit integer numbers) are written to vector /// (eight signed 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(max_s.h))] pub unsafe fn __msa_max_s_h(a: v8i16, b: v8i16) -> v8i16 { msa_max_s_h(a, ::mem::transmute(b)) } /// Vector Signed Maximum /// /// Maximum values between signed elements in vector 'a'(four signed 32-bit integer numbers) /// and signed elements in vector 'b'(four signed 32-bit integer numbers) are written to vector /// (four signed 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(max_s.w))] pub unsafe fn __msa_max_s_w(a: v4i32, b: v4i32) -> v4i32 { msa_max_s_w(a, ::mem::transmute(b)) } /// Vector Signed Maximum /// /// Maximum values between signed elements in vector 'a'(two signed 64-bit integer numbers) /// and signed elements in vector 'b'(two signed 64-bit integer numbers) are written to vector /// (two signed 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(max_s.d))] pub unsafe fn __msa_max_s_d(a: v2i64, b: v2i64) -> v2i64 { msa_max_s_d(a, ::mem::transmute(b)) } /// Vector Unsigned Maximum /// /// Maximum values between unsigned elements in vector 'a'(sixteen unsigned 8-bit integer numbers) /// and unsigned elements in vector 'b'(sixteen unsigned 8-bit integer numbers) are written to vector /// (sixteen unsigned 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(max_u.b))] pub unsafe fn __msa_max_u_b(a: v16u8, b: v16u8) -> v16u8 { msa_max_u_b(a, ::mem::transmute(b)) } /// Vector Unsigned Maximum /// /// Maximum values between unsigned elements in vector 'a'(eight unsigned 16-bit integer numbers) /// and unsigned elements in vector 'b'(eight unsigned 16-bit integer numbers) are written to vector /// (eight unsigned 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(max_u.h))] pub unsafe fn __msa_max_u_h(a: v8u16, b: v8u16) -> v8u16 { msa_max_u_h(a, ::mem::transmute(b)) } /// Vector Unsigned Maximum /// /// Maximum values between unsigned elements in vector 'a'(four unsigned 32-bit integer numbers) /// and unsigned elements in vector 'b'(four unsigned 32-bit integer numbers) are written to vector /// (four unsigned 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(max_u.w))] pub unsafe fn __msa_max_u_w(a: v4u32, b: v4u32) -> v4u32 { msa_max_u_w(a, 
::mem::transmute(b)) } /// Vector Unsigned Maximum /// /// Maximum values between unsigned elements in vector 'a'(two unsigned 64-bit integer numbers) /// and unsigned elements in vector 'b'(two unsigned 64-bit integer numbers) are written to vector /// (two unsigned 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(max_u.d))] pub unsafe fn __msa_max_u_d(a: v2u64, b: v2u64) -> v2u64 { msa_max_u_d(a, ::mem::transmute(b)) } /// Immediate Signed Maximum /// /// Maximum values between signed elements in vector 'a'(sixteen signed 8-bit integer numbers) /// and the 5-bit signed immediate imm_s5 are written to vector /// (sixteen signed 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maxi_s.b, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_maxi_s_b(a: v16i8, imm_s5: i32) -> v16i8 { macro_rules! call { ($imm_s5:expr) => { msa_maxi_s_b(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Signed Maximum /// /// Maximum values between signed elements in vector 'a'(eight signed 16-bit integer numbers) /// and the 5-bit signed immediate imm_s5 are written to vector /// (eight signed 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maxi_s.h, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_maxi_s_h(a: v8i16, imm_s5: i32) -> v8i16 { macro_rules! call { ($imm_s5:expr) => { msa_maxi_s_h(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Signed Maximum /// /// Maximum values between signed elements in vector 'a'(four signed 32-bit integer numbers) /// and the 5-bit signed immediate imm_s5 are written to vector /// (four signed 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maxi_s.w, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_maxi_s_w(a: v4i32, imm_s5: i32) -> v4i32 { macro_rules! call { ($imm_s5:expr) => { msa_maxi_s_w(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Signed Maximum /// /// Maximum values between signed elements in vector 'a'(two signed 64-bit integer numbers) /// and the 5-bit signed immediate imm_s5 are written to vector /// (two signed 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maxi_s.d, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_maxi_s_d(a: v2i64, imm_s5: i32) -> v2i64 { macro_rules! call { ($imm_s5:expr) => { msa_maxi_s_d(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Unsigned Maximum /// /// Maximum values between unsigned elements in vector 'a'(sixteen unsigned 8-bit integer numbers) /// and the 5-bit unsigned immediate imm5 are written to vector /// (sixteen unsigned 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maxi_u.b, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_maxi_u_b(a: v16u8, imm5: i32) -> v16u8 { macro_rules! 
call { ($imm5:expr) => { msa_maxi_u_b(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Unsigned Maximum /// /// Maximum values between unsigned elements in vector 'a'(eight unsigned 16-bit integer numbers) /// and the 5-bit unsigned immediate imm5 are written to vector /// (eight unsigned 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maxi_u.h, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_maxi_u_h(a: v8u16, imm5: i32) -> v8u16 { macro_rules! call { ($imm5:expr) => { msa_maxi_u_h(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Unsigned Maximum /// /// Maximum values between unsigned elements in vector 'a'(four unsigned 32-bit integer numbers) /// and the 5-bit unsigned immediate imm5 are written to vector /// (four unsigned 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maxi_u.w, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_maxi_u_w(a: v4u32, imm5: i32) -> v4u32 { macro_rules! call { ($imm5:expr) => { msa_maxi_u_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Unsigned Maximum /// /// Maximum values between unsigned elements in vector 'a'(two unsigned 64-bit integer numbers) /// and the 5-bit unsigned immediate imm5 are written to vector /// (two unsigned 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(maxi_u.d, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_maxi_u_d(a: v2u64, imm5: i32) -> v2u64 { macro_rules! call { ($imm5:expr) => { msa_maxi_u_d(a, $imm5) }; } constify_imm5!(imm5, call) } /// Vector Minimum Based on Absolute Value /// /// The value with the smallest magnitude, i.e. absolute value, between corresponding /// signed elements in vector 'a'(sixteen signed 8-bit integer numbers) and /// 'b'(sixteen signed 8-bit integer numbers) are written to vector /// (sixteen signed 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(min_a.b))] pub unsafe fn __msa_min_a_b(a: v16i8, b: v16i8) -> v16i8 { msa_min_a_b(a, ::mem::transmute(b)) } /// Vector Minimum Based on Absolute Value /// /// The value with the smallest magnitude, i.e. absolute value, between corresponding /// signed elements in vector 'a'(eight signed 16-bit integer numbers) and /// 'b'(eight signed 16-bit integer numbers) are written to vector /// (eight signed 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(min_a.h))] pub unsafe fn __msa_min_a_h(a: v8i16, b: v8i16) -> v8i16 { msa_min_a_h(a, ::mem::transmute(b)) } /// Vector Minimum Based on Absolute Value /// /// The value with the smallest magnitude, i.e. absolute value, between corresponding /// signed elements in vector 'a'(four signed 32-bit integer numbers) and /// 'b'(four signed 32-bit integer numbers) are written to vector /// (four signed 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(min_a.w))] pub unsafe fn __msa_min_a_w(a: v4i32, b: v4i32) -> v4i32 { msa_min_a_w(a, ::mem::transmute(b)) } /// Vector Minimum Based on Absolute Value /// /// The value with the smallest magnitude, i.e. 
absolute value, between corresponding /// signed elements in vector 'a'(two signed 64-bit integer numbers) and /// 'b'(two signed 64-bit integer numbers) are written to vector /// (two signed 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(min_a.d))] pub unsafe fn __msa_min_a_d(a: v2i64, b: v2i64) -> v2i64 { msa_min_a_d(a, ::mem::transmute(b)) } /// Vector Signed Minimum /// /// Minimum values between signed elements in vector 'a'(sixteen signed 8-bit integer numbers) /// and signed elements in vector 'b'(sixteen signed 8-bit integer numbers) are written to vector /// (sixteen signed 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(min_s.b))] pub unsafe fn __msa_min_s_b(a: v16i8, b: v16i8) -> v16i8 { msa_min_s_b(a, ::mem::transmute(b)) } /// Vector Signed Minimum /// /// Minimum values between signed elements in vector 'a'(eight signed 16-bit integer numbers) /// and signed elements in vector 'b'(eight signed 16-bit integer numbers) are written to vector /// (eight signed 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(min_s.h))] pub unsafe fn __msa_min_s_h(a: v8i16, b: v8i16) -> v8i16 { msa_min_s_h(a, ::mem::transmute(b)) } /// Vector Signed Minimum /// /// Minimum values between signed elements in vector 'a'(four signed 32-bit integer numbers) /// and signed elements in vector 'b'(four signed 32-bit integer numbers) are written to vector /// (four signed 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(min_s.w))] pub unsafe fn __msa_min_s_w(a: v4i32, b: v4i32) -> v4i32 { msa_min_s_w(a, ::mem::transmute(b)) } /// Vector Signed Minimum /// /// Minimum values between signed elements in vector 'a'(two signed 64-bit integer numbers) /// and signed elements in vector 'b'(two signed 64-bit integer numbers) are written to vector /// (two signed 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(min_s.d))] pub unsafe fn __msa_min_s_d(a: v2i64, b: v2i64) -> v2i64 { msa_min_s_d(a, ::mem::transmute(b)) } /// Immediate Signed Minimum /// /// Minimum values between signed elements in vector 'a'(sixteen signed 8-bit integer numbers) /// and the 5-bit signed immediate imm_s5 are written to vector /// (sixteen signed 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mini_s.b, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_mini_s_b(a: v16i8, imm_s5: i32) -> v16i8 { macro_rules! call { ($imm_s5:expr) => { msa_mini_s_b(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Signed Minimum /// /// Minimum values between signed elements in vector 'a'(eight signed 16-bit integer numbers) /// and the 5-bit signed immediate imm_s5 are written to vector /// (eight signed 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mini_s.h, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_mini_s_h(a: v8i16, imm_s5: i32) -> v8i16 { macro_rules! 
call { ($imm_s5:expr) => { msa_mini_s_h(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Signed Minimum /// /// Minimum values between signed elements in vector 'a'(four signed 32-bit integer numbers) /// and the 5-bit signed immediate imm_s5 are written to vector /// (four signed 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mini_s.w, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_mini_s_w(a: v4i32, imm_s5: i32) -> v4i32 { macro_rules! call { ($imm_s5:expr) => { msa_mini_s_w(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Immediate Signed Minimum /// /// Minimum values between signed elements in vector 'a'(two signed 64-bit integer numbers) /// and the 5-bit signed immediate imm_s5 are written to vector /// (two signed 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mini_s.d, imm_s5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_mini_s_d(a: v2i64, imm_s5: i32) -> v2i64 { macro_rules! call { ($imm_s5:expr) => { msa_mini_s_d(a, $imm_s5) }; } constify_imm_s5!(imm_s5, call) } /// Vector Unsigned Minimum /// /// Minimum values between unsigned elements in vector 'a'(sixteen unsigned 8-bit integer numbers) /// and unsigned elements in vector 'b'(sixteen unsigned 8-bit integer numbers) are written to vector /// (sixteen unsigned 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(min_u.b))] pub unsafe fn __msa_min_u_b(a: v16u8, b: v16u8) -> v16u8 { msa_min_u_b(a, ::mem::transmute(b)) } /// Vector Unsigned Minimum /// /// Minimum values between unsigned elements in vector 'a'(eight unsigned 16-bit integer numbers) /// and unsigned elements in vector 'b'(eight unsigned 16-bit integer numbers) are written to vector /// (eight unsigned 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(min_u.h))] pub unsafe fn __msa_min_u_h(a: v8u16, b: v8u16) -> v8u16 { msa_min_u_h(a, ::mem::transmute(b)) } /// Vector Unsigned Minimum /// /// Minimum values between unsigned elements in vector 'a'(four unsigned 32-bit integer numbers) /// and unsigned elements in vector 'b'(four unsigned 32-bit integer numbers) are written to vector /// (four unsigned 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(min_u.w))] pub unsafe fn __msa_min_u_w(a: v4u32, b: v4u32) -> v4u32 { msa_min_u_w(a, ::mem::transmute(b)) } /// Vector Unsigned Minimum /// /// Minimum values between unsigned elements in vector 'a'(two unsigned 64-bit integer numbers) /// and unsigned elements in vector 'b'(two unsigned 64-bit integer numbers) are written to vector /// (two unsigned 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(min_u.d))] pub unsafe fn __msa_min_u_d(a: v2u64, b: v2u64) -> v2u64 { msa_min_u_d(a, ::mem::transmute(b)) } /// Immediate Unsigned Minimum /// /// Minimum values between unsigned elements in vector 'a'(sixteen unsigned 8-bit integer numbers) /// and the 5-bit unsigned immediate imm5 are written to vector /// (sixteen unsigned 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mini_u.b, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_mini_u_b(a: v16u8, imm5: i32) -> v16u8 { macro_rules! 
call { ($imm5:expr) => { msa_mini_u_b(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Unsigned Minimum /// /// Minimum values between unsigned elements in vector 'a'(eight unsigned 16-bit integer numbers) /// and the 5-bit unsigned immediate imm5 are written to vector /// (eight unsigned 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mini_u.h, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_mini_u_h(a: v8u16, imm5: i32) -> v8u16 { macro_rules! call { ($imm5:expr) => { msa_mini_u_h(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Unsigned Minimum /// /// Minimum values between unsigned elements in vector 'a'(four unsigned 32-bit integer numbers) /// and the 5-bit unsigned immediate imm5 are written to vector /// (four unsigned 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mini_u.w, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_mini_u_w(a: v4u32, imm5: i32) -> v4u32 { macro_rules! call { ($imm5:expr) => { msa_mini_u_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Unsigned Minimum /// /// Minimum values between unsigned elements in vector 'a'(two unsigned 64-bit integer numbers) /// and the 5-bit unsigned immediate imm5 are written to vector /// (two unsigned 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mini_u.d, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_mini_u_d(a: v2u64, imm5: i32) -> v2u64 { macro_rules! call { ($imm5:expr) => { msa_mini_u_d(a, $imm5) }; } constify_imm5!(imm5, call) } /// Vector Signed Modulo /// /// The signed integer elements in vector 'a'(sixteen signed 8-bit integer numbers) /// are divided by signed integer elements in vector 'b'(sixteen signed 8-bit integer numbers) /// The remainder of the same sign as the dividend is written to vector /// (sixteen signed 8-bit integer numbers). If a divisor element in vector 'b' is zero, /// the result value is UNPREDICTABLE. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mod_s.b))] pub unsafe fn __msa_mod_s_b(a: v16i8, b: v16i8) -> v16i8 { msa_mod_s_b(a, ::mem::transmute(b)) } /// Vector Signed Modulo /// /// The signed integer elements in vector 'a'(eight signed 16-bit integer numbers) /// are divided by signed integer elements in vector 'b'(eight signed 16-bit integer numbers) /// The remainder of the same sign as the dividend is written to vector /// (eight signed 16-bit integer numbers). If a divisor element in vector 'b' is zero, /// the result value is UNPREDICTABLE. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mod_s.h))] pub unsafe fn __msa_mod_s_h(a: v8i16, b: v8i16) -> v8i16 { msa_mod_s_h(a, ::mem::transmute(b)) } /// Vector Signed Modulo /// /// The signed integer elements in vector 'a'(four signed 32-bit integer numbers) /// are divided by signed integer elements in vector 'b'(four signed 32-bit integer numbers) /// The remainder of the same sign as the dividend is written to vector /// (four signed 32-bit integer numbers). If a divisor element in vector 'b' is zero, /// the result value is UNPREDICTABLE.
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mod_s.w))] pub unsafe fn __msa_mod_s_w(a: v4i32, b: v4i32) -> v4i32 { msa_mod_s_w(a, ::mem::transmute(b)) } /// Vector Signed Modulo /// /// The signed integer elements in vector 'a'(two signed 64-bit integer numbers) /// are divided by signed integer elements in vector 'b'(two signed 64-bit integer numbers) /// The remainder of the same sign as the dividend is written to vector /// (two signed 64-bit integer numbers). If a divisor element in vector 'b' is zero, /// the result value is UNPREDICTABLE. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mod_s.d))] pub unsafe fn __msa_mod_s_d(a: v2i64, b: v2i64) -> v2i64 { msa_mod_s_d(a, ::mem::transmute(b)) } /// Vector Unsigned Modulo /// /// The unsigned integer elements in vector 'a'(sixteen unsigned 8-bit integer numbers) /// are divided by unsigned integer elements in vector 'b'(sixteen unsigned 8-bit integer numbers) /// The remainder of the same sign as the dividend is written to vector /// (sixteen unsigned 8-bit integer numbers). If a divisor element in vector 'b' is zero, /// the result value is UNPREDICTABLE. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mod_u.b))] pub unsafe fn __msa_mod_u_b(a: v16u8, b: v16u8) -> v16u8 { msa_mod_u_b(a, ::mem::transmute(b)) } /// Vector Unsigned Modulo /// /// The unsigned integer elements in vector 'a'(eight unsigned 16-bit integer numbers) /// are divided by unsigned integer elements in vector 'b'(eight unsigned 16-bit integer numbers) /// The remainder of the same sign as the dividend is written to vector /// (eight unsigned 16-bit integer numbers). If a divisor element in vector 'b' is zero, /// the result value is UNPREDICTABLE. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mod_u.h))] pub unsafe fn __msa_mod_u_h(a: v8u16, b: v8u16) -> v8u16 { msa_mod_u_h(a, ::mem::transmute(b)) } /// Vector Unsigned Modulo /// /// The unsigned integer elements in vector 'a'(four unsigned 32-bit integer numbers) /// are divided by unsigned integer elements in vector 'b'(four unsigned 32-bit integer numbers) /// The remainder of the same sign as the dividend is written to vector /// (four unsigned 32-bit integer numbers). If a divisor element in vector 'b' is zero, /// the result value is UNPREDICTABLE. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mod_u.w))] pub unsafe fn __msa_mod_u_w(a: v4u32, b: v4u32) -> v4u32 { msa_mod_u_w(a, ::mem::transmute(b)) } /// Vector Unsigned Modulo /// /// The unsigned integer elements in vector 'a'(two unsigned 64-bit integer numbers) /// are divided by unsigned integer elements in vector 'b'(two unsigned 64-bit integer numbers) /// The remainder of the same sign as the dividend is written to vector /// (two unsigned 64-bit integer numbers). If a divisor element in vector 'b' is zero, /// the result value is UNPREDICTABLE. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mod_u.d))] pub unsafe fn __msa_mod_u_d(a: v2u64, b: v2u64) -> v2u64 { msa_mod_u_d(a, ::mem::transmute(b)) } /// Vector Move /// /// Copy all WRLEN bits in vector 'a'(sixteen signed 8-bit integer numbers) /// to vector (sixteen signed 8-bit integer numbers).
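///
/// A minimal usage sketch, marked `ignore` because it only runs on a MIPS
/// target with MSA enabled; it assumes a `v16i8` value can be built by
/// transmuting a plain `[i8; 16]` array:
///
/// ```ignore
/// unsafe {
///     let a: v16i8 = ::mem::transmute([1i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
///     // `r` is a bit-for-bit copy of `a`.
///     let r = __msa_move_v(a);
/// }
/// ```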
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(move.v))] pub unsafe fn __msa_move_v(a: v16i8) -> v16i8 { msa_move_v(a) } /// Vector Fixed-Point Multiply and Subtract /// /// The product of fixed-point elements in vector 'c'(eight signed 16-bit integer numbers) /// by fixed-point elements in vector 'b'(eight signed 16-bit integer numbers) /// are subtracted from the fixed-point elements in vector 'a' /// (eight signed 16-bit integer numbers).The multiplication result is not saturated, /// i.e. exact (-1) * (-1) = 1 is subtracted from the destination. /// The saturated fixed-point results are stored back to vector 'a' /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(msub_q.h))] pub unsafe fn __msa_msub_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { msa_msub_q_h(a, ::mem::transmute(b), c) } /// Vector Fixed-Point Multiply and Subtract /// /// The product of fixed-point elements in vector 'c'(four signed 32-bit integer numbers) /// by fixed-point elements in vector 'b'(four signed 32-bit integer numbers) /// are subtracted from the fixed-point elements in vector 'a' /// (four signed 32-bit integer numbers).The multiplication result is not saturated, /// i.e. exact (-1) * (-1) = 1 is subtracted from the destination. /// The saturated fixed-point results are stored back to vector 'a' /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(msub_q.w))] pub unsafe fn __msa_msub_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { msa_msub_q_w(a, ::mem::transmute(b), c) } /// Vector Fixed-Point Multiply and Subtract Rounded /// /// The product of fixed-point elements in vector 'c'(eight signed 16-bit integer numbers) /// by fixed-point elements in vector 'b'(eight signed 16-bit integer numbers) /// are subtracted from the fixed-point elements in vector 'a' /// (eight signed 16-bit integer numbers).The multiplication result is not saturated, /// i.e. exact (-1) * (-1) = 1 is subtracted from the destination. /// The rounded and saturated fixed-point results are stored back to vector 'a' /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(msubr_q.h))] pub unsafe fn __msa_msubr_q_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { msa_msubr_q_h(a, ::mem::transmute(b), c) } /// Vector Fixed-Point Multiply and Subtract Rounded /// /// The product of fixed-point elements in vector 'c'(four signed 32-bit integer numbers) /// by fixed-point elements in vector 'b'(four signed 32-bit integer numbers) /// are subtracted from the fixed-point elements in vector 'a' /// (four signed 32-bit integer numbers).The multiplication result is not saturated, /// i.e. exact (-1) * (-1) = 1 is subtracted from the destination. /// The rounded and saturated fixed-point results are stored back to vector 'a' /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(msubr_q.w))] pub unsafe fn __msa_msubr_q_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { msa_msubr_q_w(a, ::mem::transmute(b), c) } /// Vector Multiply and Subtract /// /// The integer elements in vector 'c'(sixteen signed 8-bit integer numbers) /// are multiplied by integer elements in vector 'b'(sixteen signed 8-bit integer numbers) /// and subtracted from the integer elements in vector 'a'(sixteen signed 8-bit integer numbers) /// The most significant half of the multiplication result is discarded. 
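///
/// An illustrative sketch of the lane-wise behaviour described above, marked
/// `ignore` since it needs a MIPS target with MSA; the `[i8; 16]` transmutes
/// are only an assumed way of building the vector arguments:
///
/// ```ignore
/// unsafe {
///     let a: v16i8 = ::mem::transmute([10i8; 16]);
///     let b: v16i8 = ::mem::transmute([2i8; 16]);
///     let c: v16i8 = ::mem::transmute([3i8; 16]);
///     // Each lane becomes 10 - 2 * 3 = 4.
///     let r = __msa_msubv_b(a, b, c);
/// }
/// ```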
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(msubv.b))] pub unsafe fn __msa_msubv_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8 { msa_msubv_b(a, ::mem::transmute(b), c) } /// Vector Multiply and Subtract /// /// The integer elements in vector 'c'(eight signed 16-bit integer numbers) /// are multiplied by integer elements in vector 'b'(eight signed 16-bit integer numbers) /// and subtracted from the integer elements in vector 'a'(eight signed 16-bit integer numbers) /// The most significant half of the multiplication result is discarded. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(msubv.h))] pub unsafe fn __msa_msubv_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { msa_msubv_h(a, ::mem::transmute(b), c) } /// Vector Multiply and Subtract /// /// The integer elements in vector 'c'(four signed 32-bit integer numbers) /// are multiplied by integer elements in vector 'b'(four signed 32-bit integer numbers) /// and subtracted from the integer elements in vector 'a'(four signed 32-bit integer numbers) /// The most significant half of the multiplication result is discarded. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(msubv.w))] pub unsafe fn __msa_msubv_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { msa_msubv_w(a, ::mem::transmute(b), c) } /// Vector Multiply and Subtract /// /// The integer elements in vector 'c'(two signed 64-bit integer numbers) /// are multiplied by integer elements in vector 'b'(two signed 64-bit integer numbers) /// and subtracted from the integer elements in vector 'a'(two signed 64-bit integer numbers) /// The most significant half of the multiplication result is discarded. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(msubv.d))] pub unsafe fn __msa_msubv_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64 { msa_msubv_d(a, ::mem::transmute(b), c) } /// Vector Fixed-Point Multiply /// /// The fixed-point elements in vector 'a'(eight signed 16-bit integer numbers) /// multiplied by fixed-point elements in vector 'b'(eight signed 16-bit integer numbers) /// The result is written to vector (eight signed 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mul_q.h))] pub unsafe fn __msa_mul_q_h(a: v8i16, b: v8i16) -> v8i16 { msa_mul_q_h(a, ::mem::transmute(b)) } /// Vector Fixed-Point Multiply /// /// The fixed-point elements in vector 'a'(four signed 32-bit integer numbers) /// multiplied by fixed-point elements in vector 'b'(four signed 32-bit integer numbers) /// The result is written to vector (four signed 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mul_q.w))] pub unsafe fn __msa_mul_q_w(a: v4i32, b: v4i32) -> v4i32 { msa_mul_q_w(a, ::mem::transmute(b)) } /// Vector Fixed-Point Multiply Rounded /// /// The fixed-point elements in vector 'a'(eight signed 16-bit integer numbers) /// multiplied by fixed-point elements in vector 'b'(eight signed 16-bit integer numbers) /// The rounded result is written to vector (eight signed 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mulr_q.h))] pub unsafe fn __msa_mulr_q_h(a: v8i16, b: v8i16) -> v8i16 { msa_mulr_q_h(a, ::mem::transmute(b)) } /// Vector Fixed-Point Multiply Rounded /// /// The fixed-point elements in vector 'a'(four signed 32-bit integer numbers) /// multiplied by fixed-point elements in vector 'b'(four signed 32-bit integer numbers) /// The rounded result is 
written to vector (four signed 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mulr_q.w))] pub unsafe fn __msa_mulr_q_w(a: v4i32, b: v4i32) -> v4i32 { msa_mulr_q_w(a, ::mem::transmute(b)) } /// Vector Multiply /// /// The integer elements in vector 'a'(sixteen signed 8-bit integer numbers) /// are multiplied by integer elements in vector 'b'(sixteen signed 8-bit integer numbers) /// The result is written to vector (sixteen signed 8-bit integer numbers) /// The most significant half of the multiplication result is discarded. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mulv.b))] pub unsafe fn __msa_mulv_b(a: v16i8, b: v16i8) -> v16i8 { msa_mulv_b(a, ::mem::transmute(b)) } /// Vector Multiply /// /// The integer elements in vector 'a'(eight signed 16-bit integer numbers) /// are multiplied by integer elements in vector 'b'(eight signed 16-bit integer numbers) /// The result is written to vector (eight signed 16-bit integer numbers) /// The most significant half of the multiplication result is discarded. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mulv.h))] pub unsafe fn __msa_mulv_h(a: v8i16, b: v8i16) -> v8i16 { msa_mulv_h(a, ::mem::transmute(b)) } /// Vector Multiply /// /// The integer elements in vector 'a'(four signed 32-bit integer numbers) /// are multiplied by integer elements in vector 'b'(four signed 32-bit integer numbers) /// The result is written to vector (four signed 32-bit integer numbers) /// The most significant half of the multiplication result is discarded. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mulv.w))] pub unsafe fn __msa_mulv_w(a: v4i32, b: v4i32) -> v4i32 { msa_mulv_w(a, ::mem::transmute(b)) } /// Vector Multiply /// /// The integer elements in vector 'a'(two signed 64-bit integer numbers) /// are multiplied by integer elements in vector 'b'(two signed 64-bit integer numbers) /// The result is written to vector (two signed 64-bit integer numbers) /// The most significant half of the multiplication result is discarded. 
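///
/// A short, illustrative sketch (marked `ignore`; it requires a MIPS target
/// with MSA and assumes the vectors can be built by transmuting `[i64; 2]`
/// arrays):
///
/// ```ignore
/// unsafe {
///     let a: v2i64 = ::mem::transmute([3i64, -4]);
///     let b: v2i64 = ::mem::transmute([5i64, 6]);
///     // Lane-wise products 15 and -24; only the low 64 bits of each
///     // product are kept.
///     let r = __msa_mulv_d(a, b);
/// }
/// ```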
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(mulv.d))] pub unsafe fn __msa_mulv_d(a: v2i64, b: v2i64) -> v2i64 { msa_mulv_d(a, ::mem::transmute(b)) } /// Vector Leading Ones Count /// /// The number of leading ones for elements in vector 'a'(sixteen signed 8-bit integer numbers) /// is stored to the elements in vector (sixteen signed 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(nloc.b))] pub unsafe fn __msa_nloc_b(a: v16i8) -> v16i8 { msa_nloc_b(a) } /// Vector Leading Ones Count /// /// The number of leading ones for elements in vector 'a'(eight signed 16-bit integer numbers) /// is stored to the elements in vector (eight signed 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(nloc.h))] pub unsafe fn __msa_nloc_h(a: v8i16) -> v8i16 { msa_nloc_h(a) } /// Vector Leading Ones Count /// /// The number of leading ones for elements in vector 'a'(four signed 32-bit integer numbers) /// is stored to the elements in vector (four signed 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(nloc.w))] pub unsafe fn __msa_nloc_w(a: v4i32) -> v4i32 { msa_nloc_w(a) } /// Vector Leading Ones Count /// /// The number of leading ones for elements in vector 'a'(two signed 64-bit integer numbers) /// is stored to the elements in vector (two signed 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(nloc.d))] pub unsafe fn __msa_nloc_d(a: v2i64) -> v2i64 { msa_nloc_d(a) } /// Vector Leading Zeros Count /// /// The number of leading zeros for elements in vector 'a'(sixteen signed 8-bit integer numbers) /// is stored to the elements in vector (sixteen signed 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(nlzc.b))] pub unsafe fn __msa_nlzc_b(a: v16i8) -> v16i8 { msa_nlzc_b(a) } /// Vector Leading Zeros Count /// /// The number of leading zeros for elements in vector 'a'(eight signed 16-bit integer numbers) /// is stored to the elements in vector (eight signed 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(nlzc.h))] pub unsafe fn __msa_nlzc_h(a: v8i16) -> v8i16 { msa_nlzc_h(a) } /// Vector Leading Zeros Count /// /// The number of leading zeros for elements in vector 'a'(four signed 32-bit integer numbers) /// is stored to the elements in vector (four signed 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(nlzc.w))] pub unsafe fn __msa_nlzc_w(a: v4i32) -> v4i32 { msa_nlzc_w(a) } /// Vector Leading Zeros Count /// /// The number of leading zeros for elements in vector 'a'(two signed 64-bit integer numbers) /// is stored to the elements in vector (two signed 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(nlzc.d))] pub unsafe fn __msa_nlzc_d(a: v2i64) -> v2i64 { msa_nlzc_d(a) } /// Vector Logical Negated Or /// /// Each bit of vector 'a'(sixteen unsigned 8-bit integer numbers) /// is combined with the corresponding bit of vector 'b' (sixteen unsigned 8-bit integer numbers) /// in a bitwise logical NOR operation. 
The result is written to vector /// (sixteen unsigned 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(nor.v))] pub unsafe fn __msa_nor_v(a: v16u8, b: v16u8) -> v16u8 { msa_nor_v(a, ::mem::transmute(b)) } /// Immediate Logical Negated Or /// /// Each bit of vector 'a'(sixteen unsigned 8-bit integer numbers) /// is combined with the 8-bit immediate imm8 /// in a bitwise logical NOR operation. The result is written to vector /// (sixteen unsigned 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(nori.b, imm8 = 0b11111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_nori_b(a: v16u8, imm8: i32) -> v16u8 { macro_rules! call { ($imm8:expr) => { msa_nori_b(a, $imm8) }; } constify_imm8!(imm8, call) } /// Vector Logical Or /// /// Each bit of vector 'a'(sixteen unsigned 8-bit integer numbers) /// is combined with the corresponding bit of vector 'b' (sixteen unsigned 8-bit integer numbers) /// in a bitwise logical OR operation. The result is written to vector /// (sixteen unsigned 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(or.v))] pub unsafe fn __msa_or_v(a: v16u8, b: v16u8) -> v16u8 { msa_or_v(a, ::mem::transmute(b)) } /// Immediate Logical Or /// /// Each bit of vector 'a'(sixteen unsigned 8-bit integer numbers) /// is combined with the 8-bit immediate imm8 /// in a bitwise logical OR operation. The result is written to vector /// (sixteen unsigned 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(ori.b, imm8 = 0b11111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_ori_b(a: v16u8, imm8: i32) -> v16u8 { macro_rules! call { ($imm8:expr) => { msa_ori_b(a, $imm8) }; } constify_imm8!(imm8, call) } /// Vector Pack Even /// /// Even elements in vectors 'a' (sixteen signed 8-bit integer numbers) /// are copied to the left half of the result vector and even elements in vector 'b' /// (sixteen signed 8-bit integer numbers) are copied to the right half of the result vector. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(pckev.b))] pub unsafe fn __msa_pckev_b(a: v16i8, b: v16i8) -> v16i8 { msa_pckev_b(a, ::mem::transmute(b)) } /// Vector Pack Even /// /// Even elements in vectors 'a' (eight signed 16-bit integer numbers) /// are copied to the left half of the result vector and even elements in vector 'b' /// (eight signed 16-bit integer numbers) are copied to the right half of the result vector. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(pckev.h))] pub unsafe fn __msa_pckev_h(a: v8i16, b: v8i16) -> v8i16 { msa_pckev_h(a, ::mem::transmute(b)) } /// Vector Pack Even /// /// Even elements in vectors 'a' (four signed 32-bit integer numbers) /// are copied to the left half of the result vector and even elements in vector 'b' /// (four signed 32-bit integer numbers) are copied to the right half of the result vector. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(pckev.w))] pub unsafe fn __msa_pckev_w(a: v4i32, b: v4i32) -> v4i32 { msa_pckev_w(a, ::mem::transmute(b)) } /// Vector Pack Even /// /// Even elements in vectors 'a' (two signed 64-bit integer numbers) /// are copied to the left half of the result vector and even elements in vector 'b' /// (two signed 64-bit integer numbers) are copied to the right half of the result vector. 
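///
/// An illustrative sketch of the packing described above (marked `ignore`;
/// it requires a MIPS target with MSA and assumes the vectors are built by
/// transmuting `[i64; 2]` arrays):
///
/// ```ignore
/// unsafe {
///     let a: v2i64 = ::mem::transmute([10i64, 11]);
///     let b: v2i64 = ::mem::transmute([20i64, 21]);
///     // Per the description above, the even (index 0) element of `a`, 10,
///     // fills the left half of the result and the even element of `b`, 20,
///     // fills the right half.
///     let r = __msa_pckev_d(a, b);
/// }
/// ```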
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(pckev.d))] pub unsafe fn __msa_pckev_d(a: v2i64, b: v2i64) -> v2i64 { msa_pckev_d(a, ::mem::transmute(b)) } /// Vector Pack Odd /// /// Odd elements in vectors 'a' (sixteen signed 8-bit integer numbers) /// are copied to the left half of the result vector and odd elements in vector 'b' /// (sixteen signed 8-bit integer numbers) are copied to the right half of the result vector. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(pckod.b))] pub unsafe fn __msa_pckod_b(a: v16i8, b: v16i8) -> v16i8 { msa_pckod_b(a, ::mem::transmute(b)) } /// Vector Pack Odd /// /// Odd elements in vectors 'a' (eight signed 16-bit integer numbers) /// are copied to the left half of the result vector and odd elements in vector 'b' /// (eight signed 16-bit integer numbers) are copied to the right half of the result vector. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(pckod.h))] pub unsafe fn __msa_pckod_h(a: v8i16, b: v8i16) -> v8i16 { msa_pckod_h(a, ::mem::transmute(b)) } /// Vector Pack Odd /// /// Odd elements in vectors 'a' (four signed 32-bit integer numbers) /// are copied to the left half of the result vector and odd elements in vector 'b' /// (four signed 32-bit integer numbers) are copied to the right half of the result vector. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(pckod.w))] pub unsafe fn __msa_pckod_w(a: v4i32, b: v4i32) -> v4i32 { msa_pckod_w(a, ::mem::transmute(b)) } /// Vector Pack Odd /// /// Odd elements in vectors 'a' (two signed 64-bit integer numbers) /// are copied to the left half of the result vector and odd elements in vector 'b' /// (two signed 64-bit integer numbers) are copied to the right half of the result vector. 
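///
/// An illustrative sketch (marked `ignore`; it requires a MIPS target with
/// MSA and assumes the vectors are built by transmuting `[i64; 2]` arrays):
///
/// ```ignore
/// unsafe {
///     let a: v2i64 = ::mem::transmute([10i64, 11]);
///     let b: v2i64 = ::mem::transmute([20i64, 21]);
///     // The odd (index 1) element of `a`, 11, fills the left half of the
///     // result and the odd element of `b`, 21, fills the right half.
///     let r = __msa_pckod_d(a, b);
/// }
/// ```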
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(pckod.d))] pub unsafe fn __msa_pckod_d(a: v2i64, b: v2i64) -> v2i64 { msa_pckod_d(a, ::mem::transmute(b)) } /// Vector Population Count /// /// The number of bits set to 1 for elements in vector 'a' (sixteen signed 8-bit integer numbers) /// is stored to the elements in the result vector (sixteen signed 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(pcnt.b))] pub unsafe fn __msa_pcnt_b(a: v16i8) -> v16i8 { msa_pcnt_b(a) } /// Vector Population Count /// /// The number of bits set to 1 for elements in vector 'a' (eight signed 16-bit integer numbers) /// is stored to the elements in the result vector (eight signed 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(pcnt.h))] pub unsafe fn __msa_pcnt_h(a: v8i16) -> v8i16 { msa_pcnt_h(a) } /// Vector Population Count /// /// The number of bits set to 1 for elements in vector 'a' (four signed 32-bit integer numbers) /// is stored to the elements in the result vector (four signed 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(pcnt.w))] pub unsafe fn __msa_pcnt_w(a: v4i32) -> v4i32 { msa_pcnt_w(a) } /// Vector Population Count /// /// The number of bits set to 1 for elements in vector 'a' (two signed 64-bit integer numbers) /// is stored to the elements in the result vector (two signed 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(pcnt.d))] pub unsafe fn __msa_pcnt_d(a: v2i64) -> v2i64 { msa_pcnt_d(a) } /// Immediate Signed Saturate /// /// Signed elements in vector 'a' (sixteen signed 8-bit integer numbers) /// are saturated to signed values of imm3+1 bits without changing the data width /// The result is stored in the vector (sixteen signed 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sat_s.b, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_sat_s_b(a: v16i8, imm3: i32) -> v16i8 { macro_rules! call { ($imm3:expr) => { msa_sat_s_b(a, $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Signed Saturate /// /// Signed elements in vector 'a' (eight signed 16-bit integer numbers) /// are saturated to signed values of imm4+1 bits without changing the data width /// The result is stored in the vector (eight signed 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sat_s.h, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_sat_s_h(a: v8i16, imm4: i32) -> v8i16 { macro_rules! call { ($imm4:expr) => { msa_sat_s_h(a, $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Signed Saturate /// /// Signed elements in vector 'a' (four signed 32-bit integer numbers) /// are saturated to signed values of imm5+1 bits without changing the data width /// The result is stored in the vector (four signed 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sat_s.w, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_sat_s_w(a: v4i32, imm5: i32) -> v4i32 { macro_rules!
call { ($imm5:expr) => { msa_sat_s_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Signed Saturate /// /// Signed elements in vector 'a' (two signed 64-bit integer numbers) /// are saturated to signed values of imm6+1 bits without changing the data width /// The result is stored in the vector (two signed 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sat_s.d, imm6 = 0b111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_sat_s_d(a: v2i64, imm6: i32) -> v2i64 { macro_rules! call { ($imm6:expr) => { msa_sat_s_d(a, $imm6) }; } constify_imm6!(imm6, call) } /// Immediate Unsigned Saturate /// /// Unsigned elements in vector 'a' (sixteen unsigned 8-bit integer numbers) /// are saturated to unsigned values of imm3+1 bits without changing the data width /// The result is stored in the vector (sixteen unsigned 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sat_u.b, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_sat_u_b(a: v16u8, imm3: i32) -> v16u8 { macro_rules! call { ($imm3:expr) => { msa_sat_u_b(a, $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Unsigned Saturate /// /// Unsigned elements in vector 'a' (eight unsigned 16-bit integer numbers) /// are saturated to unsigned values of imm4+1 bits without changing the data width /// The result is stored in the vector (eight unsigned 16-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sat_u.h, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_sat_u_h(a: v8u16, imm4: i32) -> v8u16 { macro_rules! call { ($imm4:expr) => { msa_sat_u_h(a, $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Unsigned Saturate /// /// Unsigned elements in vector 'a' (four unsigned 32-bit integer numbers) /// are saturated to unsigned values of imm5+1 bits without changing the data width /// The result is stored in the vector (four unsigned 32-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sat_u.w, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_sat_u_w(a: v4u32, imm5: i32) -> v4u32 { macro_rules! call { ($imm5:expr) => { msa_sat_u_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Unsigned Saturate /// /// Unsigned elements in vector 'a' (two unsigned 64-bit integer numbers) /// are saturated to unsigned values of imm6+1 bits without changing the data width /// The result is stored in the vector (two unsigned 64-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sat_u.d, imm6 = 0b111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_sat_u_d(a: v2u64, imm6: i32) -> v2u64 { macro_rules! call { ($imm6:expr) => { msa_sat_u_d(a, $imm6) }; } constify_imm6!(imm6, call) } /// Immediate Set Shuffle Elements /// /// The set shuffle instruction works on 4-element sets. /// All sets are shuffled in the same way: for each i (0, 1, 2, 3), the element of 'a' /// (sixteen signed 8-bit integer numbers) selected by the 2-bit field imm8[2i+1..2i] /// is copied over the element i in the result vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(shf.b, imm8 = 0b11111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_shf_b(a: v16i8, imm8: i32) -> v16i8 { macro_rules!
call { ($imm8:expr) => { msa_shf_b(a, $imm8) }; } constify_imm8!(imm8, call) } /// Immediate Set Shuffle Elements /// /// The set shuffle instruction works on 4-element sets. /// All sets are shuffled in the same way: the element i82i+1..2i in 'a' /// (eight signed 16-bit integer numbers) is copied over the element i in result vector /// (eight signed 16-bit integer numbers), where i is 0, 1, 2, 3. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(shf.h, imm8 = 0b11111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_shf_h(a: v8i16, imm8: i32) -> v8i16 { macro_rules! call { ($imm8:expr) => { msa_shf_h(a, $imm8) }; } constify_imm8!(imm8, call) } /// Immediate Set Shuffle Elements /// /// The set shuffle instruction works on 4-element sets. /// All sets are shuffled in the same way: the element i82i+1..2i in 'a' /// (four signed 32-bit integer numbers) is copied over the element i in result vector /// (four signed 32-bit integer numbers), where i is 0, 1, 2, 3. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(shf.w, imm8 = 0b11111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_shf_w(a: v4i32, imm8: i32) -> v4i32 { macro_rules! call { ($imm8:expr) => { msa_shf_w(a, $imm8) }; } constify_imm8!(imm8, call) } /// GPR Columns Slide /// /// Vector registers 'a' (sixteen signed 8-bit integer numbers) and 'b' /// (sixteen signed 8-bit integer numbers) contain 2-dimensional byte arrays (rectangles) /// stored row-wise with as many rows asbytes in integer data format df. /// The two source rectangles 'b' and 'a' are concatenated horizontally in the order /// they appear in the syntax, i.e. first 'a' and then 'b'. Place a new destination /// rectangle over 'b' and then slide it to the left over the concatenation of 'a' and 'b' /// by the number of columns given in GPR 'c'. /// The result is written to vector (sixteen signed 8-bit integer numbers). /// GPR 'c' value is interpreted modulo the number of columns in destination rectangle, /// or equivalently, the number of data format df elements in the destination vector. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sld.b))] pub unsafe fn __msa_sld_b(a: v16i8, b: v16i8, c: i32) -> v16i8 { msa_sld_b(a, ::mem::transmute(b), c) } /// GPR Columns Slide /// /// Vector registers 'a' (eight signed 16-bit integer numbers) and 'b' /// (eight signed 16-bit integer numbers) contain 2-dimensional byte arrays (rectangles) /// stored row-wise with as many rows asbytes in integer data format df. /// The two source rectangles 'b' and 'a' are concatenated horizontally in the order /// they appear in the syntax, i.e. first 'a' and then 'b'. Place a new destination /// rectangle over 'b' and then slide it to the left over the concatenation of 'a' and 'b' /// by the number of columns given in GPR 'c'. /// The result is written to vector (eight signed 16-bit integer numbers). /// GPR 'c' value is interpreted modulo the number of columns in destination rectangle, /// or equivalently, the number of data format df elements in the destination vector. 
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sld.h))] pub unsafe fn __msa_sld_h(a: v8i16, b: v8i16, c: i32) -> v8i16 { msa_sld_h(a, ::mem::transmute(b), c) } /// GPR Columns Slide /// /// Vector registers 'a' (four signed 32-bit integer numbers) and 'b' /// (four signed 32-bit integer numbers) contain 2-dimensional byte arrays (rectangles) /// stored row-wise with as many rows asbytes in integer data format df. /// The two source rectangles 'b' and 'a' are concatenated horizontally in the order /// they appear in the syntax, i.e. first 'a' and then 'b'. Place a new destination /// rectangle over 'b' and then slide it to the left over the concatenation of 'a' and 'b' /// by the number of columns given in GPR 'c'. /// The result is written to vector (four signed 32-bit integer numbers). /// GPR 'c' value is interpreted modulo the number of columns in destination rectangle, /// or equivalently, the number of data format df elements in the destination vector. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sld.w))] pub unsafe fn __msa_sld_w(a: v4i32, b: v4i32, c: i32) -> v4i32 { msa_sld_w(a, ::mem::transmute(b), c) } /// GPR Columns Slide /// /// Vector registers 'a' (two signed 64-bit integer numbers) and 'b' /// (two signed 64-bit integer numbers) contain 2-dimensional byte arrays (rectangles) /// stored row-wise with as many rows asbytes in integer data format df. /// The two source rectangles 'b' and 'a' are concatenated horizontally in the order /// they appear in the syntax, i.e. first 'a' and then 'b'. Place a new destination /// rectangle over 'b' and then slide it to the left over the concatenation of 'a' and 'b' /// by the number of columns given in GPR 'c'. /// The result is written to vector (two signed 64-bit integer numbers). /// GPR 'c' value is interpreted modulo the number of columns in destination rectangle, /// or equivalently, the number of data format df elements in the destination vector. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sld.d))] pub unsafe fn __msa_sld_d(a: v2i64, b: v2i64, c: i32) -> v2i64 { msa_sld_d(a, ::mem::transmute(b), c) } /// Immediate Columns Slide /// /// Vector registers 'a' (sixteen signed 8-bit integer numbers) and 'b' /// (sixteen signed 8-bit integer numbers) contain 2-dimensional byte arrays (rectangles) /// stored row-wise with as many rows asbytes in integer data format df. /// The two source rectangles 'b' and 'a' are concatenated horizontally in the order /// they appear in the syntax, i.e. first 'a' and then 'b'. Place a new destination /// rectangle over 'b' and then slide it to the left over the concatenation of 'a' and 'b' /// by imm1 columns /// The result is written to vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sldi.b, imm4 = 0b1111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_sldi_b(a: v16i8, b: v16i8, imm4: i32) -> v16i8 { macro_rules! call { ($imm4:expr) => { msa_sldi_b(a, ::mem::transmute(b), $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Columns Slide /// /// Vector registers 'a' (eight signed 16-bit integer numbers) and 'b' /// (eight signed 16-bit integer numbers) contain 2-dimensional byte arrays (rectangles) /// stored row-wise with as many rows asbytes in integer data format df. /// The two source rectangles 'b' and 'a' are concatenated horizontally in the order /// they appear in the syntax, i.e. first 'a' and then 'b'. 
Place a new destination /// rectangle over 'b' and then slide it to the left over the concatenation of 'a' and 'b' /// by imm1 columns /// The result is written to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sldi.h, imm3 = 0b111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_sldi_h(a: v8i16, b: v8i16, imm3: i32) -> v8i16 { macro_rules! call { ($imm3:expr) => { msa_sldi_h(a, ::mem::transmute(b), $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Columns Slide /// /// Vector registers 'a' (four signed 32-bit integer numbers) and 'b' /// (four signed 32-bit integer numbers) contain 2-dimensional byte arrays (rectangles) /// stored row-wise with as many rows asbytes in integer data format df. /// The two source rectangles 'b' and 'a' are concatenated horizontally in the order /// they appear in the syntax, i.e. first 'a' and then 'b'. Place a new destination /// rectangle over 'b' and then slide it to the left over the concatenation of 'a' and 'b' /// by imm1 columns /// The result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sldi.w, imm2 = 0b11))] #[rustc_args_required_const(2)] pub unsafe fn __msa_sldi_w(a: v4i32, b: v4i32, imm2: i32) -> v4i32 { macro_rules! call { ($imm2:expr) => { msa_sldi_w(a, ::mem::transmute(b), $imm2) }; } constify_imm2!(imm2, call) } /// Immediate Columns Slide /// /// Vector registers 'a' (two signed 64-bit integer numbers) and 'b' /// (two signed 64-bit integer numbers) contain 2-dimensional byte arrays (rectangles) /// stored row-wise with as many rows asbytes in integer data format df. /// The two source rectangles 'b' and 'a' are concatenated horizontally in the order /// they appear in the syntax, i.e. first 'a' and then 'b'. Place a new destination /// rectangle over 'b' and then slide it to the left over the concatenation of 'a' and 'b' /// by imm1 columns /// The result is written to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sldi.d, imm1 = 0b1))] #[rustc_args_required_const(2)] pub unsafe fn __msa_sldi_d(a: v2i64, b: v2i64, imm1: i32) -> v2i64 { macro_rules! call { ($imm1:expr) => { msa_sldi_d(a, ::mem::transmute(b), $imm1) }; } constify_imm1!(imm1, call) } /// Vector Shift Left /// /// The elements in vector 'a'(sixteen signed 8-bit integer numbers) /// are shifted left by the number of bits the elements in vector 'b' /// (sixteen signed 8-bit integer numbers) specify modulo the size of the /// element in bits.The result is written to vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sll.b))] pub unsafe fn __msa_sll_b(a: v16i8, b: v16i8) -> v16i8 { msa_sll_b(a, ::mem::transmute(b)) } /// Vector Shift Left /// /// The elements in vector 'a'(eight signed 16-bit integer numbers) /// are shifted left by the number of bits the elements in vector 'b' /// (eight signed 16-bit integer numbers) specify modulo the size of the /// element in bits.The result is written to vector (eight signed 16-bit integer numbers). 
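///
/// A lane-by-lane sketch of the shift described above (marked `ignore`; it
/// requires a MIPS target with MSA and assumes the vectors are built by
/// transmuting `[i16; 8]` arrays):
///
/// ```ignore
/// unsafe {
///     let a: v8i16 = ::mem::transmute([1i16, 1, 1, 1, 1, 1, 1, 1]);
///     let b: v8i16 = ::mem::transmute([0i16, 1, 2, 3, 4, 5, 6, 7]);
///     // Each lane of `a` is shifted left by the matching lane of `b`
///     // (modulo 16), giving 1, 2, 4, 8, 16, 32, 64 and 128.
///     let r = __msa_sll_h(a, b);
/// }
/// ```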
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sll.h))] pub unsafe fn __msa_sll_h(a: v8i16, b: v8i16) -> v8i16 { msa_sll_h(a, ::mem::transmute(b)) } /// Vector Shift Left /// /// The elements in vector 'a'(four signed 32-bit integer numbers) /// are shifted left by the number of bits the elements in vector 'b' /// (four signed 32-bit integer numbers) specify modulo the size of the /// element in bits.The result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sll.w))] pub unsafe fn __msa_sll_w(a: v4i32, b: v4i32) -> v4i32 { msa_sll_w(a, ::mem::transmute(b)) } /// Vector Shift Left /// /// The elements in vector 'a'(two signed 64-bit integer numbers) /// are shifted left by the number of bits the elements in vector 'b' /// (two signed 64-bit integer numbers) specify modulo the size of the /// element in bits.The result is written to vector(two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sll.d))] pub unsafe fn __msa_sll_d(a: v2i64, b: v2i64) -> v2i64 { msa_sll_d(a, ::mem::transmute(b)) } /// Immediate Shift Left /// /// The elements in vector 'a'(sixteen signed 8-bit integer numbers) /// are shifted left by the imm4 bits. /// The result is written to vector(sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(slli.b, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_slli_b(a: v16i8, imm4: i32) -> v16i8 { macro_rules! call { ($imm4:expr) => { msa_slli_b(a, $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Shift Left /// /// The elements in vector 'a'(eight signed 16-bit integer numbers) /// are shifted left by the imm3 bits. /// The result is written to vector(eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(slli.h, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_slli_h(a: v8i16, imm3: i32) -> v8i16 { macro_rules! call { ($imm3:expr) => { msa_slli_h(a, $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Shift Left /// /// The elements in vector 'a'(four signed 32-bit integer numbers) /// are shifted left by the imm2 bits. /// The result is written to vector(four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(slli.w, imm2 = 0b11))] #[rustc_args_required_const(1)] pub unsafe fn __msa_slli_w(a: v4i32, imm2: i32) -> v4i32 { macro_rules! call { ($imm2:expr) => { msa_slli_w(a, $imm2) }; } constify_imm2!(imm2, call) } /// Immediate Shift Left /// /// The elements in vector 'a'(two signed 64-bit integer numbers) /// are shifted left by the imm1 bits. /// The result is written to vector(two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(slli.d, imm1 = 0b1))] #[rustc_args_required_const(1)] pub unsafe fn __msa_slli_d(a: v2i64, imm1: i32) -> v2i64 { macro_rules! call { ($imm1:expr) => { msa_slli_d(a, $imm1) }; } constify_imm1!(imm1, call) } /// GPR Element Splat /// /// Replicate vector 'a'(sixteen signed 8-bit integer numbers) /// element with index given by GPR 'b' to all elements in vector /// (sixteen signed 8-bit integer numbers) GPR 'b' value is interpreted /// modulo the number of data format df elements in the destination vector. 
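///
/// An illustrative sketch (marked `ignore`; it requires a MIPS target with
/// MSA and assumes the vector is built by transmuting an `[i8; 16]` array):
///
/// ```ignore
/// unsafe {
///     let a: v16i8 = ::mem::transmute([0i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
///     // Every lane of the result holds the element of `a` selected by the
///     // GPR index, here 5 (interpreted modulo the number of lanes).
///     let r = __msa_splat_b(a, 5);
/// }
/// ```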
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(splat.b))] pub unsafe fn __msa_splat_b(a: v16i8, b: i32) -> v16i8 { msa_splat_b(a, ::mem::transmute(b)) } /// GPR Element Splat /// /// Replicate vector 'a'(eight signed 16-bit integer numbers) /// element with index given by GPR 'b' to all elements in vector /// (eight signed 16-bit integer numbers) GPR 'b' value is interpreted /// modulo the number of data format df elements in the destination vector. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(splat.h))] pub unsafe fn __msa_splat_h(a: v8i16, b: i32) -> v8i16 { msa_splat_h(a, ::mem::transmute(b)) } /// GPR Element Splat /// /// Replicate vector 'a'(four signed 32-bit integer numbers) /// element with index given by GPR 'b' to all elements in vector /// (four signed 32-bit integer numbers) GPR 'b' value is interpreted /// modulo the number of data format df elements in the destination vector. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(splat.w))] pub unsafe fn __msa_splat_w(a: v4i32, b: i32) -> v4i32 { msa_splat_w(a, ::mem::transmute(b)) } /// GPR Element Splat /// /// Replicate vector 'a'(two signed 64-bit integer numbers) /// element with index given by GPR 'b' to all elements in vector /// (two signed 64-bit integer numbers) GPR 'b' value is interpreted /// modulo the number of data format df elements in the destination vector. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(splat.d))] pub unsafe fn __msa_splat_d(a: v2i64, b: i32) -> v2i64 { msa_splat_d(a, ::mem::transmute(b)) } /// Immediate Element Splat /// /// Replicate element imm4 in vector 'a'(sixteen signed 8-bit integer numbers) /// to all elements in vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(splati.b, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_splati_b(a: v16i8, imm4: i32) -> v16i8 { macro_rules! call { ($imm4:expr) => { msa_splati_b(a, $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Element Splat /// /// Replicate element imm3 in vector 'a'(eight signed 16-bit integer numbers) /// to all elements in vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(splati.h, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_splati_h(a: v8i16, imm3: i32) -> v8i16 { macro_rules! call { ($imm3:expr) => { msa_splati_h(a, $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Element Splat /// /// Replicate element imm2 in vector 'a'(four signed 32-bit integer numbers) /// to all elements in vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(splati.w, imm2 = 0b11))] #[rustc_args_required_const(1)] pub unsafe fn __msa_splati_w(a: v4i32, imm2: i32) -> v4i32 { macro_rules! call { ($imm2:expr) => { msa_splati_w(a, $imm2) }; } constify_imm2!(imm2, call) } /// Immediate Element Splat /// /// Replicate element imm1 in vector 'a'(two signed 64-bit integer numbers) /// to all elements in vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(splati.d, imm1 = 0b1))] #[rustc_args_required_const(1)] pub unsafe fn __msa_splati_d(a: v2i64, imm1: i32) -> v2i64 { macro_rules! 
call { ($imm1:expr) => { msa_splati_d(a, $imm1) }; } constify_imm1!(imm1, call) } /// Vector Shift Right Arithmetic /// /// The elements in vector 'a'(sixteen signed 8-bit integer numbers) /// are shifted right arithmetic by the number of bits the elements in vector 'b' /// (sixteen signed 8-bit integer numbers) specify modulo the size of the /// element in bits.The result is written to vector(sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sra.b))] pub unsafe fn __msa_sra_b(a: v16i8, b: v16i8) -> v16i8 { msa_sra_b(a, ::mem::transmute(b)) } /// Vector Shift Right Arithmetic /// /// The elements in vector 'a'(eight signed 16-bit integer numbers) /// are shifted right arithmetic by the number of bits the elements in vector 'b' /// (eight signed 16-bit integer numbers) specify modulo the size of the /// element in bits.The result is written to vector(eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sra.h))] pub unsafe fn __msa_sra_h(a: v8i16, b: v8i16) -> v8i16 { msa_sra_h(a, ::mem::transmute(b)) } /// Vector Shift Right Arithmetic /// /// The elements in vector 'a'(four signed 32-bit integer numbers) /// are shifted right arithmetic by the number of bits the elements in vector 'b' /// (four signed 32-bit integer numbers) specify modulo the size of the /// element in bits.The result is written to vector(four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sra.w))] pub unsafe fn __msa_sra_w(a: v4i32, b: v4i32) -> v4i32 { msa_sra_w(a, ::mem::transmute(b)) } /// Vector Shift Right Arithmetic /// /// The elements in vector 'a'(two signed 64-bit integer numbers) /// are shifted right arithmetic by the number of bits the elements in vector 'b' /// (two signed 64-bit integer numbers) specify modulo the size of the /// element in bits.The result is written to vector(two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(sra.d))] pub unsafe fn __msa_sra_d(a: v2i64, b: v2i64) -> v2i64 { msa_sra_d(a, ::mem::transmute(b)) } /// Immediate Shift Right Arithmetic /// /// The elements in vector 'a'(sixteen signed 8-bit integer numbers) /// are shifted right arithmetic by imm3 bits. /// The result is written to vector(sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srai.b, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srai_b(a: v16i8, imm3: i32) -> v16i8 { macro_rules! call { ($imm3:expr) => { msa_srai_b(a, $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Shift Right Arithmetic /// /// The elements in vector 'a'(eight signed 16-bit integer numbers) /// are shifted right arithmetic by imm4 bits. /// The result is written to vector(eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srai.h, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srai_h(a: v8i16, imm4: i32) -> v8i16 { macro_rules! call { ($imm4:expr) => { msa_srai_h(a, $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Shift Right Arithmetic /// /// The elements in vector 'a'(four signed 32-bit integer numbers) /// are shifted right arithmetic by imm5 bits. /// The result is written to vector(four signed 32-bit integer numbers). 
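///
/// Illustrative sketch (hypothetical lane values); arithmetic shifts replicate
/// the sign bit, so negative lanes stay negative. Marked `ignore` because it
/// requires an MSA-enabled target:
///
/// ```ignore
/// let a = i32x4::new(-16, 16, -1, 1024);
/// // Shifting right arithmetically by 2 gives -4, 4, -1 and 256.
/// let r: v4i32 = __msa_srai_w(::mem::transmute(a), 2);
/// ```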
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srai.w, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srai_w(a: v4i32, imm5: i32) -> v4i32 { macro_rules! call { ($imm5:expr) => { msa_srai_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Shift Right Arithmetic /// /// The elements in vector 'a'(two signed 64-bit integer numbers) /// are shifted right arithmetic by imm6 bits. /// The result is written to vector(two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srai.d, imm6 = 0b111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srai_d(a: v2i64, imm6: i32) -> v2i64 { macro_rules! call { ($imm6:expr) => { msa_srai_d(a, $imm6) }; } constify_imm6!(imm6, call) } /// Vector Shift Right Arithmetic Rounded /// /// The elements in vector 'a'(sixteen signed 8-bit integer numbers) /// are shifted right arithmetic by the number of bits the elements in vector 'b' /// (sixteen signed 8-bit integer numbers) specify modulo the size of the /// element in bits.The most significant discarded bit is added to the shifted /// value (for rounding) and the result is written to vector(sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srar.b))] pub unsafe fn __msa_srar_b(a: v16i8, b: v16i8) -> v16i8 { msa_srar_b(a, ::mem::transmute(b)) } /// Vector Shift Right Arithmetic Rounded /// /// The elements in vector 'a'(eight signed 16-bit integer numbers) /// are shifted right arithmetic by the number of bits the elements in vector 'b' /// (eight signed 16-bit integer numbers) specify modulo the size of the /// element in bits.The most significant discarded bit is added to the shifted /// value (for rounding) and the result is written to vector(eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srar.h))] pub unsafe fn __msa_srar_h(a: v8i16, b: v8i16) -> v8i16 { msa_srar_h(a, ::mem::transmute(b)) } /// Vector Shift Right Arithmetic Rounded /// /// The elements in vector 'a'(four signed 32-bit integer numbers) /// are shifted right arithmetic by the number of bits the elements in vector 'b' /// (four signed 32-bit integer numbers) specify modulo the size of the /// element in bits.The most significant discarded bit is added to the shifted /// value (for rounding) and the result is written to vector(four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srar.w))] pub unsafe fn __msa_srar_w(a: v4i32, b: v4i32) -> v4i32 { msa_srar_w(a, ::mem::transmute(b)) } /// Vector Shift Right Arithmetic Rounded /// /// The elements in vector 'a'(two signed 64-bit integer numbers) /// are shifted right arithmetic by the number of bits the elements in vector 'b' /// (two signed 64-bit integer numbers) specify modulo the size of the /// element in bits.The most significant discarded bit is added to the shifted /// value (for rounding) and the result is written to vector(two signed 64-bit integer numbers). 
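///
/// A sketch with values chosen here (not from the manual) to show the rounding
/// step; marked `ignore` since it only runs with MSA support:
///
/// ```ignore
/// let a = i64x2::new(7, -7);
/// let b = i64x2::new(2, 2);
/// // 7 >> 2 == 1 and the most significant discarded bit is 1, so the lane
/// // rounds up to 2; -7 >> 2 == -2 and the discarded bit is 0, so -2 remains.
/// let r: v2i64 = __msa_srar_d(::mem::transmute(a), ::mem::transmute(b));
/// ```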
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srar.d))] pub unsafe fn __msa_srar_d(a: v2i64, b: v2i64) -> v2i64 { msa_srar_d(a, ::mem::transmute(b)) } /// Immediate Shift Right Arithmetic Rounded /// /// The elements in vector 'a'(sixteen signed 8-bit integer numbers) /// are shifted right arithmetic by imm3 bits.The most significant /// discarded bit is added to the shifted value (for rounding) and /// the result is written to vector(sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srari.b, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srari_b(a: v16i8, imm3: i32) -> v16i8 { macro_rules! call { ($imm3:expr) => { msa_srari_b(a, $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Shift Right Arithmetic Rounded /// /// The elements in vector 'a'(eight signed 16-bit integer numbers) /// are shifted right arithmetic by imm4 bits.The most significant /// discarded bit is added to the shifted value (for rounding) and /// the result is written to vector(eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srari.h, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srari_h(a: v8i16, imm4: i32) -> v8i16 { macro_rules! call { ($imm4:expr) => { msa_srari_h(a, $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Shift Right Arithmetic Rounded /// /// The elements in vector 'a'(four signed 32-bit integer numbers) /// are shifted right arithmetic by imm5 bits.The most significant /// discarded bit is added to the shifted value (for rounding) and /// the result is written to vector(four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srari.w, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srari_w(a: v4i32, imm5: i32) -> v4i32 { macro_rules! call { ($imm5:expr) => { msa_srari_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Shift Right Arithmetic Rounded /// /// The elements in vector 'a'(two signed 64-bit integer numbers) /// are shifted right arithmetic by imm6 bits.The most significant /// discarded bit is added to the shifted value (for rounding) and /// the result is written to vector(two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srari.d, imm6 = 0b111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srari_d(a: v2i64, imm6: i32) -> v2i64 { macro_rules! call { ($imm6:expr) => { msa_srari_d(a, $imm6) }; } constify_imm6!(imm6, call) } /// Vector Shift Right Logical /// /// The elements in vector 'a'(sixteen signed 8-bit integer numbers) /// are shifted right logical by the number of bits the elements in vector 'b' /// (sixteen signed 8-bit integer numbers) specify modulo the size of the /// element in bits.The result is written to vector(sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srl.b))] pub unsafe fn __msa_srl_b(a: v16i8, b: v16i8) -> v16i8 { msa_srl_b(a, ::mem::transmute(b)) } /// Vector Shift Right Logical /// /// The elements in vector 'a'(eight signed 16-bit integer numbers) /// are shifted right logical by the number of bits the elements in vector 'b' /// (eight signed 16-bit integer numbers) specify modulo the size of the /// element in bits.The result is written to vector(eight signed 16-bit integer numbers). 
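///
/// Illustrative sketch (values invented here); logical shifts fill with zeros
/// and the per-lane shift amount is taken modulo 16. Marked `ignore` because
/// it needs an MSA-enabled target:
///
/// ```ignore
/// let a = i16x8::new(-1, 256, -1, 256, -1, 256, -1, 256);
/// let b = i16x8::new(8, 4, 8, 4, 8, 4, 8, 4);
/// // -1 has the bit pattern 0xFFFF; a logical shift by 8 yields 0x00FF (255),
/// // while 256 >> 4 == 16.
/// let r: v8i16 = __msa_srl_h(::mem::transmute(a), ::mem::transmute(b));
/// ```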
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srl.h))] pub unsafe fn __msa_srl_h(a: v8i16, b: v8i16) -> v8i16 { msa_srl_h(a, ::mem::transmute(b)) } /// Vector Shift Right Logical /// /// The elements in vector 'a'(four signed 32-bit integer numbers) /// are shifted right logical by the number of bits the elements in vector 'b' /// (four signed 32-bit integer numbers) specify modulo the size of the /// element in bits.The result is written to vector(four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srl.w))] pub unsafe fn __msa_srl_w(a: v4i32, b: v4i32) -> v4i32 { msa_srl_w(a, ::mem::transmute(b)) } /// Vector Shift Right Logical /// /// The elements in vector 'a'(two signed 64-bit integer numbers) /// are shifted right logical by the number of bits the elements in vector 'b' /// (two signed 64-bit integer numbers) specify modulo the size of the /// element in bits.The result is written to vector(two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srl.d))] pub unsafe fn __msa_srl_d(a: v2i64, b: v2i64) -> v2i64 { msa_srl_d(a, ::mem::transmute(b)) } /// Immediate Shift Right Logical /// /// The elements in vector 'a'(sixteen signed 8-bit integer numbers) /// are shifted right logical by imm4 bits. /// The result is written to vector(sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srli.b, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srli_b(a: v16i8, imm4: i32) -> v16i8 { macro_rules! call { ($imm4:expr) => { msa_srli_b(a, $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Shift Right Logical /// /// The elements in vector 'a'(eight signed 16-bit integer numbers) /// are shifted right logical by imm3 bits. /// The result is written to vector(eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srli.h, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srli_h(a: v8i16, imm3: i32) -> v8i16 { macro_rules! call { ($imm3:expr) => { msa_srli_h(a, $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Shift Right Logical /// /// The elements in vector 'a'(four signed 32-bit integer numbers) /// are shifted right logical by imm2 bits. /// The result is written to vector(four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srli.w, imm2 = 0b11))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srli_w(a: v4i32, imm2: i32) -> v4i32 { macro_rules! call { ($imm2:expr) => { msa_srli_w(a, $imm2) }; } constify_imm2!(imm2, call) } /// Immediate Shift Right Logical /// /// The elements in vector 'a'(two signed 64-bit integer numbers) /// are shifted right logical by imm1 bits. /// The result is written to vector(two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srli.d, imm1 = 0b1))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srli_d(a: v2i64, imm1: i32) -> v2i64 { macro_rules! 
call { ($imm1:expr) => { msa_srli_d(a, $imm1) }; } constify_imm1!(imm1, call) } /// Vector Shift Right Logical Rounded /// /// The elements in vector 'a'(sixteen signed 8-bit integer numbers) /// are shifted right logical by the number of bits the elements in vector 'b' /// (sixteen signed 8-bit integer numbers) specify modulo the size of the /// element in bits. The most significant discarded bit is added to the shifted /// value (for rounding) and the result is written to vector(sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srlr.b))] pub unsafe fn __msa_srlr_b(a: v16i8, b: v16i8) -> v16i8 { msa_srlr_b(a, ::mem::transmute(b)) } /// Vector Shift Right Logical Rounded /// /// The elements in vector 'a'(eight signed 16-bit integer numbers) /// are shifted right logical by the number of bits the elements in vector 'b' /// (eight signed 16-bit integer numbers) specify modulo the size of the /// element in bits. The most significant discarded bit is added to the shifted /// value (for rounding) and the result is written to vector(eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srlr.h))] pub unsafe fn __msa_srlr_h(a: v8i16, b: v8i16) -> v8i16 { msa_srlr_h(a, ::mem::transmute(b)) } /// Vector Shift Right Logical Rounded /// /// The elements in vector 'a'(four signed 32-bit integer numbers) /// are shifted right logical by the number of bits the elements in vector 'b' /// (four signed 32-bit integer numbers) specify modulo the size of the /// element in bits. The most significant discarded bit is added to the shifted /// value (for rounding) and the result is written to vector(four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srlr.w))] pub unsafe fn __msa_srlr_w(a: v4i32, b: v4i32) -> v4i32 { msa_srlr_w(a, ::mem::transmute(b)) } /// Vector Shift Right Logical Rounded /// /// The elements in vector 'a'(two signed 64-bit integer numbers) /// are shifted right logical by the number of bits the elements in vector 'b' /// (two signed 64-bit integer numbers) specify modulo the size of the /// element in bits. The most significant discarded bit is added to the shifted /// value (for rounding) and the result is written to vector(two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srlr.d))] pub unsafe fn __msa_srlr_d(a: v2i64, b: v2i64) -> v2i64 { msa_srlr_d(a, ::mem::transmute(b)) } /// Immediate Shift Right Logical Rounded /// /// The elements in vector 'a'(sixteen signed 8-bit integer numbers) /// are shifted right logical by imm3 bits. The most significant /// discarded bit is added to the shifted value (for rounding) and /// the result is written to vector(sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srlri.b, imm3 = 0b111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srlri_b(a: v16i8, imm3: i32) -> v16i8 { macro_rules! call { ($imm3:expr) => { msa_srlri_b(a, $imm3) }; } constify_imm3!(imm3, call) } /// Immediate Shift Right Logical Rounded /// /// The elements in vector 'a'(eight signed 16-bit integer numbers) /// are shifted right logical by imm4 bits. The most significant /// discarded bit is added to the shifted value (for rounding) and /// the result is written to vector(eight signed 16-bit integer numbers).
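///
/// A sketch with hypothetical values showing the logical shift plus rounding;
/// marked `ignore` since it requires MSA support:
///
/// ```ignore
/// let a = i16x8::new(6, 7, 6, 7, 6, 7, 6, 7);
/// // Shifting by 2: 6 >> 2 == 1 and 7 >> 2 == 1, and in both cases the most
/// // significant discarded bit is 1, so both lanes round up to 2.
/// let r: v8i16 = __msa_srlri_h(::mem::transmute(a), 2);
/// ```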
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srlri.h, imm4 = 0b1111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srlri_h(a: v8i16, imm4: i32) -> v8i16 { macro_rules! call { ($imm4:expr) => { msa_srlri_h(a, $imm4) }; } constify_imm4!(imm4, call) } /// Immediate Shift Right Logical Rounded /// /// The elements in vector 'a'(four signed 32-bit integer numbers) /// are shifted right logical by imm5 bits. The most significant /// discarded bit is added to the shifted value (for rounding) and /// the result is written to vector(four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srlri.w, imm5 = 0b11111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srlri_w(a: v4i32, imm5: i32) -> v4i32 { macro_rules! call { ($imm5:expr) => { msa_srlri_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Shift Right Logical Rounded /// /// The elements in vector 'a'(two signed 64-bit integer numbers) /// are shifted right logical by imm6 bits. The most significant /// discarded bit is added to the shifted value (for rounding) and /// the result is written to vector(two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(srlri.d, imm6 = 0b111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_srlri_d(a: v2i64, imm6: i32) -> v2i64 { macro_rules! call { ($imm6:expr) => { msa_srlri_d(a, $imm6) }; } constify_imm6!(imm6, call) } /// Vector Store /// /// The WRLEN / 8 bytes in vector 'a'(sixteen signed 8-bit integer numbers) /// are stored as elements of data format df at the effective memory location /// addressed by the base mem_addr and the 10-bit signed immediate offset imm_s10. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(st.b, imm_s10 = 0b1111111111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_st_b(a: v16i8, mem_addr: *mut u8, imm_s10: i32) -> () { macro_rules! call { ($imm_s10:expr) => { msa_st_b(a, mem_addr, $imm_s10) }; } constify_imm_s10!(imm_s10, call) } /// Vector Store /// /// The WRLEN / 8 bytes in vector 'a'(eight signed 16-bit integer numbers) /// are stored as elements of data format df at the effective memory location /// addressed by the base mem_addr and the 11-bit signed immediate offset imm_s11. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(st.h, imm_s11 = 0b11111111111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_st_h(a: v8i16, mem_addr: *mut u8, imm_s11: i32) -> () { macro_rules! call { ($imm_s11:expr) => { msa_st_h(a, mem_addr, $imm_s11) }; } constify_imm_s11!(imm_s11, call) } /// Vector Store /// /// The WRLEN / 8 bytes in vector 'a'(four signed 32-bit integer numbers) /// are stored as elements of data format df at the effective memory location /// addressed by the base mem_addr and the 12-bit signed immediate offset imm_s12. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(st.w, imm_s12 = 0b111111111111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_st_w(a: v4i32, mem_addr: *mut u8, imm_s12: i32) -> () { macro_rules!
call { ($imm_s12:expr) => { msa_st_w(a, mem_addr, $imm_s12) }; } constify_imm_s12!(imm_s12, call) } /// Vector Store /// /// TheWRLEN / 8 bytes in vector 'a'(two signed 64-bit integer numbers) /// are stored as elements of data format df at the effective memory location /// addressed by the base mem_addr and the 13-bit signed immediate offset imm_s13 /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(st.d, imm_s13 = 0b1111111111111))] #[rustc_args_required_const(2)] pub unsafe fn __msa_st_d(a: v2i64, mem_addr: *mut u8, imm_s13: i32) -> () { macro_rules! call { ($imm_s13:expr) => { msa_st_d(a, mem_addr, $imm_s13) }; } constify_imm_s13!(imm_s13, call) } /// Vector Signed Saturated Subtract of Signed Values /// /// The elements in vector `b` (sixteen signed 8-bit integer numbers) /// are subtracted from the elements in vector `a` (sixteen signed 8-bit integer numbers) /// Signed arithmetic is performed and overflows clamp to the largest and/or smallest /// representable signed values before writing the result to vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subs_s.b))] pub unsafe fn __msa_subs_s_b(a: v16i8, b: v16i8) -> v16i8 { msa_subs_s_b(a, ::mem::transmute(b)) } /// Vector Signed Saturated Subtract of Signed Values /// /// The elements in vector `b` (eight signed 16-bit integer numbers) /// are subtracted from the elements in vector `a` (eight signed 16-bit integer numbers) /// Signed arithmetic is performed and overflows clamp to the largest and/or smallest /// representable signed values before writing the result to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subs_s.h))] pub unsafe fn __msa_subs_s_h(a: v8i16, b: v8i16) -> v8i16 { msa_subs_s_h(a, ::mem::transmute(b)) } /// Vector Signed Saturated Subtract of Signed Values /// /// The elements in vector `b` (four signed 32-bit integer numbers) /// are subtracted from the elements in vector `a` (four signed 32-bit integer numbers) /// Signed arithmetic is performed and overflows clamp to the largest and/or smallest /// representable signed values before writing the result to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subs_s.w))] pub unsafe fn __msa_subs_s_w(a: v4i32, b: v4i32) -> v4i32 { msa_subs_s_w(a, ::mem::transmute(b)) } /// Vector Signed Saturated Subtract of Signed Values /// /// The elements in vector `b` (two signed 64-bit integer numbers) /// are subtracted from the elements in vector `a` (two signed 64-bit integer numbers) /// Signed arithmetic is performed and overflows clamp to the largest and/or smallest /// representable signed values before writing the result to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subs_s.d))] pub unsafe fn __msa_subs_s_d(a: v2i64, b: v2i64) -> v2i64 { msa_subs_s_d(a, ::mem::transmute(b)) } /// Vector Unsigned Saturated Subtract of Unsigned Values /// /// The elements in vector `b` (sixteen unsigned 8-bit integer numbers) /// are subtracted from the elements in vector `a` (sixteen unsigned 8-bit integer numbers) /// Unsigned arithmetic is performed and under-flows clamp to 0 before writing /// the result to vector (sixteen unsigned 8-bit integer numbers). 
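///
/// Illustrative sketch (made-up lane values); a lane that would underflow
/// saturates to 0. Marked `ignore` because it needs an MSA-enabled target:
///
/// ```ignore
/// let a = u8x16::new(10, 0, 10, 0, 10, 0, 10, 0,
///                    10, 0, 10, 0, 10, 0, 10, 0);
/// let b = u8x16::new(3, 1, 3, 1, 3, 1, 3, 1,
///                    3, 1, 3, 1, 3, 1, 3, 1);
/// // 10 - 3 == 7, while 0 - 1 underflows and clamps to 0.
/// let r: v16u8 = __msa_subs_u_b(::mem::transmute(a), ::mem::transmute(b));
/// ```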
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subs_u.b))] pub unsafe fn __msa_subs_u_b(a: v16u8, b: v16u8) -> v16u8 { msa_subs_u_b(a, ::mem::transmute(b)) } /// Vector Unsigned Saturated Subtract of Unsigned Values /// /// The elements in vector `b` (eight unsigned 16-bit integer numbers) /// are subtracted from the elements in vector `a` (eight unsigned 16-bit integer numbers). /// Unsigned arithmetic is performed and under-flows clamp to 0 before writing /// the result to vector (eight unsigned 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subs_u.h))] pub unsafe fn __msa_subs_u_h(a: v8u16, b: v8u16) -> v8u16 { msa_subs_u_h(a, ::mem::transmute(b)) } /// Vector Unsigned Saturated Subtract of Unsigned Values /// /// The elements in vector `b` (four unsigned 32-bit integer numbers) /// are subtracted from the elements in vector `a` (four unsigned 32-bit integer numbers). /// Unsigned arithmetic is performed and under-flows clamp to 0 before writing /// the result to vector (four unsigned 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subs_u.w))] pub unsafe fn __msa_subs_u_w(a: v4u32, b: v4u32) -> v4u32 { msa_subs_u_w(a, ::mem::transmute(b)) } /// Vector Unsigned Saturated Subtract of Unsigned Values /// /// The elements in vector `b` (two unsigned 64-bit integer numbers) /// are subtracted from the elements in vector `a` (two unsigned 64-bit integer numbers). /// Unsigned arithmetic is performed and under-flows clamp to 0 before writing /// the result to vector (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subs_u.d))] pub unsafe fn __msa_subs_u_d(a: v2u64, b: v2u64) -> v2u64 { msa_subs_u_d(a, ::mem::transmute(b)) } /// Vector Unsigned Saturated Subtract of Signed from Unsigned /// /// The signed elements in vector `b` (sixteen signed 8-bit integer numbers) /// are subtracted from the unsigned elements in vector `a` (sixteen unsigned 8-bit integer numbers). /// The signed result is unsigned saturated and written /// to vector (sixteen unsigned 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subsus_u.b))] pub unsafe fn __msa_subsus_u_b(a: v16u8, b: v16i8) -> v16u8 { msa_subsus_u_b(a, ::mem::transmute(b)) } /// Vector Unsigned Saturated Subtract of Signed from Unsigned /// /// The signed elements in vector `b` (eight signed 16-bit integer numbers) /// are subtracted from the unsigned elements in vector `a` (eight unsigned 16-bit integer numbers). /// The signed result is unsigned saturated and written /// to vector (eight unsigned 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subsus_u.h))] pub unsafe fn __msa_subsus_u_h(a: v8u16, b: v8i16) -> v8u16 { msa_subsus_u_h(a, ::mem::transmute(b)) } /// Vector Unsigned Saturated Subtract of Signed from Unsigned /// /// The signed elements in vector `b` (four signed 32-bit integer numbers) /// are subtracted from the unsigned elements in vector `a` (four unsigned 32-bit integer numbers). /// The signed result is unsigned saturated and written /// to vector (four unsigned 32-bit integer numbers).
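///
/// A sketch using invented values; subtracting a negative signed lane adds to
/// the unsigned lane, and the result saturates to the unsigned range. Marked
/// `ignore` since it only runs with MSA support:
///
/// ```ignore
/// let a = u32x4::new(10, u32::max_value(), 10, 0);
/// let b = i32x4::new(-5, -1, 20, -3);
/// // 10 - (-5) == 15; u32::MAX - (-1) saturates to u32::MAX;
/// // 10 - 20 clamps to 0; 0 - (-3) == 3.
/// let r: v4u32 = __msa_subsus_u_w(::mem::transmute(a), ::mem::transmute(b));
/// ```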
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subsus_u.w))] pub unsafe fn __msa_subsus_u_w(a: v4u32, b: v4i32) -> v4u32 { msa_subsus_u_w(a, ::mem::transmute(b)) } /// Vector Unsigned Saturated Subtract of Signed from Unsigned /// /// The signed elements in vector `b` (two signed 64-bit integer numbers) /// are subtracted from the unsigned elements in vector `a` (two unsigned 64-bit integer numbers). /// The signed result is unsigned saturated and written /// to vector (two unsigned 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subsus_u.d))] pub unsafe fn __msa_subsus_u_d(a: v2u64, b: v2i64) -> v2u64 { msa_subsus_u_d(a, ::mem::transmute(b)) } /// Vector Signed Saturated Subtract of Unsigned Values /// /// The unsigned elements in vector `b` (sixteen unsigned 8-bit integer numbers) /// are subtracted from the unsigned elements in vector `a` (sixteen unsigned 8-bit integer numbers). /// The signed result is signed saturated and written /// to vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subsuu_s.b))] pub unsafe fn __msa_subsuu_s_b(a: v16u8, b: v16u8) -> v16i8 { msa_subsuu_s_b(a, ::mem::transmute(b)) } /// Vector Signed Saturated Subtract of Unsigned Values /// /// The unsigned elements in vector `b` (eight unsigned 16-bit integer numbers) /// are subtracted from the unsigned elements in vector `a` (eight unsigned 16-bit integer numbers). /// The signed result is signed saturated and written /// to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subsuu_s.h))] pub unsafe fn __msa_subsuu_s_h(a: v8u16, b: v8u16) -> v8i16 { msa_subsuu_s_h(a, ::mem::transmute(b)) } /// Vector Signed Saturated Subtract of Unsigned Values /// /// The unsigned elements in vector `b` (four unsigned 32-bit integer numbers) /// are subtracted from the unsigned elements in vector `a` (four unsigned 32-bit integer numbers). /// The signed result is signed saturated and written /// to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subsuu_s.w))] pub unsafe fn __msa_subsuu_s_w(a: v4u32, b: v4u32) -> v4i32 { msa_subsuu_s_w(a, ::mem::transmute(b)) } /// Vector Signed Saturated Subtract of Unsigned Values /// /// The unsigned elements in vector `b` (two unsigned 64-bit integer numbers) /// are subtracted from the unsigned elements in vector `a` (two unsigned 64-bit integer numbers). /// The signed result is signed saturated and written /// to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subsuu_s.d))] pub unsafe fn __msa_subsuu_s_d(a: v2u64, b: v2u64) -> v2i64 { msa_subsuu_s_d(a, ::mem::transmute(b)) } /// Vector Subtract /// /// The elements in vector `b` (sixteen signed 8-bit integer numbers) /// are subtracted from the elements in vector `a` (sixteen signed 8-bit integer numbers). /// The result is written to vector (sixteen signed 8-bit integer numbers).
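///
/// Minimal sketch (hypothetical values); unlike the saturating variants,
/// `subv` wraps on overflow. Marked `ignore` because it requires an
/// MSA-enabled target:
///
/// ```ignore
/// let a = i8x16::new(5, -128, 5, -128, 5, -128, 5, -128,
///                    5, -128, 5, -128, 5, -128, 5, -128);
/// let b = i8x16::new(10, 1, 10, 1, 10, 1, 10, 1,
///                    10, 1, 10, 1, 10, 1, 10, 1);
/// // 5 - 10 == -5, and -128 - 1 wraps around to 127.
/// let r: v16i8 = __msa_subv_b(::mem::transmute(a), ::mem::transmute(b));
/// ```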
/// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subv.b))] pub unsafe fn __msa_subv_b(a: v16i8, b: v16i8) -> v16i8 { msa_subv_b(a, ::mem::transmute(b)) } /// Vector Subtract /// /// The elements in vector `b` (eight signed 16-bit integer numbers) /// are subtracted from the elements in vector `a` (eight signed 16-bit integer numbers) /// The result is written to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subv.h))] pub unsafe fn __msa_subv_h(a: v8i16, b: v8i16) -> v8i16 { msa_subv_h(a, ::mem::transmute(b)) } /// Vector Subtract /// /// The elements in vector `b` (four signed 32-bit integer numbers) /// are subtracted from the elements in vector `a` (four signed 32-bit integer numbers) /// The result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subv.w))] pub unsafe fn __msa_subv_w(a: v4i32, b: v4i32) -> v4i32 { msa_subv_w(a, ::mem::transmute(b)) } /// Vector Subtract /// /// The elements in vector `b` (two signed 64-bit integer numbers) /// are subtracted from the elements in vector `a` (two signed 64-bit integer numbers) /// The result is written to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subv.d))] pub unsafe fn __msa_subv_d(a: v2i64, b: v2i64) -> v2i64 { msa_subv_d(a, ::mem::transmute(b)) } /// Immediate Subtract /// /// The 5-bit immediate unsigned value imm5 /// are subtracted from the elements in vector `a` (sixteen signed 8-bit integer numbers) /// The result is written to vector (sixteen signed 8-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subvi.b, imm5 = 0b10111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_subvi_b(a: v16i8, imm5: i32) -> v16i8 { macro_rules! call { ($imm5:expr) => { msa_subvi_b(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Subtract /// /// The 5-bit immediate unsigned value imm5 /// are subtracted from the elements in vector `a` (eight signed 16-bit integer numbers) /// The result is written to vector (eight signed 16-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subvi.h, imm5 = 0b10111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_subvi_h(a: v8i16, imm5: i32) -> v8i16 { macro_rules! call { ($imm5:expr) => { msa_subvi_h(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Subtract /// /// The 5-bit immediate unsigned value imm5 /// are subtracted from the elements in vector `a` (four signed 32-bit integer numbers) /// The result is written to vector (four signed 32-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subvi.w, imm5 = 0b10111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_subvi_w(a: v4i32, imm5: i32) -> v4i32 { macro_rules! call { ($imm5:expr) => { msa_subvi_w(a, $imm5) }; } constify_imm5!(imm5, call) } /// Immediate Subtract /// /// The 5-bit immediate unsigned value imm5 /// are subtracted from the elements in vector `a` (two signed 64-bit integer numbers) /// The result is written to vector (two signed 64-bit integer numbers). /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(subvi.d, imm5 = 0b10111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_subvi_d(a: v2i64, imm5: i32) -> v2i64 { macro_rules! 
call { ($imm5:expr) => { msa_subvi_d(a, $imm5) }; } constify_imm5!(imm5, call) } /// Vector Data Preserving Shuffle /// /// The vector shuffle instructions selectively copy data elements from the /// concatenation of vectors 'b' (sixteen signed 8-bit integer numbers) /// and `c` (sixteen signed 8-bit integer numbers) in to vector 'a' /// (sixteen signed 8-bit integer numbers) based on the corresponding control element in 'a' /// The least significant 6 bits in 'a' control elements modulo the number of elements in /// the concatenated vectors 'b','a' specify the index of the source element. /// If bit 6 or bit 7 is 1, there will be no copy, but rather the destination elementis set to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(vshf.b))] pub unsafe fn __msa_vshf_b(a: v16i8, b: v16i8, c: v16i8) -> v16i8 { msa_vshf_b(a, ::mem::transmute(b), c) } /// Vector Data Preserving Shuffle /// /// The vector shuffle instructions selectively copy data elements from the /// concatenation of vectors 'b' (eight signed 16-bit integer numbers) /// and `c` (eight signed 16-bit integer numbers) in to vector 'a' /// (eight signed 16-bit integer numbers) based on the corresponding control element in 'a' /// The least significant 6 bits in 'a' control elements modulo the number of elements in /// the concatenated vectors 'b','a' specify the index of the source element. /// If bit 6 or bit 7 is 1, there will be no copy, but rather the destination elementis set to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(vshf.h))] pub unsafe fn __msa_vshf_h(a: v8i16, b: v8i16, c: v8i16) -> v8i16 { msa_vshf_h(a, ::mem::transmute(b), c) } /// Vector Data Preserving Shuffle /// /// The vector shuffle instructions selectively copy data elements from the /// concatenation of vectors 'b' (four signed 32-bit integer numbers) /// and `c` (four signed 32-bit integer numbers) in to vector 'a' /// (four signed 32-bit integer numbers) based on the corresponding control element in 'a' /// The least significant 6 bits in 'a' control elements modulo the number of elements in /// the concatenated vectors 'b','a' specify the index of the source element. /// If bit 6 or bit 7 is 1, there will be no copy, but rather the destination elementis set to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(vshf.w))] pub unsafe fn __msa_vshf_w(a: v4i32, b: v4i32, c: v4i32) -> v4i32 { msa_vshf_w(a, ::mem::transmute(b), c) } /// Vector Data Preserving Shuffle /// /// The vector shuffle instructions selectively copy data elements from the /// concatenation of vectors 'b' (two signed 64-bit integer numbers) /// and `c` (two signed 64-bit integer numbers) in to vector 'a' /// (two signed 64-bit integer numbers) based on the corresponding control element in 'a' /// The least significant 6 bits in 'a' control elements modulo the number of elements in /// the concatenated vectors 'b','a' specify the index of the source element. /// If bit 6 or bit 7 is 1, there will be no copy, but rather the destination elementis set to 0. /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(vshf.d))] pub unsafe fn __msa_vshf_d(a: v2i64, b: v2i64, c: v2i64) -> v2i64 { msa_vshf_d(a, ::mem::transmute(b), c) } /// Vector Logical Exclusive Or /// /// Each bit of vector 'a'(sixteen unsigned 8-bit integer numbers) /// is combined with the corresponding bit of vector 'b' (sixteen unsigned 8-bit integer numbers) /// in a bitwise logical XOR operation. 
The result is written to vector /// (sixteen unsigned 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(xor.v))] pub unsafe fn __msa_xor_v(a: v16u8, b: v16u8) -> v16u8 { msa_xor_v(a, ::mem::transmute(b)) } /// Immediate Logical Exclusive Or /// /// Each byte of vector 'a'(sixteen unsigned 8-bit integer numbers) /// is combined with the 8-bit immediate imm8 /// in a bitwise logical XOR operation. The result is written to vector /// (sixteen unsigned 8-bit integer numbers) /// #[inline] #[target_feature(enable = "msa")] #[cfg_attr(test, assert_instr(xori.b, imm8 = 0b11111111))] #[rustc_args_required_const(1)] pub unsafe fn __msa_xori_b(a: v16u8, imm8: i32) -> v16u8 { macro_rules! call { ($imm8:expr) => { msa_xori_b(a, $imm8) }; } constify_imm8!(imm8, call) } #[cfg(test)] mod tests { use crate::core_arch::mips::msa::*; use core_arch::simd::*; use std::f32; use std::f64; use std::mem; use stdsimd_test::simd_test; #[simd_test(enable = "msa")] unsafe fn test_msa_add_a_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i8x16::new( -4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1, -4, -3, -2, -1 ); #[rustfmt::skip] let r = i8x16::new( 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 ); assert_eq!( r, ::mem::transmute(__msa_add_a_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_add_a_h() { #[rustfmt::skip] let a = i16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let b = i16x8::new(-4, -3, -2, -1, -4, -3, -2, -1); #[rustfmt::skip] let r = i16x8::new(5, 5, 5, 5, 5, 5, 5, 5); assert_eq!( r, ::mem::transmute(__msa_add_a_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_add_a_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = i32x4::new(-4, -3, -2, -1); #[rustfmt::skip] let r = i32x4::new(5, 5, 5, 5); assert_eq!( r, ::mem::transmute(__msa_add_a_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_add_a_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let b = i64x2::new(-4, -3); #[rustfmt::skip] let r = i64x2::new(5, 5); assert_eq!( r, ::mem::transmute(__msa_add_a_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_adds_a_b() { #[rustfmt::skip] let a = i8x16::new( 100, i8::max_value(), 100, i8::max_value(), 100, i8::max_value(), 100, i8::max_value(), 100, i8::max_value(), 100, i8::max_value(), 100, i8::max_value(), 100, i8::max_value() ); #[rustfmt::skip] let b = i8x16::new( -4, -3, -2, -100, -4, -3, -2, -100, -4, -3, -2, -100, -4, -3, -2, -100 ); #[rustfmt::skip] let r = i8x16::new( 104, 127, 102, 127, 104, 127, 102, 127, 104, 127, 102, 127, 104, 127, 102, 127 ); assert_eq!( r, ::mem::transmute(__msa_adds_a_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_adds_a_h() { #[rustfmt::skip] let a = i16x8::new( 100, i16::max_value(), 100, i16::max_value(), 100, i16::max_value(), 100, i16::max_value() ); #[rustfmt::skip] let b = i16x8::new(-4, -3, -2, -1, -4, -3, -2, -1); #[rustfmt::skip] let r = i16x8::new( 104, i16::max_value(), 102, i16::max_value(), 104, i16::max_value(), 102, i16::max_value() ); assert_eq!( r, ::mem::transmute(__msa_adds_a_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_adds_a_w() { #[rustfmt::skip] let a = i32x4::new(100, i32::max_value(), 100, 
i32::max_value()); #[rustfmt::skip] let b = i32x4::new(-4, -3, -2, -1); #[rustfmt::skip] let r = i32x4::new(104, i32::max_value(), 102, i32::max_value()); assert_eq!( r, ::mem::transmute(__msa_adds_a_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_adds_a_d() { #[rustfmt::skip] let a = i64x2::new(100, i64::max_value()); #[rustfmt::skip] let b = i64x2::new(-4, -3); #[rustfmt::skip] let r = i64x2::new(104, i64::max_value()); assert_eq!( r, ::mem::transmute(__msa_adds_a_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_adds_s_b() { #[rustfmt::skip] let a = i8x16::new( 100, i8::min_value(), 100, i8::max_value(), 100, i8::min_value(), 100, i8::max_value(), 100, i8::min_value(), 100, i8::max_value(), 100, i8::min_value(), 100, i8::max_value() ); #[rustfmt::skip] let b = i8x16::new( -4, -3, -2, 100, -4, -3, -2, 100, -4, -3, -2, 100, -4, -3, -2, 100 ); #[rustfmt::skip] let r = i8x16::new( 96, i8::min_value(), 98, i8::max_value(), 96, i8::min_value(), 98, i8::max_value(), 96, i8::min_value(), 98, i8::max_value(), 96, i8::min_value(), 98, i8::max_value() ); assert_eq!( r, ::mem::transmute(__msa_adds_s_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_adds_s_h() { #[rustfmt::skip] let a = i16x8::new( 100, i16::min_value(), 100, i16::max_value(), 100, i16::min_value(), 100, i16::max_value() ); #[rustfmt::skip] let b = i16x8::new(-4, -3, -2, 1, -4, -3, -2, 1); #[rustfmt::skip] let r = i16x8::new( 96, i16::min_value(), 98, i16::max_value(), 96, i16::min_value(), 98, i16::max_value() ); assert_eq!( r, ::mem::transmute(__msa_adds_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_adds_s_w() { #[rustfmt::skip] let a = i32x4::new(100, i32::max_value(), 100, i32::min_value()); #[rustfmt::skip] let b = i32x4::new(-4, 3, -2, -1); #[rustfmt::skip] let r = i32x4::new(96, i32::max_value(), 98, i32::min_value()); assert_eq!( r, ::mem::transmute(__msa_adds_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_adds_s_d() { #[rustfmt::skip] let a = i64x2::new(100, i64::min_value()); #[rustfmt::skip] let b = i64x2::new(-4, -3); #[rustfmt::skip] let r = i64x2::new(96, i64::min_value()); assert_eq!( r, ::mem::transmute(__msa_adds_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_adds_u_b() { #[rustfmt::skip] let a = u8x16::new( 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value() ); #[rustfmt::skip] let b = u8x16::new( 4, 3, 2, 100, 4, 3, 2, 100, 4, 3, 2, 100, 4, 3, 2, 100 ); #[rustfmt::skip] let r = u8x16::new( 104, u8::max_value(), 102, u8::max_value(), 104, u8::max_value(), 102, u8::max_value(), 104, u8::max_value(), 102, u8::max_value(), 104, u8::max_value(), 102, u8::max_value() ); assert_eq!( r, ::mem::transmute(__msa_adds_u_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_adds_u_h() { #[rustfmt::skip] let a = u16x8::new( 100, u16::max_value(), 100, u16::max_value(), 100, u16::max_value(), 100, u16::max_value() ); #[rustfmt::skip] let b = u16x8::new(4, 3, 2, 1, 4, 3, 2, 1); #[rustfmt::skip] let r = u16x8::new( 104, u16::max_value(), 102, u16::max_value(), 104, u16::max_value(), 102, u16::max_value() ); assert_eq!( r, 
::mem::transmute(__msa_adds_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_adds_u_w() { #[rustfmt::skip] let a = u32x4::new(100, u32::max_value(), 100, u32::max_value()); #[rustfmt::skip] let b = u32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = u32x4::new(104, u32::max_value(), 102, u32::max_value()); assert_eq!( r, ::mem::transmute(__msa_adds_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_adds_u_d() { #[rustfmt::skip] let a = u64x2::new(100, u64::max_value()); #[rustfmt::skip] let b = u64x2::new(4, 3); #[rustfmt::skip] let r = u64x2::new(104, u64::max_value()); assert_eq!( r, ::mem::transmute(__msa_adds_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_addv_b() { #[rustfmt::skip] let a = i8x16::new( 100, i8::min_value(), 100, i8::max_value(), 100, i8::min_value(), 100, i8::max_value(), 100, i8::min_value(), 100, i8::max_value(), 100, i8::min_value(), 100, i8::max_value() ); #[rustfmt::skip] let b = i8x16::new( -4, -3, -2, 100, -4, -3, -2, 100, -4, -3, -2, 100, -4, -3, -2, 100 ); #[rustfmt::skip] let r = i8x16::new( 96, 125, 98, -29, 96, 125, 98, -29, 96, 125, 98, -29, 96, 125, 98, -29 ); assert_eq!( r, ::mem::transmute(__msa_addv_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_addv_h() { #[rustfmt::skip] let a = i16x8::new( 100, i16::min_value(), 100, i16::max_value(), 100, i16::min_value(), 100, i16::max_value() ); #[rustfmt::skip] let b = i16x8::new(-4, -3, -2, 1, -4, -3, -2, 1); #[rustfmt::skip] let r = i16x8::new(96, 32765, 98, -32768, 96, 32765, 98, -32768); assert_eq!( r, ::mem::transmute(__msa_addv_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_addv_w() { #[rustfmt::skip] let a = i32x4::new(100, i32::max_value(), 100, i32::min_value()); #[rustfmt::skip] let b = i32x4::new(-4, 3, -2, -1); #[rustfmt::skip] let r = i32x4::new(96, -2147483646, 98, 2147483647); assert_eq!( r, ::mem::transmute(__msa_addv_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_addv_d() { #[rustfmt::skip] let a = i64x2::new(100, i64::min_value()); #[rustfmt::skip] let b = i64x2::new(-4, -3); #[rustfmt::skip] let r = i64x2::new(96, 9223372036854775805); assert_eq!( r, ::mem::transmute(__msa_addv_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_addvi_b() { #[rustfmt::skip] let a = i8x16::new( 100, i8::max_value(), 100, i8::max_value(), 100, i8::max_value(), 100, i8::max_value(), 100, i8::max_value(), 100, i8::max_value(), 100, i8::max_value(), 100, i8::max_value() ); #[rustfmt::skip] let r = i8x16::new( 103, -126, 103, -126, 103, -126, 103, -126, 103, -126, 103, -126, 103, -126, 103, -126 ); assert_eq!(r, ::mem::transmute(__msa_addvi_b(::mem::transmute(a), 67))); } #[simd_test(enable = "msa")] unsafe fn test_msa_addvi_h() { #[rustfmt::skip] let a = i16x8::new( i16::max_value(), 3276, -100, -127, i16::max_value(), 3276, -100, -127 ); #[rustfmt::skip] let r = i16x8::new( -32766, 3279, -97, -124, -32766, 3279, -97, -124 ); assert_eq!(r, ::mem::transmute(__msa_addvi_h(::mem::transmute(a), 67))); } #[simd_test(enable = "msa")] unsafe fn test_msa_addvi_w() { #[rustfmt::skip] let a = i32x4::new(100, i32::max_value(), 100, i32::min_value()); #[rustfmt::skip] let r = i32x4::new(103, -2147483646, 103, -2147483645); assert_eq!(r, 
::mem::transmute(__msa_addvi_w(::mem::transmute(a), 67))); } #[simd_test(enable = "msa")] unsafe fn test_msa_addvi_d() { #[rustfmt::skip] let a = i64x2::new(100, i64::min_value()); #[rustfmt::skip] let r = i64x2::new(117, -9223372036854775791); assert_eq!(r, ::mem::transmute(__msa_addvi_d(::mem::transmute(a), 17))); } #[simd_test(enable = "msa")] unsafe fn test_msa_and_v() { #[rustfmt::skip] let a = u8x16::new( 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value() ); #[rustfmt::skip] let b = u8x16::new( 4, 3, 2, 100, 4, 3, 2, 100, 4, 3, 2, 100, 4, 3, 2, 100 ); #[rustfmt::skip] let r = u8x16::new( 4, 3, 0, 100, 4, 3, 0, 100, 4, 3, 0, 100, 4, 3, 0, 100 ); assert_eq!( r, ::mem::transmute(__msa_and_v(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_andi_b() { #[rustfmt::skip] let a = u8x16::new( 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value(), 100, u8::max_value() ); #[rustfmt::skip] let r = u8x16::new( 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5 ); assert_eq!(r, ::mem::transmute(__msa_andi_b(::mem::transmute(a), 5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_asub_s_b() { #[rustfmt::skip] let a = i8x16::new( -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4 ); #[rustfmt::skip] let b = i8x16::new( -6, -7, -8, -9, -6, -7, -8, -9, -6, -7, -8, -9, -6, -7, -8, -9 ); #[rustfmt::skip] let r = i8x16::new( 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 ); assert_eq!( r, ::mem::transmute(__msa_asub_s_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_asub_s_h() { #[rustfmt::skip] let a = i16x8::new(-1, -2, -3, -4, -1, -2, -3, -4); #[rustfmt::skip] let b = i16x8::new(-6, -7, -8, -9, -6, -7, -8, -9); #[rustfmt::skip] let r = i16x8::new(5, 5, 5, 5, 5, 5, 5, 5); assert_eq!( r, ::mem::transmute(__msa_asub_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_asub_s_w() { #[rustfmt::skip] let a = i32x4::new(-1, -2, -3, -4); #[rustfmt::skip] let b = i32x4::new(-6, -7, -8, -9); #[rustfmt::skip] let r = i32x4::new(5, 5, 5, 5); assert_eq!( r, ::mem::transmute(__msa_asub_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_asub_s_d() { #[rustfmt::skip] let a = i64x2::new(-1, -2); #[rustfmt::skip] let b = i64x2::new(-6, -7); #[rustfmt::skip] let r = i64x2::new(5, 5); assert_eq!( r, ::mem::transmute(__msa_asub_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_asub_u_b() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u8x16::new( 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 ); assert_eq!( r, ::mem::transmute(__msa_asub_u_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_asub_u_h() { #[rustfmt::skip] let a = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); #[rustfmt::skip] let r = u16x8::new(5, 5, 5, 5, 5, 5, 5, 5); assert_eq!( r, ::mem::transmute(__msa_asub_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_asub_u_w() { #[rustfmt::skip] let a = 
u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = u32x4::new(5, 5, 5, 5); assert_eq!( r, ::mem::transmute(__msa_asub_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_asub_u_d() { #[rustfmt::skip] let a = u64x2::new(1, 2); #[rustfmt::skip] let b = u64x2::new(6, 7); #[rustfmt::skip] let r = u64x2::new(5, 5); assert_eq!( r, ::mem::transmute(__msa_asub_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ave_s_b() { #[rustfmt::skip] let a = i8x16::new( -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4 ); #[rustfmt::skip] let b = i8x16::new( 6, -7, 8, -9, 6, -7, 8, -9, 6, -7, 8, -9, 6, -7, 8, -9 ); #[rustfmt::skip] let r = i8x16::new( 2, -5, 2, -7, 2, -5, 2, -7, 2, -5, 2, -7, 2, -5, 2, -7 ); assert_eq!( r, ::mem::transmute(__msa_ave_s_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ave_s_h() { #[rustfmt::skip] let a = i16x8::new(-1, -2, -3, -4, -1, -2, -3, -4); #[rustfmt::skip] let b = i16x8::new(6, -7, 8, -9, 6, -7, 8, -9); #[rustfmt::skip] let r = i16x8::new(2, -5, 2, -7, 2, -5, 2, -7); assert_eq!( r, ::mem::transmute(__msa_ave_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ave_s_w() { #[rustfmt::skip] let a = i32x4::new(-1, -2, -3, -4); #[rustfmt::skip] let b = i32x4::new(6, -7, 8, -9); #[rustfmt::skip] let r = i32x4::new(2, -5, 2, -7); assert_eq!( r, ::mem::transmute(__msa_ave_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ave_s_d() { #[rustfmt::skip] let a = i64x2::new(-1, -2); #[rustfmt::skip] let b = i64x2::new(-6, -7); #[rustfmt::skip] let r = i64x2::new(-4, -5); assert_eq!( r, ::mem::transmute(__msa_ave_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ave_u_b() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u8x16::new( 3, 4, 5, 6, 3, 4, 5, 6, 3, 4, 5, 6, 3, 4, 5, 6 ); assert_eq!( r, ::mem::transmute(__msa_ave_u_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ave_u_h() { #[rustfmt::skip] let a = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); #[rustfmt::skip] let r = u16x8::new(3, 4, 5, 6, 3, 4, 5, 6); assert_eq!( r, ::mem::transmute(__msa_ave_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ave_u_w() { #[rustfmt::skip] let a = u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = u32x4::new(3, 4, 5, 6); assert_eq!( r, ::mem::transmute(__msa_ave_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ave_u_d() { #[rustfmt::skip] let a = u64x2::new(1, 2); #[rustfmt::skip] let b = u64x2::new(6, 7); #[rustfmt::skip] let r = u64x2::new(3, 4); assert_eq!( r, ::mem::transmute(__msa_ave_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_aver_s_b() { #[rustfmt::skip] let a = i8x16::new( -1, -2, 3, -4, -1, -2, 3, -4, -1, -2, 3, -4, -1, -2, 3, -4 ); #[rustfmt::skip] let b = i8x16::new( -6, 7, -8, -9, -6, 7, -8, -9, -6, 7, -8, -9, -6, 7, -8, -9 ); #[rustfmt::skip] let r = i8x16::new( -3, 3, -2, -6, -3, 3, 
-2, -6, -3, 3, -2, -6, -3, 3, -2, -6 ); assert_eq!( r, ::mem::transmute(__msa_aver_s_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_aver_s_h() { #[rustfmt::skip] let a = i16x8::new(-1, -2, 3, -4, -1, -2, 3, -4); #[rustfmt::skip] let b = i16x8::new(-6, 7, -8, -9, -6, 7, -8, -9); #[rustfmt::skip] let r = i16x8::new(-3, 3, -2, -6, -3, 3, -2, -6); assert_eq!( r, ::mem::transmute(__msa_aver_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_aver_s_w() { #[rustfmt::skip] let a = i32x4::new(-1, -2, 3, -4); #[rustfmt::skip] let b = i32x4::new(-6, 7, -8, -9); #[rustfmt::skip] let r = i32x4::new(-3, 3, -2, -6); assert_eq!( r, ::mem::transmute(__msa_aver_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_aver_s_d() { #[rustfmt::skip] let a = i64x2::new(-1, -2); #[rustfmt::skip] let b = i64x2::new(-6, -7); #[rustfmt::skip] let r = i64x2::new(-3, -4); assert_eq!( r, ::mem::transmute(__msa_aver_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_aver_u_b() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u8x16::new( 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7 ); assert_eq!( r, ::mem::transmute(__msa_aver_u_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_aver_u_h() { #[rustfmt::skip] let a = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); #[rustfmt::skip] let r = u16x8::new(4, 5, 6, 7, 4, 5, 6, 7); assert_eq!( r, ::mem::transmute(__msa_aver_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_aver_u_w() { #[rustfmt::skip] let a = u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = u32x4::new(4, 5, 6, 7); assert_eq!( r, ::mem::transmute(__msa_aver_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_aver_u_d() { #[rustfmt::skip] let a = u64x2::new(1, 2); #[rustfmt::skip] let b = u64x2::new(6, 7); #[rustfmt::skip] let r = u64x2::new(4, 5); assert_eq!( r, ::mem::transmute(__msa_aver_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bclr_b() { #[rustfmt::skip] let a = u8x16::new( 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u8x16::new( 191, 27, 54, 1, 191, 27, 54, 1, 191, 27, 54, 1, 191, 27, 54, 1 ); assert_eq!( r, ::mem::transmute(__msa_bclr_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bclr_h() { #[rustfmt::skip] let a = u16x8::new(255, 155, 55, 1, 255, 155, 55, 1); #[rustfmt::skip] let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); #[rustfmt::skip] let r = u16x8::new(191, 27, 55, 1, 191, 27, 55, 1); assert_eq!( r, ::mem::transmute(__msa_bclr_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bclr_w() { #[rustfmt::skip] let a = u32x4::new(255, 155, 55, 1); #[rustfmt::skip] let b = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = u32x4::new(191, 27, 55, 1); assert_eq!( r, ::mem::transmute(__msa_bclr_w(::mem::transmute(a), ::mem::transmute(b))) ); } 
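// Editorial sketch (not part of the upstream crate): a scalar reference model that is
// consistent with the `ave`/`aver` and `bclr` test vectors above. `__msa_ave_*` appears to
// compute a floored per-lane average, `__msa_aver_*` the same average with a rounding bit,
// and `__msa_bclr_*` clears the bit selected by the low bits of the second operand. The
// helper names below are illustrative assumptions, shown for the 8-bit lanes only.
#[allow(dead_code)]
fn ave_s_ref(a: i8, b: i8) -> i8 {
    // widen to i16 so the sum cannot overflow, then floor-divide by two (arithmetic shift)
    ((a as i16 + b as i16) >> 1) as i8
}
#[allow(dead_code)]
fn aver_s_ref(a: i8, b: i8) -> i8 {
    // same as ave_s_ref, but a rounding bit is added before the shift,
    // e.g. aver_s_ref(-1, -6) == -3 while ave_s_ref(-1, -6) == -4
    ((a as i16 + b as i16 + 1) >> 1) as i8
}
#[allow(dead_code)]
fn bclr_ref(a: u8, b: u8) -> u8 {
    // clear bit (b % 8) of a, e.g. bclr_ref(255, 6) == 191
    a & !(1u8 << (b % 8))
}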
#[simd_test(enable = "msa")] unsafe fn test_msa_bclr_d() { #[rustfmt::skip] let a = u64x2::new(255, 155); #[rustfmt::skip] let b = u64x2::new(6, 7); #[rustfmt::skip] let r = u64x2::new(191, 27); assert_eq!( r, ::mem::transmute(__msa_bclr_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bclri_b() { #[rustfmt::skip] let a = u8x16::new( 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1 ); #[rustfmt::skip] let r = u8x16::new( 247, 147, 55, 1, 247, 147, 55, 1, 247, 147, 55, 1, 247, 147, 55, 1 ); assert_eq!(r, ::mem::transmute(__msa_bclri_b(::mem::transmute(a), 3))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bclri_h() { #[rustfmt::skip] let a = u16x8::new(2155, 1155, 155, 1, 2155, 1155, 155, 1); #[rustfmt::skip] let r = u16x8::new(107, 1155, 155, 1, 107, 1155, 155, 1); assert_eq!(r, ::mem::transmute(__msa_bclri_h(::mem::transmute(a), 11))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bclri_w() { #[rustfmt::skip] let a = u32x4::new(211111155, 111111155, 11111155, 1); #[rustfmt::skip] let r = u32x4::new(202722547, 102722547, 2722547, 1); assert_eq!(r, ::mem::transmute(__msa_bclri_w(::mem::transmute(a), 23))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bclri_d() { #[rustfmt::skip] let a = u64x2::new(211111111155, 11111111111111155); #[rustfmt::skip] let r = u64x2::new(73672157683, 11110973672157683); assert_eq!(r, ::mem::transmute(__msa_bclri_d(::mem::transmute(a), 37))); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsl_b() { #[rustfmt::skip] let a = u8x16::new( 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let c = u8x16::new( 1, 3, 5, 9, 1, 3, 5, 9, 1, 3, 5, 9, 1, 3, 5, 9 ); #[rustfmt::skip] let r = u8x16::new( 63, 11, 11, 1, 63, 11, 11, 1, 63, 11, 11, 1, 63, 11, 11, 1 ); assert_eq!( r, ::mem::transmute(__msa_binsl_b( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsl_h() { #[rustfmt::skip] let a = u16x8::new( 32767, 16384, 8192, 4096, 32767, 16384, 8192, 4096 ); #[rustfmt::skip] let b = u16x8::new( 21656, 5273, 7081, 2985, 21656, 5273, 7081, 2985 ); #[rustfmt::skip] let c = u16x8::new( 3, 7, 9, 13, 15, 17, 21, 23 ); #[rustfmt::skip] let r = u16x8::new( 24575, 5120, 7040, 2984, 21656, 0, 6144, 2816 ); assert_eq!( r, ::mem::transmute(__msa_binsl_h( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsl_w() { #[rustfmt::skip] let a = u32x4::new(2147483647, 536870912, 67108864, 8388608); #[rustfmt::skip] let b = u32x4::new(1036372536, 259093134, 78219975, 1119499719); #[rustfmt::skip] let c = u32x4::new(11, 15, 31, 37); #[rustfmt::skip] let r = u32x4::new(1037041663, 259063808, 78219975, 1082130432); assert_eq!( r, ::mem::transmute(__msa_binsl_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsl_d() { #[rustfmt::skip] let a = u64x2::new(8006399338, 2882303762); #[rustfmt::skip] let b = u64x2::new(9223372036854775805, 536870912); #[rustfmt::skip] let c = u64x2::new(12, 48); #[rustfmt::skip] let r = u64x2::new(9221120245047489898, 536901394); assert_eq!( r, ::mem::transmute(__msa_binsl_d( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsli_b() { #[rustfmt::skip] let a = u8x16::new( u8::max_value(), 
155, 55, 1, u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u8x16::new( 7, 7, 11, 9, 7, 7, 11, 9, 7, 7, 11, 9, 7, 7, 11, 9 ); assert_eq!( r, ::mem::transmute(__msa_binsli_b(::mem::transmute(a), ::mem::transmute(b), 5)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsli_h() { #[rustfmt::skip] let a = u16x8::new( 32767, 16384, 8192, 4096, 32767, 16384, 8192, 4096 ); #[rustfmt::skip] let b = u16x8::new( 21656, 5273, 7081, 2985, 21656, 5273, 7081, 2985 ); #[rustfmt::skip] let r = u16x8::new( 21659, 5272, 7080, 2984, 21659, 5272, 7080, 2984 ); assert_eq!( r, ::mem::transmute(__msa_binsli_h(::mem::transmute(a), ::mem::transmute(b), 13)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsli_w() { #[rustfmt::skip] let a = u32x4::new(2147483647, 536870912, 67108864, 8388608); #[rustfmt::skip] let b = u32x4::new(1036372536, 259093134, 78219975, 1119499719); #[rustfmt::skip] let r = u32x4::new(1036386303, 259080192, 78217216, 1119485952); assert_eq!( r, ::mem::transmute(__msa_binsli_w(::mem::transmute(a), ::mem::transmute(b), 17)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsli_d() { #[rustfmt::skip] let a = u64x2::new(8006399338, 2882303762); #[rustfmt::skip] let b = u64x2::new(9223372036854775805, 536870912); #[rustfmt::skip] let r = u64x2::new(9223372036854773098, 536901394); assert_eq!( r, ::mem::transmute(__msa_binsli_d(::mem::transmute(a), ::mem::transmute(b), 48)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsr_b() { #[rustfmt::skip] let a = u8x16::new( 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let c = u8x16::new( 1, 3, 5, 9, 1, 3, 5, 9, 1, 3, 5, 9, 1, 3, 5, 9 ); #[rustfmt::skip] let r = u8x16::new( 254, 151, 8, 1, 254, 151, 8, 1, 254, 151, 8, 1, 254, 151, 8, 1 ); assert_eq!( r, ::mem::transmute(__msa_binsr_b( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsr_h() { #[rustfmt::skip] let a = u16x8::new( 32767, 16384, 8192, 4096, 32767, 16384, 8192, 4096 ); #[rustfmt::skip] let b = u16x8::new( 21656, 5273, 7081, 2985, 21656, 5273, 7081, 2985 ); #[rustfmt::skip] let c = u16x8::new( 3, 7, 9, 13, 15, 17, 21, 23 ); #[rustfmt::skip] let r = u16x8::new( 32760, 16537, 9129, 2985, 21656, 16385, 8233, 4265 ); assert_eq!( r, ::mem::transmute(__msa_binsr_h( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsr_w() { #[rustfmt::skip] let a = u32x4::new(2147483647, 536870912, 67108864, 8388608); #[rustfmt::skip] let b = u32x4::new(1036372536, 259093134, 78219975, 1119499719); #[rustfmt::skip] let c = u32x4::new(11, 15, 31, 37); #[rustfmt::skip] let r = u32x4::new(2147482168, 536900238, 78219975, 8388615); assert_eq!( r, ::mem::transmute(__msa_binsr_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsr_d() { #[rustfmt::skip] let a = u64x2::new(8006399338, 2882303762); #[rustfmt::skip] let b = u64x2::new(9223372036854775805, 536870912); #[rustfmt::skip] let c = u64x2::new(12, 48); #[rustfmt::skip] let r = u64x2::new(8006402045, 536870912); assert_eq!( r, ::mem::transmute(__msa_binsr_d( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = 
"msa")] unsafe fn test_msa_binsri_b() { #[rustfmt::skip] let a = u8x16::new( 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u8x16::new( 198, 135, 8, 9, 198, 135, 8, 9, 198, 135, 8, 9, 198, 135, 8, 9 ); assert_eq!( r, ::mem::transmute(__msa_binsri_b(::mem::transmute(a), ::mem::transmute(b), 5)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsri_h() { #[rustfmt::skip] let a = u16x8::new( 32767, 16384, 8192, 4096, 32767, 16384, 8192, 4096 ); #[rustfmt::skip] let b = u16x8::new( 21656, 5273, 7081, 2985, 21656, 5273, 7081, 2985 ); #[rustfmt::skip] let r = u16x8::new( 21656, 21657, 7081, 2985, 21656, 21657, 7081, 2985 ); assert_eq!( r, ::mem::transmute(__msa_binsri_h(::mem::transmute(a), ::mem::transmute(b), 13)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsri_w() { #[rustfmt::skip] let a = u32x4::new(2147483647, 536870912, 67108864, 8388608); #[rustfmt::skip] let b = u32x4::new(1036372536, 259093134, 78219975, 1119499719); #[rustfmt::skip] let r = u32x4::new(2147338808, 536965774, 67209927, 8533447); assert_eq!( r, ::mem::transmute(__msa_binsri_w(::mem::transmute(a), ::mem::transmute(b), 17)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_binsri_d() { #[rustfmt::skip] let a = u64x2::new(8006399338, 2882303762); #[rustfmt::skip] let b = u64x2::new(9223372036854775805, 536870912); #[rustfmt::skip] let r = u64x2::new(562949953421309, 536870912); assert_eq!( r, ::mem::transmute(__msa_binsri_d(::mem::transmute(a), ::mem::transmute(b), 48)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bmnz_v() { #[rustfmt::skip] let a = u8x16::new( u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, ); #[rustfmt::skip] let c = u8x16::new( 3, 5, 7, 1, 3, 5, 7, 1, 3, 5, 7, 1, 3, 5, 7, 1 ); #[rustfmt::skip] let r = u8x16::new( 254, 159, 48, 1, 254, 159, 48, 1, 254, 159, 48, 1, 254, 159, 48, 1 ); assert_eq!( r, ::mem::transmute(__msa_bmnz_v( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bmnzi_b() { #[rustfmt::skip] let a = u8x16::new( u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1 ); #[rustfmt::skip] let b = u8x16::new( 1, u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55 ); #[rustfmt::skip] let r = u8x16::new( 249, 159, 51, 7, 249, 159, 51, 7, 249, 159, 51, 7, 249, 159, 51, 7 ); assert_eq!( r, ::mem::transmute(__msa_bmnzi_b(::mem::transmute(a), ::mem::transmute(b), 7)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bmz_v() { #[rustfmt::skip] let a = u8x16::new( u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let c = u8x16::new( 3, 5, 7, 1, 3, 5, 7, 1, 3, 5, 7, 1, 3, 5, 7, 1 ); #[rustfmt::skip] let r = u8x16::new( 7, 3, 15, 9, 7, 3, 15, 9, 7, 3, 15, 9, 7, 3, 15, 9 ); assert_eq!( r, ::mem::transmute(__msa_bmz_v( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bmzi_b() { #[rustfmt::skip] let a = u8x16::new( u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1, 
u8::max_value(), 155, 55, 1, u8::max_value(), 155, 55, 1 ); #[rustfmt::skip] let b = u8x16::new( 1, 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55 ); #[rustfmt::skip] let r = u8x16::new( 7, 251, 159, 49, 7, 251, 159, 49, 7, 251, 159, 49, 7, 251, 159, 49 ); assert_eq!( r, ::mem::transmute(__msa_bmzi_b(::mem::transmute(a), ::mem::transmute(b), 7)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bneg_b() { #[rustfmt::skip] let a = u8x16::new( 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u8x16::new( 191, 27, 54, 3, 191, 27, 54, 3, 191, 27, 54, 3, 191, 27, 54, 3 ); assert_eq!( r, ::mem::transmute(__msa_bneg_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bneg_h() { #[rustfmt::skip] let a = u16x8::new(255, 155, 55, 1, 255, 155, 55, 1); #[rustfmt::skip] let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); #[rustfmt::skip] let r = u16x8::new(191, 27, 311, 513, 191, 27, 311, 513); assert_eq!( r, ::mem::transmute(__msa_bneg_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bneg_w() { #[rustfmt::skip] let a = u32x4::new(255, 155, 55, 1); #[rustfmt::skip] let b = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = u32x4::new(191, 27, 311, 513); assert_eq!( r, ::mem::transmute(__msa_bneg_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bneg_d() { #[rustfmt::skip] let a = u64x2::new(255, 155); #[rustfmt::skip] let b = u64x2::new(6, 7); #[rustfmt::skip] let r = u64x2::new(191, 27); assert_eq!( r, ::mem::transmute(__msa_bneg_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bnegi_b() { #[rustfmt::skip] let a = u8x16::new( 50, 100, 127, u8::max_value(), 50, 100, 127, u8::max_value(), 50, 100, 127, u8::max_value(), 50, 100, 127, u8::max_value() ); #[rustfmt::skip] let r = u8x16::new( 34, 116, 111, 239, 34, 116, 111, 239, 34, 116, 111, 239, 34, 116, 111, 239 ); assert_eq!(r, ::mem::transmute(__msa_bnegi_b(::mem::transmute(a), 4))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bnegi_h() { #[rustfmt::skip] let a = u16x8::new( 32767, 3276, 100, 127, 32767, 3276, 100, 127 ); #[rustfmt::skip] let r = u16x8::new( 30719, 1228, 2148, 2175, 30719, 1228, 2148, 2175 ); assert_eq!(r, ::mem::transmute(__msa_bnegi_h(::mem::transmute(a), 11))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bnegi_w() { #[rustfmt::skip] let a = u32x4::new(100, 2147483647, 100, 2147483648); #[rustfmt::skip] let r = u32x4::new(16777316, 2130706431, 16777316, 2164260864); assert_eq!(r, ::mem::transmute(__msa_bnegi_w(::mem::transmute(a), 24))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bnegi_d() { #[rustfmt::skip] let a = u64x2::new(100, 9223372036854775808); #[rustfmt::skip] let r = u64x2::new(4398046511204, 9223376434901286912); assert_eq!(r, ::mem::transmute(__msa_bnegi_d(::mem::transmute(a), 42))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bnz_b() { #[rustfmt::skip] let a = u8x16::new( 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 4, 4, 0, 4, ); let r = 0 as i32; assert_eq!(r, ::mem::transmute(__msa_bnz_b(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bnz_h() { #[rustfmt::skip] let a = u16x8::new( 32767, 3276, 100, 127, 32767, 0, 100, 127 ); let r = 0 as i32; assert_eq!(r, ::mem::transmute(__msa_bnz_h(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn 
test_msa_bnz_w() { #[rustfmt::skip] let a = u32x4::new(100, 2147483647, 0, 2147483648); let r = 0 as i32; assert_eq!(r, ::mem::transmute(__msa_bnz_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bnz_d() { #[rustfmt::skip] let a = u64x2::new(100, 9223372036854775808); #[rustfmt::skip] let r = 1 as i32; assert_eq!(r, ::mem::transmute(__msa_bnz_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bnz_v() { #[rustfmt::skip] let a = u8x16::new( 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ); let r = 1 as i32; assert_eq!(r, ::mem::transmute(__msa_bnz_v(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bsel_v() { #[rustfmt::skip] let a = u8x16::new( 3, 5, 7, 1, 3, 5, 7, 1, 3, 5, 7, 1, 3, 5, 7, 1 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let c = u8x16::new( 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1 ); #[rustfmt::skip] let r = u8x16::new( 7, 3, 15, 9, 7, 3, 15, 9, 7, 3, 15, 9, 7, 3, 15, 9 ); assert_eq!( r, ::mem::transmute(__msa_bsel_v( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bseli_b() { #[rustfmt::skip] let a = u8x16::new( 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u8x16::new( 121, 29, 57, 9, 121, 29, 57, 9, 121, 29, 57, 9, 121, 29, 57, 9 ); assert_eq!( r, ::mem::transmute(__msa_bseli_b(::mem::transmute(a), ::mem::transmute(b), 121)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bset_b() { #[rustfmt::skip] let a = u8x16::new( 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u8x16::new( 255, 155, 55, 3, 255, 155, 55, 3, 255, 155, 55, 3, 255, 155, 55, 3 ); assert_eq!( r, ::mem::transmute(__msa_bset_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bset_h() { #[rustfmt::skip] let a = u16x8::new(255, 155, 55, 1, 255, 155, 55, 1); #[rustfmt::skip] let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); #[rustfmt::skip] let r = u16x8::new(255, 155, 311, 513, 255, 155, 311, 513); assert_eq!( r, ::mem::transmute(__msa_bset_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bset_w() { #[rustfmt::skip] let a = u32x4::new(255, 155, 55, 1); #[rustfmt::skip] let b = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = u32x4::new(255, 155, 311, 513); assert_eq!( r, ::mem::transmute(__msa_bset_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bset_d() { #[rustfmt::skip] let a = u64x2::new(255, 155); #[rustfmt::skip] let b = u64x2::new(6, 7); #[rustfmt::skip] let r = u64x2::new(255, 155); assert_eq!( r, ::mem::transmute(__msa_bset_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_bseti_b() { #[rustfmt::skip] let a = u8x16::new( 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1 ); #[rustfmt::skip] let r = u8x16::new( 255, 159, 55, 5, 255, 159, 55, 5, 255, 159, 55, 5, 255, 159, 55, 5 ); assert_eq!(r, ::mem::transmute(__msa_bseti_b(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bseti_h() { #[rustfmt::skip] let a = u16x8::new(255, 155, 55, 1, 255, 155, 55, 1); #[rustfmt::skip] let r = 
u16x8::new(255, 159, 55, 5, 255, 159, 55, 5); assert_eq!(r, ::mem::transmute(__msa_bseti_h(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bseti_w() { #[rustfmt::skip] let a = u32x4::new(255, 155, 55, 1); #[rustfmt::skip] let r = u32x4::new(255, 159, 55, 5); assert_eq!(r, ::mem::transmute(__msa_bseti_w(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bseti_d() { #[rustfmt::skip] let a = u64x2::new(255, 155); #[rustfmt::skip] let r = u64x2::new(255, 159); assert_eq!(r, ::mem::transmute(__msa_bseti_d(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bz_b() { #[rustfmt::skip] let a = u8x16::new( 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1, 255, 155, 55, 1 ); let r = 0 as i32; assert_eq!(r, ::mem::transmute(__msa_bz_b(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bz_h() { #[rustfmt::skip] let a = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); let r = 1 as i32; assert_eq!(r, ::mem::transmute(__msa_bz_h(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bz_w() { #[rustfmt::skip] let a = u32x4::new(255, 0, 55, 1); let r = 1 as i32; assert_eq!(r, ::mem::transmute(__msa_bz_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bz_d() { #[rustfmt::skip] let a = u64x2::new(255, 0); let r = 1 as i32; assert_eq!(r, ::mem::transmute(__msa_bz_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_bz_v() { #[rustfmt::skip] let a = u8x16::new( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ); let r = 1 as i32; assert_eq!(r, ::mem::transmute(__msa_bz_v(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ceq_b() { #[rustfmt::skip] let a = i8x16::new( -128, 127, 55, 1, -128, 127, 55, 1, -128, 127, 55, 1, -128, 127, 55, 1 ); #[rustfmt::skip] let b = i8x16::new( -128, 126, 55, 1, -128, 126, 55, 1, -128, 126, 55, 1, -128, 126, 55, 1 ); #[rustfmt::skip] let r = i8x16::new( -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1, -1 ); assert_eq!( r, ::mem::transmute(__msa_ceq_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ceq_h() { #[rustfmt::skip] let a = i16x8::new(255, 155, 55, 1, 255, 155, 55, 1); #[rustfmt::skip] let b = i16x8::new(255, 155, 56, 1, 255, 155, 56, 1); #[rustfmt::skip] let r = i16x8::new(-1, -1, 0, -1, -1, -1, 0, -1); assert_eq!( r, ::mem::transmute(__msa_ceq_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ceq_w() { #[rustfmt::skip] let a = i32x4::new(255, 155, 55, 1); #[rustfmt::skip] let b = i32x4::new(255, 156, 55, 1); #[rustfmt::skip] let r = i32x4::new(-1, 0, -1, -1); assert_eq!( r, ::mem::transmute(__msa_ceq_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ceq_d() { #[rustfmt::skip] let a = i64x2::new(255, 155); #[rustfmt::skip] let b = i64x2::new(255, 156); #[rustfmt::skip] let r = i64x2::new(-1, 0); assert_eq!( r, ::mem::transmute(__msa_ceq_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ceqi_b() { #[rustfmt::skip] let a = i8x16::new( 100, -1, -4, 15, 100, -1, -4, 15, 100, -1, -4, 15, 100, -1, -4, 15 ); #[rustfmt::skip] let r = i8x16::new( 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0 ); assert_eq!(r, ::mem::transmute(__msa_ceqi_b(::mem::transmute(a), -4))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ceqi_h() { #[rustfmt::skip] let a = i16x8::new( 32767, 3276, 100, -11, 32767, 3276, 100, 
-11 ); #[rustfmt::skip] let r = i16x8::new(0, 0, 0, -1, 0, 0, 0, -1); assert_eq!(r, ::mem::transmute(__msa_ceqi_h(::mem::transmute(a), -11))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ceqi_w() { #[rustfmt::skip] let a = i32x4::new(1, 3, 5, -3); #[rustfmt::skip] let r = i32x4::new(0, 0, -1, 0); assert_eq!(r, ::mem::transmute(__msa_ceqi_w(::mem::transmute(a), 5))); } // FIXME: https://reviews.llvm.org/D59884 // If target type is i64, negative immediate loses the sign // Test passes if 4294967293 is used instead of -3 in vector 'a' // #[simd_test(enable = "msa")] // unsafe fn test_msa_ceqi_d() { // #[rustfmt::skip] // let a = i64x2::new(-3, 2); // #[rustfmt::skip] // let r = i64x2::new(-1, 0); // assert_eq!(r, ::mem::transmute(__msa_ceqi_d(::mem::transmute(a), -3))); // } // Can not be tested in user mode // #[simd_test(enable = "msa")] // unsafe fn test_msa_cfcmsa() { // let r = 5; // assert_eq!(r, ::mem::transmute(__msa_cfcmsa(5))); // } #[simd_test(enable = "msa")] unsafe fn test_msa_cle_s_b() { #[rustfmt::skip] let a = i8x16::new( -128, 127, 55, 2, -128, 127, 55, 2, -128, 127, 55, 2, -128, 127, 55, 2 ); #[rustfmt::skip] let b = i8x16::new( -128, 126, 55, 1, -128, 126, 55, 1, -128, 126, 55, 1, -128, 126, 55, 1 ); #[rustfmt::skip] let r = i8x16::new( -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0 ); assert_eq!( r, ::mem::transmute(__msa_cle_s_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_cle_s_h() { #[rustfmt::skip] let a = i16x8::new(255, 155, 55, 2, 255, 155, 55, 2); #[rustfmt::skip] let b = i16x8::new(255, 155, 56, 1, 255, 155, 56, 1); #[rustfmt::skip] let r = i16x8::new(-1, -1, -1, 0, -1, -1, -1, 0); assert_eq!( r, ::mem::transmute(__msa_cle_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_cle_s_w() { #[rustfmt::skip] let a = i32x4::new(255, 155, 55, 2); #[rustfmt::skip] let b = i32x4::new(255, 156, 55, 1); #[rustfmt::skip] let r = i32x4::new(-1, -1, -1, 0); assert_eq!( r, ::mem::transmute(__msa_cle_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_cle_s_d() { #[rustfmt::skip] let a = i64x2::new(255, 155); #[rustfmt::skip] let b = i64x2::new(255, 156); #[rustfmt::skip] let r = i64x2::new(-1, -1); assert_eq!( r, ::mem::transmute(__msa_cle_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_cle_u_b() { #[rustfmt::skip] let a = u8x16::new( u8::max_value(), 127, 55, 2, u8::max_value(), 127, 55, 2, u8::max_value(), 127, 55, 2, u8::max_value(), 127, 55, 2 ); #[rustfmt::skip] let b = u8x16::new( u8::max_value(), 126, 55, 1, u8::max_value(), 126, 55, 1, u8::max_value(), 126, 55, 1, u8::max_value(), 126, 55, 1 ); #[rustfmt::skip] let r = i8x16::new(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0); assert_eq!( r, ::mem::transmute(__msa_cle_u_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_cle_u_h() { #[rustfmt::skip] let a = u16x8::new( u16::max_value(), 155, 55, 2, u16::max_value(), 155, 55, 2 ); #[rustfmt::skip] let b = u16x8::new( u16::max_value(), 155, 56, 1, u16::max_value(), 155, 56, 1 ); #[rustfmt::skip] let r = i16x8::new(-1, -1, -1, 0, -1, -1, -1, 0); assert_eq!( r, ::mem::transmute(__msa_cle_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_cle_u_w() { #[rustfmt::skip] let a = u32x4::new(u32::max_value(), 155, 55, 2); #[rustfmt::skip] let b = u32x4::new(u32::max_value(),
156, 55, 1); #[rustfmt::skip] let r = i32x4::new(-1, -1, -1, 0); assert_eq!( r, ::mem::transmute(__msa_cle_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_cle_u_d() { #[rustfmt::skip] let a = u64x2::new(u64::max_value(), 155); #[rustfmt::skip] let b = u64x2::new(u64::max_value(), 156); #[rustfmt::skip] let r = i64x2::new(-1, -1); assert_eq!( r, ::mem::transmute(__msa_cle_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_clei_s_b() { #[rustfmt::skip] let a = i8x16::new( -2, -127, 100, -127, -2, -127, 100, -127, -2, -127, 100, -127, -2, -127, 100, -127 ); #[rustfmt::skip] let r = i8x16::new(-1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1); assert_eq!(r, ::mem::transmute(__msa_clei_s_b(::mem::transmute(a), -2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_clei_s_h() { #[rustfmt::skip] let a = i16x8::new( 32767, 3276, 10, -1, 32767, 3276, 10, -1, ); #[rustfmt::skip] let r = i16x8::new(0, 0, 0, -1, 0, 0, 0, -1); assert_eq!(r, ::mem::transmute(__msa_clei_s_h(::mem::transmute(a), -1))); } #[simd_test(enable = "msa")] unsafe fn test_msa_clei_s_w() { #[rustfmt::skip] let a = i32x4::new(100, 2147483647, 6, 2147483647); #[rustfmt::skip] let r = i32x4::new(0, 0, -1, 0); assert_eq!(r, ::mem::transmute(__msa_clei_s_w(::mem::transmute(a), 6))); } // FIXME: https://reviews.llvm.org/D59884 // If target type is i64, negative immediate loses the sign // -3 is represented as 4294967293 // #[simd_test(enable = "msa")] // unsafe fn test_msa_clei_s_d() { // #[rustfmt::skip] // let a = i64x2::new(-3, 11); // #[rustfmt::skip] // let r = i64x2::new(-1, 0); // assert_eq!(r, ::mem::transmute(__msa_clei_s_d(::mem::transmute(a), -3))); // } #[simd_test(enable = "msa")] unsafe fn test_msa_clei_u_b() { #[rustfmt::skip] let a = u8x16::new( 2, 127, 100, 127, 2, 127, 100, 127, 2, 127, 100, 127, 2, 127, 100, 127, ); #[rustfmt::skip] let r = i8x16::new( -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0 ); assert_eq!(r, ::mem::transmute(__msa_clei_u_b(::mem::transmute(a), 25))); } #[simd_test(enable = "msa")] unsafe fn test_msa_clei_u_h() { #[rustfmt::skip] let a = u16x8::new( 1, 26, 15, 36, 1, 26, 15, 36 ); #[rustfmt::skip] let r = i16x8::new(-1, 0, -1, 0, -1, 0, -1, 0); assert_eq!(r, ::mem::transmute(__msa_clei_u_h(::mem::transmute(a), 25))); } #[simd_test(enable = "msa")] unsafe fn test_msa_clei_u_w() { #[rustfmt::skip] let a = u32x4::new(25, 32, 25, 32); #[rustfmt::skip] let r = i32x4::new(-1, 0, -1, 0); assert_eq!(r, ::mem::transmute(__msa_clei_u_w(::mem::transmute(a), 31))); } #[simd_test(enable = "msa")] unsafe fn test_msa_clei_u_d() { #[rustfmt::skip] let a = u64x2::new(10, 26); #[rustfmt::skip] let r = i64x2::new(-1, 0); assert_eq!(r, ::mem::transmute(__msa_clei_u_d(::mem::transmute(a), 25))); } #[simd_test(enable = "msa")] unsafe fn test_msa_clt_s_b() { #[rustfmt::skip] let a = i8x16::new( -128, 127, 55, 2, -128, 127, 55, 2, -128, 127, 55, 2, -128, 127, 55, 2 ); #[rustfmt::skip] let b = i8x16::new( -127, 126, 56, 1, -127, 126, 56, 1, -127, 126, 56, 1, -127, 126, 56, 1 ); #[rustfmt::skip] let r = i8x16::new( -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0 ); assert_eq!( r, ::mem::transmute(__msa_clt_s_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_clt_s_h() { #[rustfmt::skip] let a = i16x8::new(-255, 155, 55, 2, -255, 155, 55, 2); #[rustfmt::skip] let b = i16x8::new(255, 156, 56, 1, 255, 156, 56, 1); #[rustfmt::skip] let r = i16x8::new(-1, 
-1, -1, 0, -1, -1, -1, 0); assert_eq!( r, ::mem::transmute(__msa_clt_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_clt_s_w() { #[rustfmt::skip] let a = i32x4::new(-255, 155, 55, 2); #[rustfmt::skip] let b = i32x4::new(255, 156, 55, 1); #[rustfmt::skip] let r = i32x4::new(-1, -1, 0, 0); assert_eq!( r, ::mem::transmute(__msa_clt_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_clt_s_d() { #[rustfmt::skip] let a = i64x2::new(-255, 155); #[rustfmt::skip] let b = i64x2::new(255, 156); #[rustfmt::skip] let r = i64x2::new(-1, -1); assert_eq!( r, ::mem::transmute(__msa_clt_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_clt_u_b() { #[rustfmt::skip] let a = u8x16::new( 128, 127, 55, 2, 128, 127, 55, 2, 128, 127, 55, 2, 128, 127, 55, 2 ); #[rustfmt::skip] let b = u8x16::new( 127, 126, 56, 1, 127, 126, 56, 1, 127, 126, 56, 1, 127, 126, 56, 1 ); #[rustfmt::skip] let r = i8x16::new( 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0 ); assert_eq!( r, ::mem::transmute(__msa_clt_u_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_clt_u_h() { #[rustfmt::skip] let a = u16x8::new(255, 155, 55, 2, 255, 155, 55, 2); #[rustfmt::skip] let b = u16x8::new(255, 156, 56, 1, 255, 156, 56, 1); #[rustfmt::skip] let r = i16x8::new(0, -1, -1, 0, 0, -1, -1, 0); assert_eq!( r, ::mem::transmute(__msa_clt_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_clt_u_w() { #[rustfmt::skip] let a = u32x4::new(255, 155, 55, 2); #[rustfmt::skip] let b = u32x4::new(255, 156, 55, 1); #[rustfmt::skip] let r = i32x4::new(0, -1, 0, 0); assert_eq!( r, ::mem::transmute(__msa_clt_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_clt_u_d() { #[rustfmt::skip] let a = u64x2::new(255, 155); #[rustfmt::skip] let b = u64x2::new(255, 156); #[rustfmt::skip] let r = i64x2::new(0, -1); assert_eq!( r, ::mem::transmute(__msa_clt_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_clti_s_b() { #[rustfmt::skip] let a = i8x16::new( 2, -127, -5, 127, 2, -127, -5, 127, 2, -127, -5, 127, 2, -127, -5, 127 ); #[rustfmt::skip] let r = i8x16::new( 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0 ); assert_eq!(r, ::mem::transmute(__msa_clti_s_b(::mem::transmute(a), -5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_clti_s_h() { #[rustfmt::skip] let a = i16x8::new( -1024, 3276, 15, 127, -1024, 3276, 15, 127 ); #[rustfmt::skip] let r = i16x8::new(-1, 0, 0, 0, -1, 0, 0, 0); assert_eq!(r, ::mem::transmute(__msa_clti_s_h(::mem::transmute(a), 15))); } #[simd_test(enable = "msa")] unsafe fn test_msa_clti_s_w() { #[rustfmt::skip] let a = i32x4::new(-15, 2147483647, -15, 2147483647); #[rustfmt::skip] let r = i32x4::new(-1, 0, -1, 0); assert_eq!( r, ::mem::transmute(__msa_clti_s_w(::mem::transmute(a), -10)) ); } // FIXME: https://reviews.llvm.org/D59884 // If target type is i64, negative immediate loses the sign // -3 is represented as 4294967293 // #[simd_test(enable = "msa")] // unsafe fn test_msa_clti_s_d() { // #[rustfmt::skip] // let a = i64x2::new(-5, -2); // #[rustfmt::skip] // let r = i64x2::new(-1, 0); // assert_eq!(r, ::mem::transmute(__msa_clti_s_d(::mem::transmute(a), -3))); // } #[simd_test(enable = "msa")] unsafe fn test_msa_clti_u_b() { #[rustfmt::skip] let a = u8x16::new( 2, 127, 49, 127, 2, 127, 49, 
127, 2, 127, 49, 127, 2, 127, 49, 127, ); #[rustfmt::skip] let r = i8x16::new( -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0 ); assert_eq!(r, ::mem::transmute(__msa_clti_u_b(::mem::transmute(a), 50))); } #[simd_test(enable = "msa")] unsafe fn test_msa_clti_u_h() { #[rustfmt::skip] let a = u16x8::new( 327, 3276, 100, 127, 327, 3276, 100, 127 ); #[rustfmt::skip] let r = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); assert_eq!(r, ::mem::transmute(__msa_clti_u_h(::mem::transmute(a), 30))); } #[simd_test(enable = "msa")] unsafe fn test_msa_clti_u_w() { #[rustfmt::skip] let a = u32x4::new(100, 2147483647, 100, 2147483647); #[rustfmt::skip] let r = i32x4::new(0, 0, 0, 0); assert_eq!(r, ::mem::transmute(__msa_clti_u_w(::mem::transmute(a), 10))); } #[simd_test(enable = "msa")] unsafe fn test_msa_clti_u_d() { #[rustfmt::skip] let a = u64x2::new(1, 9223372036854775807); #[rustfmt::skip] let r = i64x2::new(-1, 0); assert_eq!(r, ::mem::transmute(__msa_clti_u_d(::mem::transmute(a), 10))); } #[simd_test(enable = "msa")] unsafe fn test_msa_copy_s_b() { #[rustfmt::skip] let a = i8x16::new( -100, 127, 4, 127, -100, 127, 4, 127, -100, 127, 4, 127, -100, 127, 4, 127 ); #[rustfmt::skip] let r = -100 as i32; assert_eq!(r, ::mem::transmute(__msa_copy_s_b(::mem::transmute(a), 12))); } #[simd_test(enable = "msa")] unsafe fn test_msa_copy_s_h() { #[rustfmt::skip] let a = i16x8::new( 32767, 3276, 100, 11, 32767, 3276, 100, 11 ); #[rustfmt::skip] let r = 32767 as i32; assert_eq!(r, ::mem::transmute(__msa_copy_s_h(::mem::transmute(a), 4))); } #[simd_test(enable = "msa")] unsafe fn test_msa_copy_s_w() { #[rustfmt::skip] let a = i32x4::new(100, 2147483647, 5, -2147483647); let r = 2147483647 as i32; assert_eq!(r, ::mem::transmute(__msa_copy_s_w(::mem::transmute(a), 1))); } #[simd_test(enable = "msa")] unsafe fn test_msa_copy_s_d() { #[rustfmt::skip] let a = i64x2::new(3, 9223372036854775807); #[rustfmt::skip] let r = 9223372036854775807 as i64; assert_eq!(r, ::mem::transmute(__msa_copy_s_d(::mem::transmute(a), 1))); } #[simd_test(enable = "msa")] unsafe fn test_msa_copy_u_b() { #[rustfmt::skip] let a = i8x16::new( 100, 127, 4, 127, 100, 127, 4, 127, 100, 127, 4, 127, 100, 127, 4, 127 ); #[rustfmt::skip] let r = 100 as u32; assert_eq!(r, ::mem::transmute(__msa_copy_u_b(::mem::transmute(a), 12))); } #[simd_test(enable = "msa")] unsafe fn test_msa_copy_u_h() { #[rustfmt::skip] let a = i16x8::new( 32767, 3276, 100, 11, 32767, 3276, 100, 11 ); #[rustfmt::skip] let r = 32767 as u32; assert_eq!(r, ::mem::transmute(__msa_copy_u_h(::mem::transmute(a), 4))); } #[simd_test(enable = "msa")] unsafe fn test_msa_copy_u_w() { #[rustfmt::skip] let a = i32x4::new(100, 2147483647, 5, 2147483647); #[rustfmt::skip] let r = 2147483647 as u32; assert_eq!(r, ::mem::transmute(__msa_copy_u_w(::mem::transmute(a), 1))); } #[simd_test(enable = "msa")] unsafe fn test_msa_copy_u_d() { #[rustfmt::skip] let a = i64x2::new(3, i64::max_value()); #[rustfmt::skip] let r = 9223372036854775807 as u64; assert_eq!(r, ::mem::transmute(__msa_copy_u_d(::mem::transmute(a), 1))); } // Can not be tested in user mode // #[simd_test(enable = "msa")] // unsafe fn test_msa_ctcmsa() { // } #[simd_test(enable = "msa")] unsafe fn test_msa_div_s_b() { #[rustfmt::skip] let a = i8x16::new( -6, -7, -8, -9, -6, -7, -8, -9, -6, -7, -8, -9, -6, -7, -8, -9 ); #[rustfmt::skip] let b = i8x16::new( -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4, -1, -2, -3, -4 ); #[rustfmt::skip] let r = i8x16::new( 6, 3, 2, 2, 6, 3, 2, 2, 6, 3, 2, 2, 6, 3, 2, 2 ); assert_eq!( r, 
::mem::transmute(__msa_div_s_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_div_s_h() { #[rustfmt::skip] let a = i16x8::new(-6, -7, -8, -9, 6, 7, 8, 9); #[rustfmt::skip] let b = i16x8::new(-1, -2, -3, -4, -1, -2, -3, -4); #[rustfmt::skip] let r = i16x8::new(6, 3, 2, 2, -6, -3, -2, -2); assert_eq!( r, ::mem::transmute(__msa_div_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_div_s_w() { #[rustfmt::skip] let a = i32x4::new(-6, -7, 8, 9); #[rustfmt::skip] let b = i32x4::new(-1, -2, -3, -4); #[rustfmt::skip] let r = i32x4::new(6, 3, -2, -2); assert_eq!( r, ::mem::transmute(__msa_div_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_div_s_d() { #[rustfmt::skip] let a = i64x2::new(-6, 7); #[rustfmt::skip] let b = i64x2::new(-1, -2); #[rustfmt::skip] let r = i64x2::new(6, -3); assert_eq!( r, ::mem::transmute(__msa_div_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_div_u_b() { #[rustfmt::skip] let a = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let b = u8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let r = u8x16::new( 6, 3, 2, 2, 6, 3, 2, 2, 6, 3, 2, 2, 6, 3, 2, 2 ); assert_eq!( r, ::mem::transmute(__msa_div_u_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_div_u_h() { #[rustfmt::skip] let a = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); #[rustfmt::skip] let b = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let r = u16x8::new(6, 3, 2, 2, 6, 3, 2, 2); assert_eq!( r, ::mem::transmute(__msa_div_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_div_u_w() { #[rustfmt::skip] let a = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let b = u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let r = u32x4::new(6, 3, 2, 2); assert_eq!( r, ::mem::transmute(__msa_div_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_div_u_d() { #[rustfmt::skip] let a = u64x2::new(6, 7); #[rustfmt::skip] let b = u64x2::new(1, 2); #[rustfmt::skip] let r = u64x2::new(6, 3); assert_eq!( r, ::mem::transmute(__msa_div_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dotp_s_h() { #[rustfmt::skip] let a = i8x16::new( -1, -2, -3, 4, -1, -2, -3, -4, -1, -2, -3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let b = i8x16::new( -6, -7, -8, -9, -6, -7, -8, -9, -6, -7, -8, -9, -6, -7, -8, -9 ); #[rustfmt::skip] let r = i16x8::new(20, -12, 20, 60, 20, -12, 20, 60); assert_eq!( r, ::mem::transmute(__msa_dotp_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dotp_s_w() { #[rustfmt::skip] let a = i16x8::new(-1, -2, -3, -4, -1, -2, -3, 4); #[rustfmt::skip] let b = i16x8::new(-6, -7, -8, -9, -6, -7, -8, -9); #[rustfmt::skip] let r = i32x4::new(20, 60, 20, -12); assert_eq!( r, ::mem::transmute(__msa_dotp_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dotp_s_d() { #[rustfmt::skip] let a = i32x4::new(-1, -2, -3, 4); #[rustfmt::skip] let b = i32x4::new(-6, -7, -8, -9); #[rustfmt::skip] let r = i64x2::new(20, -12); assert_eq!( r, ::mem::transmute(__msa_dotp_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dotp_u_h() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 
3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u16x8::new(20, 60, 20, 60, 20, 60, 20, 60); assert_eq!( r, ::mem::transmute(__msa_dotp_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dotp_u_w() { #[rustfmt::skip] let a = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); #[rustfmt::skip] let r = u32x4::new(20, 60, 20, 60); assert_eq!( r, ::mem::transmute(__msa_dotp_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dotp_u_d() { #[rustfmt::skip] let a = u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = u64x2::new(20, 60); assert_eq!( r, ::mem::transmute(__msa_dotp_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dpadd_s_h() { #[rustfmt::skip] let a = i16x8::new(-1, -2, -3, -4, -1, -2, -3, 4); #[rustfmt::skip] let b = i8x16::new( -1, -2, -3, 4, -1, -2, -3, -4, -1, -2, -3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let c = i8x16::new( -6, -7, -8, -9, -6, -7, -8, -9, -6, -7, -8, -9, -6, -7, -8, -9 ); #[rustfmt::skip] let r = i16x8::new(19, -14, 17, 56, 19, -14, 17, 64); assert_eq!( r, ::mem::transmute(__msa_dpadd_s_h( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dpadd_s_w() { #[rustfmt::skip] let a = i32x4::new(-1, -2, -3, -4); #[rustfmt::skip] let b = i16x8::new( -1, -2, -3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let c = i16x8::new( -6, -7, -8, -9, -6, -7, -8, -9 ); #[rustfmt::skip] let r = i32x4::new(19, -14, 17, 56); assert_eq!( r, ::mem::transmute(__msa_dpadd_s_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dpadd_s_d() { #[rustfmt::skip] let a = i64x2::new(-1, -2); #[rustfmt::skip] let b = i32x4::new(-1, -2, -3, 4); #[rustfmt::skip] let c = i32x4::new(-6, -7, -8, -9); #[rustfmt::skip] let r = i64x2::new(19, -14); assert_eq!( r, ::mem::transmute(__msa_dpadd_s_d( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dpadd_u_h() { #[rustfmt::skip] let a = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let b = u8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let c = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u16x8::new(21, 62, 23, 64, 21, 62, 23, 64); assert_eq!( r, ::mem::transmute(__msa_dpadd_u_h( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dpadd_u_w() { #[rustfmt::skip] let a = u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = u16x8::new( 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let c = u16x8::new( 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u32x4::new(21, 62, 23, 64); assert_eq!( r, ::mem::transmute(__msa_dpadd_u_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dpadd_u_d() { #[rustfmt::skip] let a = u64x2::new(1, 2); #[rustfmt::skip] let b = u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let c = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = u64x2::new(21, 62); assert_eq!( r, ::mem::transmute(__msa_dpadd_u_d( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn 
test_msa_dpsub_s_h() { #[rustfmt::skip] let a = i16x8::new(-1, -2, -3, -4, -1, -2, -3, 4); #[rustfmt::skip] let b = i8x16::new( -1, -2, -3, 4, -1, -2, -3, -4, -1, -2, -3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let c = i8x16::new( -6, -7, -8, -9, -6, -7, -8, -9, -6, -7, -8, -9, -6, -7, -8, -9 ); #[rustfmt::skip] let r = i16x8::new(-21, 10, -23, -64, -21, 10, -23, -56); assert_eq!( r, ::mem::transmute(__msa_dpsub_s_h( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dpsub_s_w() { #[rustfmt::skip] let a = i32x4::new(-1, -2, -3, -4); #[rustfmt::skip] let b = i16x8::new( -1, -2, -3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let c = i16x8::new( -6, -7, -8, -9, -6, -7, -8, -9 ); #[rustfmt::skip] let r = i32x4::new(-21, 10, -23, -64); assert_eq!( r, ::mem::transmute(__msa_dpsub_s_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dpsub_s_d() { #[rustfmt::skip] let a = i64x2::new(-1, -2); #[rustfmt::skip] let b = i32x4::new(-1, -2, -3, 4); #[rustfmt::skip] let c = i32x4::new(-6, -7, -8, -9); #[rustfmt::skip] let r = i64x2::new(-21, 10); assert_eq!( r, ::mem::transmute(__msa_dpsub_s_d( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dpsub_u_h() { #[rustfmt::skip] let a = i16x8::new(1, -2, 3, -4, -1, 2,-3, 4); #[rustfmt::skip] let b = u8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let c = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = i16x8::new(-19, -62, -17, -64, -21, -58, -23, -56); assert_eq!( r, ::mem::transmute(__msa_dpsub_u_h( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dpsub_u_w() { #[rustfmt::skip] let a = i32x4::new(1, -2, 3, -4); #[rustfmt::skip] let b = u16x8::new( 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let c = u16x8::new( 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = i32x4::new(-19, -62, -17, -64); assert_eq!( r, ::mem::transmute(__msa_dpsub_u_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_dpsub_u_d() { #[rustfmt::skip] let a = i64x2::new(1, -2); #[rustfmt::skip] let b = u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let c = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = i64x2::new(-19, -62); assert_eq!( r, ::mem::transmute(__msa_dpsub_u_d( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fadd_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, -4.4); #[rustfmt::skip] let b = f32x4::new(4.4, -3.3, 2.2, -1.1); #[rustfmt::skip] let r = f32x4::new(5.5, -5.5, 5.5, -5.5); assert_eq!( r, ::mem::transmute(__msa_fadd_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fadd_d() { #[rustfmt::skip] let a = f64x2::new(1.1, -2.2); #[rustfmt::skip] let b = f64x2::new(4.4, -3.3); #[rustfmt::skip] let r = f64x2::new(5.5, -5.5); assert_eq!( r, ::mem::transmute(__msa_fadd_d(::mem::transmute(a), ::mem::transmute(b))) ); } // Only observed behaviour should be a SIGFPE signal // Can not be tested #[simd_test(enable = "msa")] unsafe fn test_msa_fcaf_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, -4.4); #[rustfmt::skip] let b = f32x4::new(0.0, -1.2, 3.3, f32::NAN); #[rustfmt::skip] let r = i32x4::new(0, 0, 0, 0); assert_eq!( r,
::mem::transmute(__msa_fcaf_w(::mem::transmute(a), ::mem::transmute(b))) ); } // Only observed behaviour should be a SIGFPE signal // Can not be tested #[simd_test(enable = "msa")] unsafe fn test_msa_fcaf_d() { #[rustfmt::skip] let a = f64x2::new(1.1, -2.2); #[rustfmt::skip] let b = f64x2::new(-2.2, 1.1); #[rustfmt::skip] let r = i64x2::new(0, 0); assert_eq!( r, ::mem::transmute(__msa_fcaf_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fceq_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); #[rustfmt::skip] let b = f32x4::new(-4.4, -2.2, 3.3, f32::NAN); #[rustfmt::skip] let r = i32x4::new(0, -1, -1, 0); assert_eq!( r, ::mem::transmute(__msa_fceq_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fceq_d() { #[rustfmt::skip] let a = f64x2::new(1.1, -2.2); #[rustfmt::skip] let b = f64x2::new(1.1, 1.1); #[rustfmt::skip] let r = i64x2::new(-1, 0); assert_eq!( r, ::mem::transmute(__msa_fceq_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fclass_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); #[rustfmt::skip] let r = i32x4::new(128, 8, 128, 2); assert_eq!(r, ::mem::transmute(__msa_fclass_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_fclass_d() { #[rustfmt::skip] let a = f64x2::new(1.1, -2.2); #[rustfmt::skip] let r = i64x2::new(128, 8); assert_eq!(r, ::mem::transmute(__msa_fclass_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcle_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); #[rustfmt::skip] let b = f32x4::new(-4.4, -1.2, 3.3, f32::NAN); #[rustfmt::skip] let r = i32x4::new(0, -1, -1, 0); assert_eq!( r, ::mem::transmute(__msa_fcle_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcle_d() { #[rustfmt::skip] let a = f64x2::new(1.1, -2.2); #[rustfmt::skip] let b = f64x2::new(1.1, 1.1); #[rustfmt::skip] let r = i64x2::new(-1, -1); assert_eq!( r, ::mem::transmute(__msa_fcle_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fclt_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); #[rustfmt::skip] let b = f32x4::new(-4.4, -1.2, 3.3, f32::NAN); #[rustfmt::skip] let r = i32x4::new(0, -1, 0, 0); assert_eq!( r, ::mem::transmute(__msa_fclt_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fclt_d() { #[rustfmt::skip] let a = f64x2::new(1.1, -2.2); #[rustfmt::skip] let b = f64x2::new(1.1, 1.1); #[rustfmt::skip] let r = i64x2::new(0, -1); assert_eq!( r, ::mem::transmute(__msa_fclt_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcne_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); #[rustfmt::skip] let b = f32x4::new(-4.4, -1.2, 3.3, f32::NAN); #[rustfmt::skip] let r = i32x4::new(-1, -1, 0, 0); assert_eq!( r, ::mem::transmute(__msa_fcne_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcne_d() { #[rustfmt::skip] let a = f64x2::new(1.1, -2.2); #[rustfmt::skip] let b = f64x2::new(1.1, 1.1); #[rustfmt::skip] let r = i64x2::new(0, -1); assert_eq!( r, ::mem::transmute(__msa_fcne_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcor_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); #[rustfmt::skip] let b =
f32x4::new(f32::NAN, -1.2, 3.3, f32::NAN); #[rustfmt::skip] let r = i32x4::new(0, -1, -1, 0); assert_eq!( r, ::mem::transmute(__msa_fcor_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcor_d() { #[rustfmt::skip] let a = f64x2::new(1.1, f64::NAN); #[rustfmt::skip] let b = f64x2::new(1.1, 1.1); #[rustfmt::skip] let r = i64x2::new(-1, 0); assert_eq!( r, ::mem::transmute(__msa_fcor_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcueq_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); #[rustfmt::skip] let b = f32x4::new(f32::NAN, -1.2, 3.3, f32::NAN); #[rustfmt::skip] let r = i32x4::new(-1, 0, -1, -1); assert_eq!( r, ::mem::transmute(__msa_fcueq_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcueq_d() { #[rustfmt::skip] let a = f64x2::new(1.1, f64::NAN); #[rustfmt::skip] let b = f64x2::new(1.1, 1.1); #[rustfmt::skip] let r = i64x2::new(-1, -1); assert_eq!( r, ::mem::transmute(__msa_fcueq_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcule_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); #[rustfmt::skip] let b = f32x4::new(f32::NAN, -1.2, 3.3, f32::NAN); #[rustfmt::skip] let r = i32x4::new(-1, -1, -1, -1); assert_eq!( r, ::mem::transmute(__msa_fcule_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcule_d() { #[rustfmt::skip] let a = f64x2::new(1.1, f64::NAN); #[rustfmt::skip] let b = f64x2::new(1.1, 1.1); #[rustfmt::skip] let r = i64x2::new(-1, -1); assert_eq!( r, ::mem::transmute(__msa_fcule_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcult_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); #[rustfmt::skip] let b = f32x4::new(f32::NAN, -1.2, 3.3, f32::NAN); #[rustfmt::skip] let r = i32x4::new(-1, -1, 0, -1); assert_eq!( r, ::mem::transmute(__msa_fcult_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcult_d() { #[rustfmt::skip] let a = f64x2::new(1.1, f64::NAN); #[rustfmt::skip] let b = f64x2::new(1.1, 1.1); #[rustfmt::skip] let r = i64x2::new(0, -1); assert_eq!( r, ::mem::transmute(__msa_fcult_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcun_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); #[rustfmt::skip] let b = f32x4::new(f32::NAN, -1.2, 3.3, f32::NAN); #[rustfmt::skip] let r = i32x4::new(-1, 0, 0, -1); assert_eq!( r, ::mem::transmute(__msa_fcun_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcun_d() { #[rustfmt::skip] let a = f64x2::new(1.1, f64::NAN); #[rustfmt::skip] let b = f64x2::new(1.1, 1.1); #[rustfmt::skip] let r = i64x2::new(0, -1); assert_eq!( r, ::mem::transmute(__msa_fcun_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcune_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, f32::NAN); #[rustfmt::skip] let b = f32x4::new(f32::NAN, -1.2, 3.3, f32::NAN); #[rustfmt::skip] let r = i32x4::new(-1, -1, 0, -1); assert_eq!( r, ::mem::transmute(__msa_fcune_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fcune_d() { #[rustfmt::skip] let a = f64x2::new(1.1, f64::NAN); #[rustfmt::skip] let b = f64x2::new(1.1, 1.1); #[rustfmt::skip] let r = i64x2::new(0, 
-1); assert_eq!( r, ::mem::transmute(__msa_fcune_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fdiv_w() { #[rustfmt::skip] let a = f32x4::new(5.25, -20.2, 333.333, -425.0); #[rustfmt::skip] let b = f32x4::new(4.0, -2.1, 11.11, 8.2); #[rustfmt::skip] let r = f32x4::new(1.3125, 9.619048, 30.002972, -51.82927); assert_eq!( r, ::mem::transmute(__msa_fdiv_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fdiv_d() { #[rustfmt::skip] let a = f64x2::new(1111.11, -222222.2); #[rustfmt::skip] let b = f64x2::new(-4.85, 3.33); #[rustfmt::skip] let r = f64x2::new(-229.09484536082473, -66733.3933933934); assert_eq!( r, ::mem::transmute(__msa_fdiv_d(::mem::transmute(a), ::mem::transmute(b))) ); } /*// FIXME: 16-bit floats #[simd_test(enable = "msa")] unsafe fn test_msa_fexdo_h() { #[rustfmt::skip] let a = f32x4::new(20.5, 2.3, 4.5, 5.4); #[rustfmt::skip] let b = f32x4::new(1.1, 1.0, 1.0, 1.0); let r = i16x8::new(1, 9, 30, 51, 1, 9, 30, 51); assert_eq!(r, ::mem::transmute(__msa_fexdo_h(::mem::transmute(a), ::mem::transmute(b)))); }*/ #[simd_test(enable = "msa")] unsafe fn test_msa_fexdo_w() { #[rustfmt::skip] let a = f64x2::new(2000005.5, 2.3); #[rustfmt::skip] let b = f64x2::new(1235689784512.1, 2147483649998.5); #[rustfmt::skip] let r = f32x4::new( 1235689800000.0, 2147483600000.0, 2000005.5, 2.3 ); assert_eq!( r, ::mem::transmute(__msa_fexdo_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fexp2_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, -4.4); #[rustfmt::skip] let b = i32x4::new(4, -3, 2, 1); #[rustfmt::skip] let r = f32x4::new(17.6, -0.275, 13.2, -8.8); assert_eq!( r, ::mem::transmute(__msa_fexp2_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fexp2_d() { #[rustfmt::skip] let a = f64x2::new(1.1, -2.2); #[rustfmt::skip] let b = i64x2::new(-4, 3); #[rustfmt::skip] let r = f64x2::new(0.06875, -17.6); assert_eq!( r, ::mem::transmute(__msa_fexp2_d(::mem::transmute(a), ::mem::transmute(b))) ); } // FIXME: 16-bit floats // #[simd_test(enable = "msa")] // unsafe fn test_msa_fexupl_w() { // #[rustfmt::skip] // let a = f16x8(1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5); // #[rustfmt::skip] // let r = f32x4::new(5.5, 6.5, 7.5, 8.5); // assert_eq!(r, ::mem::transmute(__msa_fexupl_w(::mem::transmute(a)))); // } #[simd_test(enable = "msa")] unsafe fn test_msa_fexupl_d() { #[rustfmt::skip] let a = f32x4::new(5.5, 6.5, 7.5, 8.5); #[rustfmt::skip] let r = f64x2::new(7.5, 8.5); assert_eq!(r, ::mem::transmute(__msa_fexupl_d(::mem::transmute(a)))); } // FIXME: 16-bit floats // #[simd_test(enable = "msa")] // unsafe fn test_msa_fexupr_w() { // #[rustfmt::skip] // let a = f16x8(1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5); // #[rustfmt::skip] // let r = f32x4::new(1.5, 2.5, 3.5, 4.5); // assert_eq!(r, ::mem::transmute(__msa_fexupr_w(::mem::transmute(a)))); // } #[simd_test(enable = "msa")] unsafe fn test_msa_fexupr_d() { #[rustfmt::skip] let a = f32x4::new(5.5, 6.5, 7.5, 8.5); #[rustfmt::skip] let r = f64x2::new(5.5, 6.5); assert_eq!(r, ::mem::transmute(__msa_fexupr_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ffint_s_w() { #[rustfmt::skip] let a = i32x4::new(-1, 2, -3, 4); #[rustfmt::skip] let r = f32x4::new(-1.0, 2.0, -3.0, 4.0); assert_eq!(r, ::mem::transmute(__msa_ffint_s_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ffint_s_d() { #[rustfmt::skip] let a = 
i64x2::new(-1, 2); #[rustfmt::skip] let r = f64x2::new(-1.0, 2.0); assert_eq!(r, ::mem::transmute(__msa_ffint_s_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ffint_u_w() { #[rustfmt::skip] let a = u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let r = f32x4::new(1.0, 2.0, 3.0, 4.0); assert_eq!(r, ::mem::transmute(__msa_ffint_u_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ffint_u_d() { #[rustfmt::skip] let a = u64x2::new(1, 2); #[rustfmt::skip] let r = f64x2::new(1.0, 2.0); assert_eq!(r, ::mem::transmute(__msa_ffint_u_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ffql_w() { #[rustfmt::skip] let a = i16x8::new(11, 25, 33, 47, 11, 25, 33, 47); #[rustfmt::skip] let r = f32x4::new( 0.00033569336, 0.00076293945, 0.0010070801, 0.0014343262 ); assert_eq!(r, ::mem::transmute(__msa_ffql_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ffql_d() { #[rustfmt::skip] let a = i32x4::new(1111, 2222, 3333, 4444); #[rustfmt::skip] let r = f64x2::new( 0.000001552049070596695, 0.0000020693987607955933 ); assert_eq!(r, ::mem::transmute(__msa_ffql_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ffqr_w() { #[rustfmt::skip] let a = i16x8::new(12, 26, 34, 48, 11, 25, 33, 47); #[rustfmt::skip] let r = f32x4::new( 0.00036621094, 0.00079345703, 0.0010375977, 0.0014648438 ); assert_eq!(r, ::mem::transmute(__msa_ffqr_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ffqr_d() { #[rustfmt::skip] let a = i32x4::new(1111, 2555, 3333, 475); #[rustfmt::skip] let r = f64x2::new( 0.0000005173496901988983, 0.0000011897645890712738 ); assert_eq!(r, ::mem::transmute(__msa_ffqr_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_fill_b() { #[rustfmt::skip] let r = i8x16::new( 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 ); assert_eq!(r, ::mem::transmute(__msa_fill_b(2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_fill_h() { #[rustfmt::skip] let r = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); assert_eq!(r, ::mem::transmute(__msa_fill_h(2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_fill_w() { #[rustfmt::skip] let r = i32x4::new(2, 2, 2, 2); assert_eq!(r, ::mem::transmute(__msa_fill_w(2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_fill_d() { #[rustfmt::skip] let r = i64x2::new(2, 2); assert_eq!(r, ::mem::transmute(__msa_fill_d(2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_flog2_w() { #[rustfmt::skip] let a = f32x4::new(8.0, 16.0, 32.0, 64.0); #[rustfmt::skip] let r = f32x4::new(3.0, 4.0, 5.0, 6.0); assert_eq!(r, ::mem::transmute(__msa_flog2_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_flog2_d() { #[rustfmt::skip] let a = f64x2::new(8.0, 16.0); #[rustfmt::skip] let r = f64x2::new(3.0, 4.0); assert_eq!(r, ::mem::transmute(__msa_flog2_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmadd_w() { #[rustfmt::skip] let a = f32x4::new(1.0, 2.0, 3.0, 4.0); #[rustfmt::skip] let b = f32x4::new(5.0, 6.0, 7.0, 8.0); #[rustfmt::skip] let c = f32x4::new(9.0, 10.0, 11.0, 12.0); #[rustfmt::skip] let r = f32x4::new(46.0, 62.0, 80.0, 100.0); assert_eq!( r, ::mem::transmute(__msa_fmadd_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmadd_d() { #[rustfmt::skip] let a = f64x2::new(1.0, 2.0); #[rustfmt::skip] let b = f64x2::new(3.0, 4.0); #[rustfmt::skip] let c = f64x2::new(5.0, 6.0); #[rustfmt::skip] let r = 
f64x2::new(16.0, 26.0); assert_eq!( r, ::mem::transmute(__msa_fmadd_d( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmax_w() { #[rustfmt::skip] let a = f32x4::new(1.0, -6.0, 7.0, 8.0); #[rustfmt::skip] let b = f32x4::new(5.0, -2.0, 3.0, 4.0); #[rustfmt::skip] let r = f32x4::new(5.0, -2.0, 7.0, 8.0); assert_eq!( r, ::mem::transmute(__msa_fmax_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmax_d() { #[rustfmt::skip] let a = f64x2::new(1.0, 4.0); #[rustfmt::skip] let b = f64x2::new(3.0, 2.0); #[rustfmt::skip] let r = f64x2::new(3.0, 4.0); assert_eq!( r, ::mem::transmute(__msa_fmax_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmax_a_w() { #[rustfmt::skip] let a = f32x4::new(1.0, -6.0, -7.0, -8.0); #[rustfmt::skip] let b = f32x4::new(5.0, -2.0, 3.0, 4.0); #[rustfmt::skip] let r = f32x4::new(5.0, -6.0, -7.0, -8.0); assert_eq!( r, ::mem::transmute(__msa_fmax_a_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmax_a_d() { #[rustfmt::skip] let a = f64x2::new(1.0, -4.0); #[rustfmt::skip] let b = f64x2::new(3.0, 2.0); #[rustfmt::skip] let r = f64x2::new(3.0, -4.0); assert_eq!( r, ::mem::transmute(__msa_fmax_a_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmin_w() { #[rustfmt::skip] let a = f32x4::new(1.0, -6.0, 7.0, 8.0); #[rustfmt::skip] let b = f32x4::new(5.0, -2.0, 3.0, 4.0); #[rustfmt::skip] let r = f32x4::new(1.0, -6.0, 3.0, 4.0); assert_eq!( r, ::mem::transmute(__msa_fmin_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmin_d() { #[rustfmt::skip] let a = f64x2::new(1.0, 4.0); #[rustfmt::skip] let b = f64x2::new(3.0, 2.0); #[rustfmt::skip] let r = f64x2::new(1.0, 2.0); assert_eq!( r, ::mem::transmute(__msa_fmin_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmin_a_w() { #[rustfmt::skip] let a = f32x4::new(1.0, -6.0, -7.0, -8.0); #[rustfmt::skip] let b = f32x4::new(5.0, -2.0, 3.0, 4.0); #[rustfmt::skip] let r = f32x4::new(1.0, -2.0, 3.0, 4.0); assert_eq!( r, ::mem::transmute(__msa_fmin_a_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmin_a_d() { #[rustfmt::skip] let a = f64x2::new(1.0, -4.0); #[rustfmt::skip] let b = f64x2::new(3.0, 2.0); #[rustfmt::skip] let r = f64x2::new(1.0, 2.0); assert_eq!( r, ::mem::transmute(__msa_fmin_a_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmsub_w() { #[rustfmt::skip] let a = f32x4::new(1.0, 2.0, 3.0, 4.0); #[rustfmt::skip] let b = f32x4::new(5.0, 6.0, 7.0, 8.0); #[rustfmt::skip] let c = f32x4::new(9.0, 10.0, 11.0, 12.0); #[rustfmt::skip] let r = f32x4::new(-44.0, -58.0, -74.0, -92.0); assert_eq!( r, ::mem::transmute(__msa_fmsub_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmsub_d() { #[rustfmt::skip] let a = f64x2::new(1.0, 2.0); #[rustfmt::skip] let b = f64x2::new(3.0, 4.0); #[rustfmt::skip] let c = f64x2::new(5.0, 6.0); #[rustfmt::skip] let r = f64x2::new(-14.0, -22.0); assert_eq!( r, ::mem::transmute(__msa_fmsub_d( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmul_w() { #[rustfmt::skip] let a = f32x4::new(1.1, -2.2, 3.3, 4.4); 
#[rustfmt::skip] let b = f32x4::new(4.4, 3.3, 2.2, -1.1); #[rustfmt::skip] let r = f32x4::new(4.84, -7.26, 7.26, -4.84); assert_eq!( r, ::mem::transmute(__msa_fmul_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fmul_d() { #[rustfmt::skip] let a = f64x2::new(1.1, -2.2); #[rustfmt::skip] let b = f64x2::new(4.0, -3.3); #[rustfmt::skip] let r = f64x2::new(4.4, 7.26); assert_eq!( r, ::mem::transmute(__msa_fmul_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_frint_w() { #[rustfmt::skip] let a = f32x4::new(2.6, -2.7, 1.3, -1.7); #[rustfmt::skip] let r = f32x4::new(3.0, -3.0, 1.0, -2.0); assert_eq!(r, ::mem::transmute(__msa_frint_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_frint_d() { #[rustfmt::skip] let a = f64x2::new(2.6, 1.3); #[rustfmt::skip] let r = f64x2::new(3.0, 1.0); assert_eq!(r, ::mem::transmute(__msa_frint_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_frcp_w() { #[rustfmt::skip] let a = f32x4::new(2.6, -2.7, 1.3, -1.7); #[rustfmt::skip] let r = f32x4::new( 0.3846154, -0.37037036, 0.7692308, -0.58823526 ); assert_eq!(r, ::mem::transmute(__msa_frcp_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_frcp_d() { #[rustfmt::skip] let a = f64x2::new(2.6, 1.3); #[rustfmt::skip] let r = f64x2::new(0.3846153846153846, 0.7692307692307692); assert_eq!(r, ::mem::transmute(__msa_frcp_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_frsqrt_w() { #[rustfmt::skip] let a = f32x4::new(2.6, 2.7, 1.3, 1.7); #[rustfmt::skip] let r = f32x4::new( 0.6201737, 0.6085806, 0.87705797, 0.766965 ); assert_eq!(r, ::mem::transmute(__msa_frsqrt_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_frsqrt_d() { #[rustfmt::skip] let a = f64x2::new(2.6, 1.3); #[rustfmt::skip] let r = f64x2::new(0.6201736729460422, 0.8770580193070292); assert_eq!(r, ::mem::transmute(__msa_frsqrt_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsaf_w() { #[rustfmt::skip] let a = f32x4::new(-5.5, 5.5, 5.5, 5.5); #[rustfmt::skip] let b = f32x4::new(-5.5, 5.5, 5.5, 5.5); #[rustfmt::skip] let r = i32x4::new(0, 0, 0, 0); assert_eq!( r, ::mem::transmute(__msa_fsaf_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsaf_d() { #[rustfmt::skip] let a = f64x2::new(-125.5, 5.5); #[rustfmt::skip] let b = f64x2::new(125.5, 3.3); #[rustfmt::skip] let r = i64x2::new(0, 0); assert_eq!( r, ::mem::transmute(__msa_fsaf_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fseq_w() { #[rustfmt::skip] let a = f32x4::new(-5.5, -3.3, f32::NAN, f32::NAN); #[rustfmt::skip] let b = f32x4::new(5.5, -3.3, f32::NAN, 1.1); #[rustfmt::skip] let r = i32x4::new(0, -1, 0, 0); assert_eq!( r, ::mem::transmute(__msa_fseq_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fseq_d() { #[rustfmt::skip] let a = f64x2::new(-125.5, 5.5); #[rustfmt::skip] let b = f64x2::new(125.5, 5.5); #[rustfmt::skip] let r = i64x2::new(0, -1); assert_eq!( r, ::mem::transmute(__msa_fseq_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsle_w() { #[rustfmt::skip] let a = f32x4::new(5.5, 5.5, 5.5, f32::NAN); #[rustfmt::skip] let b = f32x4::new(-5.5, 3.3, 5.5, f32::NAN); #[rustfmt::skip] let r = i32x4::new(0, 0, -1, 0); assert_eq!( r,
::mem::transmute(__msa_fsle_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsle_d() { #[rustfmt::skip] let a = f64x2::new(-125.5, 5.5); #[rustfmt::skip] let b = f64x2::new(125.5, 3.3); #[rustfmt::skip] let r = i64x2::new(-1, 0); assert_eq!( r, ::mem::transmute(__msa_fsle_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fslt_w() { #[rustfmt::skip] let a = f32x4::new(-5.5, 5.5, 5.5, 5.5); #[rustfmt::skip] let b = f32x4::new(5.5, 3.3, 5.5, 1.1); #[rustfmt::skip] let r = i32x4::new(-1, 0, 0, 0); assert_eq!( r, ::mem::transmute(__msa_fslt_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fslt_d() { #[rustfmt::skip] let a = f64x2::new(-125.5, 5.5); #[rustfmt::skip] let b = f64x2::new(125.5, 3.3); #[rustfmt::skip] let r = i64x2::new(-1, 0); assert_eq!( r, ::mem::transmute(__msa_fslt_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsne_w() { #[rustfmt::skip] let a = f32x4::new(-5.5, 5.5, 5.5, 5.5); #[rustfmt::skip] let b = f32x4::new(5.5, 3.3, 5.5, 1.1); #[rustfmt::skip] let r = i32x4::new(-1, -1, 0, -1); assert_eq!( r, ::mem::transmute(__msa_fsne_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsne_d() { #[rustfmt::skip] let a = f64x2::new(-125.5, 5.5); #[rustfmt::skip] let b = f64x2::new(125.5, 5.5); #[rustfmt::skip] let r = i64x2::new(-1, 0); assert_eq!( r, ::mem::transmute(__msa_fsne_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsor_w() { #[rustfmt::skip] let a = f32x4::new(-5.5, f32::NAN, 5.5, 5.5); #[rustfmt::skip] let b = f32x4::new(5.5, 3.3, 5.5, 1.1); #[rustfmt::skip] let r = i32x4::new(-1, 0, -1, -1); assert_eq!( r, ::mem::transmute(__msa_fsor_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsor_d() { #[rustfmt::skip] let a = f64x2::new(-125.5, 5.5); #[rustfmt::skip] let b = f64x2::new(125.5, f64::NAN); #[rustfmt::skip] let r = i64x2::new(-1, 0); assert_eq!( r, ::mem::transmute(__msa_fsor_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsqrt_w() { #[rustfmt::skip] let a = f32x4::new(9.0, 81.0, 1089.0, 10000.0); #[rustfmt::skip] let r = f32x4::new(3.0, 9.0, 33.0, 100.0); assert_eq!(r, ::mem::transmute(__msa_fsqrt_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsqrt_d() { #[rustfmt::skip] let a = f64x2::new(81.0, 10000.0); #[rustfmt::skip] let r = f64x2::new(9.0, 100.0); assert_eq!(r, ::mem::transmute(__msa_fsqrt_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsub_w() { #[rustfmt::skip] let a = f32x4::new(5.5, 6.5, 7.5, 8.5); #[rustfmt::skip] let b = f32x4::new(1.25, 1.75, 2.25, 2.75); #[rustfmt::skip] let r = f32x4::new(4.25, 4.75, 5.25, 5.75); assert_eq!( r, ::mem::transmute(__msa_fsub_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsub_d() { #[rustfmt::skip] let a = f64x2::new(555.5, 55.5); #[rustfmt::skip] let b = f64x2::new(4.25, 3.25); #[rustfmt::skip] let r = f64x2::new(551.25, 52.25); assert_eq!( r, ::mem::transmute(__msa_fsub_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsueq_w() { #[rustfmt::skip] let a = f32x4::new(5.5, f32::NAN, 5.5, 5.5); #[rustfmt::skip] let b = f32x4::new(5.5, 5.5, -5.5, 5.5); #[rustfmt::skip] let r = 
i32x4::new(-1, -1, 0, -1); assert_eq!( r, ::mem::transmute(__msa_fsueq_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsueq_d() { #[rustfmt::skip] let a = f64x2::new(-5.5, 5.5); #[rustfmt::skip] let b = f64x2::new(5.5, f64::NAN); #[rustfmt::skip] let r = i64x2::new(0, -1); assert_eq!( r, ::mem::transmute(__msa_fsueq_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsule_w() { #[rustfmt::skip] let a = f32x4::new(5.7, 5.8, 5.9, f32::NAN); #[rustfmt::skip] let b = f32x4::new(5.6, 5.9, 5.9, f32::NAN); #[rustfmt::skip] let r = i32x4::new(0, -1, -1, -1); assert_eq!( r, ::mem::transmute(__msa_fsule_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsule_d() { #[rustfmt::skip] let a = f64x2::new(5.5, 5.5); #[rustfmt::skip] let b = f64x2::new(5.5, 5.5); #[rustfmt::skip] let r = i64x2::new(-1, -1); assert_eq!( r, ::mem::transmute(__msa_fsule_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsult_w() { #[rustfmt::skip] let a = f32x4::new(5.5, 5.5, 5.5, 5.5); #[rustfmt::skip] let b = f32x4::new(5.6, f32::NAN, 2.2, 1.1); #[rustfmt::skip] let r = i32x4::new(-1, -1, 0, 0); assert_eq!( r, ::mem::transmute(__msa_fsult_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsult_d() { #[rustfmt::skip] let a = f64x2::new(5.5, f64::NAN); #[rustfmt::skip] let b = f64x2::new(4.4, 3.3); #[rustfmt::skip] let r = i64x2::new(0, -1); assert_eq!( r, ::mem::transmute(__msa_fsult_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsun_w() { #[rustfmt::skip] let a = f32x4::new(5.5, 5.5, f32::NAN, 5.5); #[rustfmt::skip] let b = f32x4::new(4.4, 3.3, 2.2, f32::NAN); #[rustfmt::skip] let r = i32x4::new(0, 0, -1, -1); assert_eq!( r, ::mem::transmute(__msa_fsun_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsun_d() { #[rustfmt::skip] let a = f64x2::new(5.5, f64::NAN); #[rustfmt::skip] let b = f64x2::new(4.4, 3.3); #[rustfmt::skip] let r = i64x2::new(0, -1); assert_eq!( r, ::mem::transmute(__msa_fsun_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsune_w() { #[rustfmt::skip] let a = f32x4::new(5.5, 5.5, f32::NAN, 5.5); #[rustfmt::skip] let b = f32x4::new(4.4, 3.3, 2.2, 5.5); #[rustfmt::skip] let r = i32x4::new(-1, -1, -1, 0); assert_eq!( r, ::mem::transmute(__msa_fsune_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_fsune_d() { #[rustfmt::skip] let a = f64x2::new(5.5, f64::NAN); #[rustfmt::skip] let b = f64x2::new(5.5, 3.3); #[rustfmt::skip] let r = i64x2::new(0, -1); assert_eq!( r, ::mem::transmute(__msa_fsune_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ftint_s_w() { #[rustfmt::skip] let a = f32x4::new(-5.5, 75.6, -1000.7, 1219.3); #[rustfmt::skip] let r = i32x4::new(-6, 76, -1001, 1219); assert_eq!(r, ::mem::transmute(__msa_ftint_s_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ftint_s_d() { #[rustfmt::skip] let a = f64x2::new(-5.5, 25656.4); #[rustfmt::skip] let r = i64x2::new(-6, 25656); assert_eq!(r, ::mem::transmute(__msa_ftint_s_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ftint_u_w() { #[rustfmt::skip] let a = f32x4::new(-5.5, 75.6, -1000.7, 1219.3); #[rustfmt::skip] let r = 
u32x4::new(0, 76, 0, 1219); assert_eq!(r, ::mem::transmute(__msa_ftint_u_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ftint_u_d() { #[rustfmt::skip] let a = f64x2::new(5.5, -25656.4); #[rustfmt::skip] let r = u64x2::new(6, 0); assert_eq!(r, ::mem::transmute(__msa_ftint_u_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ftq_h() { #[rustfmt::skip] let a = f32x4::new(0.00001, 0.0002, 0.00001, -0.0002); #[rustfmt::skip] let b = f32x4::new(0.0001, -0.002, 0.0001, 0.002); #[rustfmt::skip] let r = i16x8::new(3, -66, 3, 66, 0, 7, 0, -7); assert_eq!( r, ::mem::transmute(__msa_ftq_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ftq_w() { #[rustfmt::skip] let a = f64x2::new(0.00001, -0.0002); #[rustfmt::skip] let b = f64x2::new(0.00000045, 0.000015); #[rustfmt::skip] let r = i32x4::new(966, 32212, 21475, -429497); assert_eq!( r, ::mem::transmute(__msa_ftq_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ftrunc_s_w() { #[rustfmt::skip] let a = f32x4::new(-5.5, 75.6, -1000.7, 1219.3); #[rustfmt::skip] let r = i32x4::new(-5, 75, -1000, 1219); assert_eq!(r, ::mem::transmute(__msa_ftrunc_s_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ftrunc_s_d() { #[rustfmt::skip] let a = f64x2::new(-5.5, 25656.4); #[rustfmt::skip] let r = i64x2::new(-5, 25656); assert_eq!(r, ::mem::transmute(__msa_ftrunc_s_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ftrunc_u_w() { #[rustfmt::skip] let a = f32x4::new(-5.5, 75.6, -1000.7, 1219.3); #[rustfmt::skip] let r = u32x4::new(0, 75, 0, 1219); assert_eq!(r, ::mem::transmute(__msa_ftrunc_u_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ftrunc_u_d() { #[rustfmt::skip] let a = f64x2::new(5.5, -25656.4); #[rustfmt::skip] let r = u64x2::new(5, 0); assert_eq!(r, ::mem::transmute(__msa_ftrunc_u_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_hadd_s_h() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, -1, -2, -3, -4, 1, 2, 3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let b = i8x16::new( 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i16x8::new(6, 6, 2, -2, 6, 6, 2, -2); assert_eq!( r, ::mem::transmute(__msa_hadd_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_hadd_s_w() { #[rustfmt::skip] let a = i16x8::new( 1, 2, 3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let b = i16x8::new( 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i32x4::new(6, 6, 2, -2); assert_eq!( r, ::mem::transmute(__msa_hadd_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_hadd_s_d() { #[rustfmt::skip] let a = i32x4::new(1, -2, 3, -4); #[rustfmt::skip] let b = i32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = i64x2::new(2, -2); assert_eq!( r, ::mem::transmute(__msa_hadd_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_hadd_u_h() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = u8x16::new( 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = u16x8::new(6, 6, 6, 6, 6, 6, 6, 6); assert_eq!( r, ::mem::transmute(__msa_hadd_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_hadd_u_w() { #[rustfmt::skip] let a = u16x8::new( 1, 2, 3, 4, 1, 2, 3, 4 ); 
#[rustfmt::skip] let b = u16x8::new( 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = u32x4::new(6, 6, 6, 6); assert_eq!( r, ::mem::transmute(__msa_hadd_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_hadd_u_d() { #[rustfmt::skip] let a = u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = u32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = u64x2::new(6, 6); assert_eq!( r, ::mem::transmute(__msa_hadd_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_hsub_s_h() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, -1, -2, -3, -4, 1, 2, 3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let b = i8x16::new( 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i16x8::new(-2, 2, -6, -6, -2, 2, -6, -6); assert_eq!( r, ::mem::transmute(__msa_hsub_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_hsub_s_w() { #[rustfmt::skip] let a = i16x8::new( 1, 2, 3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let b = i16x8::new( 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i32x4::new(-2, 2, -6, -6); assert_eq!( r, ::mem::transmute(__msa_hsub_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_hsub_s_d() { #[rustfmt::skip] let a = i32x4::new(1, -2, 3, -4); #[rustfmt::skip] let b = i32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = i64x2::new(-6, -6); assert_eq!( r, ::mem::transmute(__msa_hsub_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_hsub_u_h() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = u8x16::new( 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i16x8::new(-2, 2, -2, 2, -2, 2, -2, 2); assert_eq!( r, ::mem::transmute(__msa_hsub_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_hsub_u_w() { #[rustfmt::skip] let a = u16x8::new( 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = u16x8::new( 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i32x4::new(-2, 2, -2, 2); assert_eq!( r, ::mem::transmute(__msa_hsub_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_hsub_u_d() { #[rustfmt::skip] let a = u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = u32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = i64x2::new(-2, 2); assert_eq!( r, ::mem::transmute(__msa_hsub_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvev_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i8x16::new( 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i8x16::new( 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3 ); assert_eq!( r, ::mem::transmute(__msa_ilvev_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvev_h() { #[rustfmt::skip] let a = i16x8::new( 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i16x8::new( 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i16x8::new(4, 1, 2, 3, 4, 1, 2, 3); assert_eq!( r, ::mem::transmute(__msa_ilvev_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvev_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = i32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = i32x4::new(4, 1, 2, 3); assert_eq!( r, 
::mem::transmute(__msa_ilvev_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvev_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let b = i64x2::new(4, 3); #[rustfmt::skip] let r = i64x2::new(4, 1); assert_eq!( r, ::mem::transmute(__msa_ilvev_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvl_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); #[rustfmt::skip] let b = i8x16::new( 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i8x16::new( 8, 9, 7, 10, 6, 11, 5, 12, 4, 13, 3, 14, 2, 15, 1, 16 ); assert_eq!( r, ::mem::transmute(__msa_ilvl_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvl_h() { #[rustfmt::skip] let a = i16x8::new( 1, 2, 3, 4, 5, 6, 7, 8 ); #[rustfmt::skip] let b = i16x8::new( 8, 7, 6, 5, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i16x8::new(4, 5, 3, 6, 2, 7, 1, 8); assert_eq!( r, ::mem::transmute(__msa_ilvl_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvl_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = i32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = i32x4::new(2, 3, 1, 4); assert_eq!( r, ::mem::transmute(__msa_ilvl_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvl_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let b = i64x2::new(2, 1); #[rustfmt::skip] let r = i64x2::new(1, 2); assert_eq!( r, ::mem::transmute(__msa_ilvl_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvod_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); #[rustfmt::skip] let b = i8x16::new( 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i8x16::new( 15, 2, 13, 4, 11, 6, 9, 8, 7, 10, 5, 12, 3, 14, 1, 16 ); assert_eq!( r, ::mem::transmute(__msa_ilvod_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvod_h() { #[rustfmt::skip] let a = i16x8::new( 1, 2, 3, 4, 5, 6, 7, 8 ); #[rustfmt::skip] let b = i16x8::new( 8, 7, 6, 5, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i16x8::new(7, 2, 5, 4, 3, 6, 1, 8); assert_eq!( r, ::mem::transmute(__msa_ilvod_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvod_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = i32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = i32x4::new(3, 2, 1, 4); assert_eq!( r, ::mem::transmute(__msa_ilvod_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvod_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let b = i64x2::new(2, 1); #[rustfmt::skip] let r = i64x2::new(1, 2); assert_eq!( r, ::mem::transmute(__msa_ilvod_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvr_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); #[rustfmt::skip] let b = i8x16::new( 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i8x16::new( 16, 1, 15, 2, 14, 3, 13, 4, 12, 5, 11, 6, 10, 7, 9, 8 ); assert_eq!( r, ::mem::transmute(__msa_ilvr_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn 
test_msa_ilvr_h() { #[rustfmt::skip] let a = i16x8::new( 1, 2, 3, 4, 5, 6, 7, 8, ); #[rustfmt::skip] let b = i16x8::new( 8, 7, 6, 5, 4, 3, 2, 1, ); #[rustfmt::skip] let r = i16x8::new(8, 1, 7, 2, 6, 3, 5, 4); assert_eq!( r, ::mem::transmute(__msa_ilvr_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvr_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = i32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = i32x4::new(4, 1, 3, 2); assert_eq!( r, ::mem::transmute(__msa_ilvr_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ilvr_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let b = i64x2::new(2, 1); #[rustfmt::skip] let r = i64x2::new(2, 1); assert_eq!( r, ::mem::transmute(__msa_ilvr_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_insert_b() { #[rustfmt::skip] let a = i8x16::new( -100, 127, 4, 127, -100, 127, 4, 127, -100, 127, 4, 127, -100, 127, 4, 127 ); #[rustfmt::skip] let r = i8x16::new( -100, 127, 4, 127, -100, 127, 4, 127, -100, 127, 4, 127, 5, 127, 4, 127 ); assert_eq!( r, ::mem::transmute(__msa_insert_b(::mem::transmute(a), 12, 5)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_insert_h() { #[rustfmt::skip] let a = i16x8::new( 32767, 3276, 100, 11, 32767, 3276, 100, 11 ); #[rustfmt::skip] let r = i16x8::new( 32767, 3276, 100, 11, 5, 3276, 100, 11 ); assert_eq!( r, ::mem::transmute(__msa_insert_h(::mem::transmute(a), 4, 5)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_insert_w() { #[rustfmt::skip] let a = i32x4::new(100, 2147483647, 5, -2147483647); #[rustfmt::skip] let r = i32x4::new(100, 7, 5, -2147483647); assert_eq!( r, ::mem::transmute(__msa_insert_w(::mem::transmute(a), 1, 7)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_insert_d() { #[rustfmt::skip] let a = i64x2::new(3, i64::max_value()); #[rustfmt::skip] let r = i64x2::new(3, 100); assert_eq!( r, ::mem::transmute(__msa_insert_d(::mem::transmute(a), 1, 100)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_insve_b() { #[rustfmt::skip] let a = i8x16::new( -100, i8::max_value(), 4, i8::max_value(), -100, i8::max_value(), 4, i8::max_value(), -100, i8::max_value(), 4, i8::max_value(), -100, i8::max_value(), 4, i8::max_value() ); #[rustfmt::skip] let b = i8x16::new( 5, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let r = i8x16::new( -100, 127, 4, 127, -100, 127, 4, 127, -100, 127, 4, 127, 5, 127, 4, 127 ); assert_eq!( r, ::mem::transmute(__msa_insve_b(::mem::transmute(a), 12, ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_insve_h() { #[rustfmt::skip] let a = i16x8::new( i16::max_value(), 3276, 100, 11, i16::max_value(), 3276, 100, 11 ); #[rustfmt::skip] let b = i16x8::new( 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let r = i16x8::new( 32767, 3276, 100, 11, 1, 3276, 100, 11 ); assert_eq!( r, ::mem::transmute(__msa_insve_h(::mem::transmute(a), 4, ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_insve_w() { #[rustfmt::skip] let a = i32x4::new(100, 2147483647, 5, -2147483647); #[rustfmt::skip] let b = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let r = i32x4::new(100, 2147483647, 5, 1); assert_eq!( r, ::mem::transmute(__msa_insve_w(::mem::transmute(a), 3, ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_insve_d() { #[rustfmt::skip] let a = i64x2::new(3, i64::max_value()); #[rustfmt::skip] let b = i64x2::new(1, 2); 
#[rustfmt::skip] let r = i64x2::new(3, 1); assert_eq!( r, ::mem::transmute(__msa_insve_d(::mem::transmute(a), 1, ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ld_b() { #[rustfmt::skip] let mut a : [i8; 32] = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ]; let p = &mut a[4] as *mut _ as *mut u8; #[rustfmt::skip] let r = i8x16::new( 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 ); assert_eq!(r, ::mem::transmute(__msa_ld_b(p, 9))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ld_h() { #[rustfmt::skip] let mut a : [i16; 16] = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ]; let p = &mut a[4] as *mut _ as *mut u8; #[rustfmt::skip] let r = i16x8::new(3, 4, 5, 6, 7, 8, 9, 10); assert_eq!(r, ::mem::transmute(__msa_ld_h(p, -2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ld_w() { #[rustfmt::skip] let mut a : [i32; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; let p = &mut a[3] as *mut _ as *mut u8; #[rustfmt::skip] let r = i32x4::new(2, 3, 4, 5); assert_eq!(r, ::mem::transmute(__msa_ld_w(p, -4))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ld_d() { #[rustfmt::skip] let mut a : [i64; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; let p = &mut a[4] as *mut _ as *mut u8; #[rustfmt::skip] let r = i64x2::new(0, 1); assert_eq!(r, ::mem::transmute(__msa_ld_d(p, -32))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ldi_b() { #[rustfmt::skip] let r = i8x16::new( -20, -20, -20, -20, -20, -20, -20, -20, -20, -20, -20, -20, -20, -20, -20, -20 ); assert_eq!(r, ::mem::transmute(__msa_ldi_b(-20))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ldi_h() { #[rustfmt::skip] let r = i16x8::new( 255, 255, 255, 255, 255, 255, 255, 255 ); assert_eq!(r, ::mem::transmute(__msa_ldi_h(255))); } #[simd_test(enable = "msa")] unsafe fn test_msa_ldi_w() { #[rustfmt::skip] let r = i32x4::new(-509, -509, -509, -509); assert_eq!(r, ::mem::transmute(__msa_ldi_w(-509))); } // FIXME: https://reviews.llvm.org/D59884 // If target type is i64, negative immediate loses the sign // Test passes if 4294967185 is used instead -111 in vector 'r' // #[simd_test(enable = "msa")] // unsafe fn test_msa_ldi_d() { // let r = i64x2::new(-111, -111); // assert_eq!(r, ::mem::transmute(__msa_ldi_d(-111))); // } #[simd_test(enable = "msa")] unsafe fn test_msa_madd_q_h() { #[rustfmt::skip] let a = i16x8::new( i16::max_value(), 1024, i16::min_value(), -1024, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i16x8::new( 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 ); #[rustfmt::skip] let c = i16x8::new( i16::max_value(), i16::max_value(), 1, -1, 33, 66, 99, 132 ); #[rustfmt::skip] let r = i16x8::new(32767, 2047, -32768, -1025, 2, 4, 6, 8); assert_eq!( r, ::mem::transmute(__msa_madd_q_h( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_madd_q_w() { #[rustfmt::skip] let a = i32x4::new(i32::max_value(), i32::min_value(), 1, 2); #[rustfmt::skip] let b = i32x4::new(102401, 102401, 102401, 102401); #[rustfmt::skip] let c = i32x4::new(10240, 20480, 30720, 40960); #[rustfmt::skip] let r = i32x4::new(2147483647, -2147483648, 2, 3); assert_eq!( r, ::mem::transmute(__msa_madd_q_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_maddr_q_h() { #[rustfmt::skip] let a = i16x8::new( 32767, 1024, -32768, -1024, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i16x8::new( 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 ); 
#[rustfmt::skip] let c = i16x8::new( 32767, 32767, 32767, 32767, 33, 66, 99, 132 ); #[rustfmt::skip] let r = i16x8::new(32767, 2048, -31744, 0, 2, 4, 6, 8); assert_eq!( r, ::mem::transmute(__msa_maddr_q_h( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_maddr_q_w() { #[rustfmt::skip] let a = i32x4::new(i32::max_value(), i32::min_value(), 1, 2); #[rustfmt::skip] let b = i32x4::new(102401, 102401, 102401, 102401); #[rustfmt::skip] let c = i32x4::new(10240, 20480, 30720, 40960); #[rustfmt::skip] let r = i32x4::new(2147483647, -2147483647, 2, 4); assert_eq!( r, ::mem::transmute(__msa_maddr_q_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_maddv_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i8x16::new( 5, 6, 7, 8, 5, 6, 7, 8, 5, 6, 7, 8, 5, 6, 7, 8 ); #[rustfmt::skip] let c = i8x16::new( 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12 ); #[rustfmt::skip] let r = i8x16::new( 46, 62, 80, 100, 46, 62, 80, 100, 46, 62, 80, 100, 46, 62, 80, 100 ); assert_eq!( r, ::mem::transmute(__msa_maddv_b( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_maddv_h() { #[rustfmt::skip] let a = i16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let b = i16x8::new(5, 6, 7, 8, 5, 6, 7, 8); #[rustfmt::skip] let c = i16x8::new(9, 10, 11, 12, 9, 10, 11, 12); #[rustfmt::skip] let r = i16x8::new(46, 62, 80, 100, 46, 62, 80, 100); assert_eq!( r, ::mem::transmute(__msa_maddv_h( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_maddv_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 1, 2); #[rustfmt::skip] let b = i32x4::new(3, 4, 3, 4); #[rustfmt::skip] let c = i32x4::new(5, 6, 5, 6); #[rustfmt::skip] let r = i32x4::new(16, 26, 16, 26); assert_eq!( r, ::mem::transmute(__msa_maddv_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_maddv_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let b = i64x2::new(3, 4); #[rustfmt::skip] let c = i64x2::new(5, 6); #[rustfmt::skip] let r = i64x2::new(16, 26); assert_eq!( r, ::mem::transmute(__msa_maddv_d( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_max_a_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, -1, -2, -3, -4, 1, 2, 3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let b = i8x16::new( -6, -7, -8, -9, 6, 7, 8, 9, -6, -7, -8, -9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = i8x16::new( -6, -7, -8, -9, 6, 7, 8, 9, -6, -7, -8, -9, 6, 7, 8, 9 ); assert_eq!( r, ::mem::transmute(__msa_max_a_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_max_a_h() { #[rustfmt::skip] let a = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); #[rustfmt::skip] let b = i16x8::new(-6, 7, -8, 9, -6, 7, -8, 9); #[rustfmt::skip] let r = i16x8::new(-6, 7, -8, 9, -6, 7, -8, 9); assert_eq!( r, ::mem::transmute(__msa_max_a_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_max_a_w() { #[rustfmt::skip] let a = i32x4::new(1, -2, 3, -4); #[rustfmt::skip] let b = i32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = i32x4::new(6, 7, 8, 9); assert_eq!( r, ::mem::transmute(__msa_max_a_w(::mem::transmute(a), ::mem::transmute(b))) ); } 
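// Note on the `max_a`/`max_s`/`max_u` groups exercised here: as the vectors in these tests show,
// `__msa_max_a_*` compares absolute values per element and returns the operand with the larger
// magnitude while preserving its original sign, whereas the `__msa_max_s_*` and `__msa_max_u_*`
// variants below perform plain signed and unsigned maximums respectively.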
#[simd_test(enable = "msa")] unsafe fn test_msa_max_a_d() { #[rustfmt::skip] let a = i64x2::new(-1, 2); #[rustfmt::skip] let b = i64x2::new(6, -7); #[rustfmt::skip] let r = i64x2::new(6, -7); assert_eq!( r, ::mem::transmute(__msa_max_a_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_max_s_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, -1, -2, -3, -4, 1, 2, 3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let b = i8x16::new( -6, -7, -8, -9, 6, 7, 8, 9, -6, -7, -8, -9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = i8x16::new( 1, 2, 3, 4, 6, 7, 8, 9, 1, 2, 3, 4, 6, 7, 8, 9 ); assert_eq!( r, ::mem::transmute(__msa_max_s_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_max_s_h() { #[rustfmt::skip] let a = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); #[rustfmt::skip] let b = i16x8::new(-6, 7, -8, 9, -6, 7, -8, 9); #[rustfmt::skip] let r = i16x8::new(1, 7, 3, 9, 1, 7, 3, 9); assert_eq!( r, ::mem::transmute(__msa_max_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_max_s_w() { #[rustfmt::skip] let a = i32x4::new(1, -2, 3, -4); #[rustfmt::skip] let b = i32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = i32x4::new(6, 7, 8, 9); assert_eq!( r, ::mem::transmute(__msa_max_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_max_s_d() { #[rustfmt::skip] let a = i64x2::new(-1, 2); #[rustfmt::skip] let b = i64x2::new(6, -7); #[rustfmt::skip] let r = i64x2::new(6, 2); assert_eq!( r, ::mem::transmute(__msa_max_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_max_u_b() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); assert_eq!( r, ::mem::transmute(__msa_max_u_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_max_u_h() { #[rustfmt::skip] let a = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); #[rustfmt::skip] let r = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); assert_eq!( r, ::mem::transmute(__msa_max_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_max_u_w() { #[rustfmt::skip] let a = u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = u32x4::new(6, 7, 8, 9); assert_eq!( r, ::mem::transmute(__msa_max_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_max_u_d() { #[rustfmt::skip] let a = u64x2::new(1, 2); #[rustfmt::skip] let b = u64x2::new(6, 7); #[rustfmt::skip] let r = u64x2::new(6, 7); assert_eq!( r, ::mem::transmute(__msa_max_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_maxi_s_b() { #[rustfmt::skip] let a = i8x16::new( 1, -20, -6, 8, 1, -20, -6, 8, 1, -20, -6, 8, 1, -20, -6, 8 ); #[rustfmt::skip] let r = i8x16::new( 1, -16, -6, 8, 1, -16, -6, 8, 1, -16, -6, 8, 1, -16, -6, 8 ); assert_eq!( r, ::mem::transmute(__msa_maxi_s_b(::mem::transmute(a), -16)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_maxi_s_h() { #[rustfmt::skip] let a = i16x8::new(1, 3, -60, -8, 1, 3, -6, -8); #[rustfmt::skip] let r = i16x8::new(15, 15, 15, 15, 15, 15, 15, 15); assert_eq!(r, 
::mem::transmute(__msa_maxi_s_h(::mem::transmute(a), 15))); } #[simd_test(enable = "msa")] unsafe fn test_msa_maxi_s_w() { #[rustfmt::skip] let a = i32x4::new(1, 3, -6, -8); #[rustfmt::skip] let r = i32x4::new(1, 3, -5, -5); assert_eq!(r, ::mem::transmute(__msa_maxi_s_w(::mem::transmute(a), -5))); } // FIXME: https://reviews.llvm.org/D59884 // If target type is i64, negative immediate loses the sign // Test passes if 4294967293 is used instead -3 in vector 'r' // #[simd_test(enable = "msa")] // unsafe fn test_msa_maxi_s_d() { // #[rustfmt::skip] // let a = i64x2::new(1, -8); // #[rustfmt::skip] // let r = i64x2::new(-3, -3); // assert_eq!(r, ::mem::transmute(__msa_maxi_s_d(::mem::transmute(a), -3))); // } #[simd_test(enable = "msa")] unsafe fn test_msa_maxi_u_b() { #[rustfmt::skip] let a = u8x16::new( 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8 ); #[rustfmt::skip] let r = u8x16::new( 5, 5, 6, 8, 5, 5, 6, 8, 5, 5, 6, 8, 5, 5, 6, 8 ); assert_eq!(r, ::mem::transmute(__msa_maxi_u_b(::mem::transmute(a), 5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_maxi_u_h() { #[rustfmt::skip] let a = u16x8::new(1, 3, 6, 8, 1, 3, 6, 8); #[rustfmt::skip] let r = u16x8::new(5, 5, 6, 8, 5, 5, 6, 8); assert_eq!(r, ::mem::transmute(__msa_maxi_u_h(::mem::transmute(a), 5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_maxi_u_w() { #[rustfmt::skip] let a = u32x4::new(1, 3, 6, 8); #[rustfmt::skip] let r = u32x4::new(5, 5, 6, 8); assert_eq!(r, ::mem::transmute(__msa_maxi_u_w(::mem::transmute(a), 5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_maxi_u_d() { #[rustfmt::skip] let a = u64x2::new(1, 8); #[rustfmt::skip] let r = u64x2::new(5, 8); assert_eq!(r, ::mem::transmute(__msa_maxi_u_d(::mem::transmute(a), 5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_min_a_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, -1, -2, -3, -4, 1, 2, 3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let b = i8x16::new( -6, -7, -8, -9, 6, 7, 8, 9, -6, -7, -8, -9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = i8x16::new( 1, 2, 3, 4, -1, -2, -3, -4, 1, 2, 3, 4, -1, -2, -3, -4 ); assert_eq!( r, ::mem::transmute(__msa_min_a_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_min_a_h() { #[rustfmt::skip] let a = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); #[rustfmt::skip] let b = i16x8::new(-6, 7, -8, 9, -6, 7, -8, 9); #[rustfmt::skip] let r = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); assert_eq!( r, ::mem::transmute(__msa_min_a_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_min_a_w() { #[rustfmt::skip] let a = i32x4::new(1, -2, 3, -4); #[rustfmt::skip] let b = i32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = i32x4::new(1, -2, 3, -4); assert_eq!( r, ::mem::transmute(__msa_min_a_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_min_a_d() { #[rustfmt::skip] let a = i64x2::new(-1, 2); #[rustfmt::skip] let b = i64x2::new(6, -7); #[rustfmt::skip] let r = i64x2::new(-1, 2); assert_eq!( r, ::mem::transmute(__msa_min_a_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_min_s_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, -1, -2, -3, -4, 1, 2, 3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let b = i8x16::new( -6, -7, -8, -9, 6, 7, 8, 9, -6, -7, -8, -9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = i8x16::new( -6, -7, -8, -9, -1, -2, -3, -4, -6, -7, -8, -9, -1, -2, -3, -4 ); assert_eq!( r, ::mem::transmute(__msa_min_s_b(::mem::transmute(a), 
::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_min_s_h() { #[rustfmt::skip] let a = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); #[rustfmt::skip] let b = i16x8::new(-6, 7, -8, 9, -6, 7, -8, 9); #[rustfmt::skip] let r = i16x8::new(-6, -2, -8, -4, -6, -2, -8, -4); assert_eq!( r, ::mem::transmute(__msa_min_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_min_s_w() { #[rustfmt::skip] let a = i32x4::new(1, -2, 3, -4); #[rustfmt::skip] let b = i32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = i32x4::new(1, -2, 3, -4); assert_eq!( r, ::mem::transmute(__msa_min_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_min_s_d() { #[rustfmt::skip] let a = i64x2::new(-1, 2); #[rustfmt::skip] let b = i64x2::new(6, -7); #[rustfmt::skip] let r = i64x2::new(-1, -7); assert_eq!( r, ::mem::transmute(__msa_min_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mini_s_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, -1, -2, -3, -4, 1, 2, 3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let r = i8x16::new( -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10, -10 ); assert_eq!( r, ::mem::transmute(__msa_mini_s_b(::mem::transmute(a), -10)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mini_s_h() { #[rustfmt::skip] let a = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); #[rustfmt::skip] let r = i16x8::new(-3, -3, -3, -4, -3, -3, -3, -4); assert_eq!(r, ::mem::transmute(__msa_mini_s_h(::mem::transmute(a), -3))); } #[simd_test(enable = "msa")] unsafe fn test_msa_mini_s_w() { #[rustfmt::skip] let a = i32x4::new(1, -2, 3, -4); #[rustfmt::skip] let r = i32x4::new(-3, -3, -3, -4); assert_eq!(r, ::mem::transmute(__msa_mini_s_w(::mem::transmute(a), -3))); } // FIXME: https://reviews.llvm.org/D59884 // If target type is i64, negative immediate loses the sign // -3 is represented as 4294967293 // #[simd_test(enable = "msa")] // unsafe fn test_msa_mini_s_d() { // #[rustfmt::skip] // let a = i64x2::new(-3, 2); // #[rustfmt::skip] // let r = i64x2::new(-1, -3); // assert_eq!(r, ::mem::transmute(__msa_mini_s_d(::mem::transmute(a), -3))); // } #[simd_test(enable = "msa")] unsafe fn test_msa_min_u_b() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); assert_eq!( r, ::mem::transmute(__msa_min_u_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_min_u_h() { #[rustfmt::skip] let a = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); #[rustfmt::skip] let r = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4,); assert_eq!( r, ::mem::transmute(__msa_min_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_min_u_w() { #[rustfmt::skip] let a = u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = u32x4::new(1, 2, 3, 4,); assert_eq!( r, ::mem::transmute(__msa_min_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_min_u_d() { #[rustfmt::skip] let a = u64x2::new(1, 2); #[rustfmt::skip] let b = u64x2::new(6, 7); #[rustfmt::skip] let r = u64x2::new(1, 2,); assert_eq!( r, ::mem::transmute(__msa_min_u_d(::mem::transmute(a), 
::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mini_u_b() { #[rustfmt::skip] let a = u8x16::new( 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8, 1, 3, 6, 8 ); #[rustfmt::skip] let r = u8x16::new( 1, 3, 5, 5, 1, 3, 5, 5, 1, 3, 5, 5, 1, 3, 5, 5 ); assert_eq!(r, ::mem::transmute(__msa_mini_u_b(::mem::transmute(a), 5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_mini_u_h() { #[rustfmt::skip] let a = u16x8::new(1, 3, 6, 8, 1, 3, 6, 8); #[rustfmt::skip] let r = u16x8::new(1, 3, 5, 5, 1, 3, 5, 5); assert_eq!(r, ::mem::transmute(__msa_mini_u_h(::mem::transmute(a), 5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_mini_u_w() { #[rustfmt::skip] let a = u32x4::new(1, 3, 6, 8); #[rustfmt::skip] let r = u32x4::new(1, 3, 5, 5); assert_eq!(r, ::mem::transmute(__msa_mini_u_w(::mem::transmute(a), 5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_mini_u_d() { #[rustfmt::skip] let a = u64x2::new(1, 8); #[rustfmt::skip] let r = u64x2::new(1, 5); assert_eq!(r, ::mem::transmute(__msa_mini_u_d(::mem::transmute(a), 5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_mod_s_b() { #[rustfmt::skip] let a = i8x16::new( -6, -7, -8, -9, 6, 7, 8, 9, -6, -7, -8, -9, 6, 7, 8, 9 ); #[rustfmt::skip] let b = i8x16::new( 1, 2, 3, 4, -1, -2, -3, -4, 1, 2, 3, 4, -1, -2, -3, -4 ); #[rustfmt::skip] let r = i8x16::new( 0, -1, -2, -1, 0, 1, 2, 1, 0, -1, -2, -1, 0, 1, 2, 1 ); assert_eq!( r, ::mem::transmute(__msa_mod_s_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mod_s_h() { #[rustfmt::skip] let a = i16x8::new(-6, 7, -8, 9, -6, 7, -8, 9); #[rustfmt::skip] let b = i16x8::new(1, -2, 3, -4, 1, -2, 3, -4); #[rustfmt::skip] let r = i16x8::new(0, 1, -2, 1, 0, 1, -2, 1); assert_eq!( r, ::mem::transmute(__msa_mod_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mod_s_w() { #[rustfmt::skip] let a = i32x4::new(6, 7, 8, 9); #[rustfmt::skip] let b = i32x4::new(1, -2, 3, -4); #[rustfmt::skip] let r = i32x4::new(0, 1, 2, 1); assert_eq!( r, ::mem::transmute(__msa_mod_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mod_s_d() { #[rustfmt::skip] let a = i64x2::new(6, -7); #[rustfmt::skip] let b = i64x2::new(-1, 2); #[rustfmt::skip] let r = i64x2::new(0, -1); assert_eq!( r, ::mem::transmute(__msa_mod_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mod_u_b() { #[rustfmt::skip] let a = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let b = u8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let r = u8x16::new( 0, 1, 2, 1, 0, 1, 2, 1, 0, 1, 2, 1, 0, 1, 2, 1 ); assert_eq!( r, ::mem::transmute(__msa_mod_u_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mod_u_h() { #[rustfmt::skip] let a = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); #[rustfmt::skip] let b = u16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let r = u16x8::new(0, 1, 2, 1, 0, 1, 2, 1); assert_eq!( r, ::mem::transmute(__msa_mod_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mod_u_w() { #[rustfmt::skip] let a = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let b = u32x4::new(1, 2, 3, 4); #[rustfmt::skip] let r = u32x4::new(0, 1, 2, 1); assert_eq!( r, ::mem::transmute(__msa_mod_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mod_u_d() { #[rustfmt::skip] 
let a = u64x2::new(6, 7); #[rustfmt::skip] let b = u64x2::new(1, 2); #[rustfmt::skip] let r = u64x2::new(0, 1); assert_eq!( r, ::mem::transmute(__msa_mod_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_move_v() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 ); #[rustfmt::skip] let r = i8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 ); assert_eq!(r, ::mem::transmute(__msa_move_v(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_msub_q_h() { #[rustfmt::skip] let a = i16x8::new( 1024, -1024, 1024, -1024, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i16x8::new( 1025, 1025, 1025, 1025, 1025, 1025, 1025, 1025 ); #[rustfmt::skip] let c = i16x8::new( 1024, 2048, 3072, 4096, 1024, 2048, 3072, 4096 ); #[rustfmt::skip] let r = i16x8::new(991, -1089, 927, -1153, -32, -63, -94, -125); assert_eq!( r, ::mem::transmute(__msa_msub_q_h( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_msub_q_w() { #[rustfmt::skip] let a = i32x4::new(2147483647, -2147483647, 1, 2); #[rustfmt::skip] let b = i32x4::new(10240, 10240, 10240, 10240); #[rustfmt::skip] let c = i32x4::new(10240, 20480, 30720, 40960); #[rustfmt::skip] let r = i32x4::new(2147483646, -2147483648, 0, 1); assert_eq!( r, ::mem::transmute(__msa_msub_q_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_msubr_q_h() { #[rustfmt::skip] let a = i16x8::new( 1024, -1024, 1024, -1024, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i16x8::new( 1025, 1025, 1025, 1025, 1025, 1025, 1025, 1025 ); #[rustfmt::skip] let c = i16x8::new( 1024, 2048, 3072, 4096, 1024, 2048, 3072, 4096 ); #[rustfmt::skip] let r = i16x8::new(992, -1088, 928, -1152, -31, -62, -93, -124); assert_eq!( r, ::mem::transmute(__msa_msubr_q_h( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_msubr_q_w() { #[rustfmt::skip] let a = i32x4::new(i32::max_value(), -2147483647, 1, 2); #[rustfmt::skip] let b = i32x4::new(10240, 10240, 10240, 10240); #[rustfmt::skip] let c = i32x4::new(10240, 20480, 30720, 40960); #[rustfmt::skip] let r = i32x4::new(2147483647, -2147483647, 1, 2); assert_eq!( r, ::mem::transmute(__msa_msubr_q_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_msubv_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i8x16::new( 5, 6, 7, 8, 5, 6, 7, 8, 5, 6, 7, 8, 5, 6, 7, 8 ); #[rustfmt::skip] let c = i8x16::new( 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12, 9, 10, 11, 12 ); #[rustfmt::skip] let r = i8x16::new( -44, -58, -74, -92, -44, -58, -74, -92, -44, -58, -74, -92, -44, -58, -74, -92 ); assert_eq!( r, ::mem::transmute(__msa_msubv_b( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_msubv_h() { #[rustfmt::skip] let a = i16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let b = i16x8::new(5, 6, 7, 8, 5, 6, 7, 8); #[rustfmt::skip] let c = i16x8::new(9, 10, 11, 12, 9, 10, 11, 12); #[rustfmt::skip] let r = i16x8::new(-44, -58, -74, -92, -44, -58, -74, -92); assert_eq!( r, ::mem::transmute(__msa_msubv_h( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_msubv_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 1, 2); 
#[rustfmt::skip] let b = i32x4::new(3, 4, 3, 4); #[rustfmt::skip] let c = i32x4::new(5, 6, 5, 6); #[rustfmt::skip] let r = i32x4::new(-14, -22, -14, -22); assert_eq!( r, ::mem::transmute(__msa_msubv_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_msubv_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let b = i64x2::new(3, 4); #[rustfmt::skip] let c = i64x2::new(5, 6); #[rustfmt::skip] let r = i64x2::new(-14, -22); assert_eq!( r, ::mem::transmute(__msa_msubv_d( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mul_q_h() { #[rustfmt::skip] let a = i16x8::new( 12500, -20, -300, 400, 12500, 20, 300, 400 ); #[rustfmt::skip] let b = i16x8::new( 1250, 10240, -7585, 8456, 1250, 10240, -7585, 8456 ); #[rustfmt::skip] let r = i16x8::new(476, -7, 69, 103, 476, 6, -70, 103); assert_eq!( r, ::mem::transmute(__msa_mul_q_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mul_q_w() { #[rustfmt::skip] let a = i32x4::new( i32::max_value(), i32::max_value(), i32::min_value(), i32::min_value() ); #[rustfmt::skip] let b = i32x4::new(30, 60, 30, 60); #[rustfmt::skip] let r = i32x4::new(29, 59, -30, -60); assert_eq!( r, ::mem::transmute(__msa_mul_q_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mulr_q_h() { #[rustfmt::skip] let a = i16x8::new( 12500, -20, -300, 400, 12500, 20, 300, 400 ); #[rustfmt::skip] let b = i16x8::new( 1250, 10240, -7585, 8456, 1250, 10240, -7585, 8456 ); #[rustfmt::skip] let r = i16x8::new(477, -6, 69, 103, 477, 6, -69, 103); assert_eq!( r, ::mem::transmute(__msa_mulr_q_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mulr_q_w() { #[rustfmt::skip] let a = i32x4::new( i32::max_value(), i32::max_value(), i32::min_value(), i32::min_value() ); #[rustfmt::skip] let b = i32x4::new(30, 60, 30, 60); #[rustfmt::skip] let r = i32x4::new(30, 60, -30, -60); assert_eq!( r, ::mem::transmute(__msa_mulr_q_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mulv_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); #[rustfmt::skip] let b = i8x16::new( 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i8x16::new( 16, 30, 42, 52, 60, 66, 70, 72, 72, 70, 66, 60, 52, 42, 30, 16 ); assert_eq!( r, ::mem::transmute(__msa_mulv_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mulv_h() { #[rustfmt::skip] let a = i16x8::new( 1, 2, 3, 4, 5, 6, 7, 8 ); #[rustfmt::skip] let b = i16x8::new( 8, 7, 6, 5, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i16x8::new(8, 14, 18, 20, 20, 18, 14, 8); assert_eq!( r, ::mem::transmute(__msa_mulv_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mulv_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = i32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = i32x4::new(4, 6, 6, 4); assert_eq!( r, ::mem::transmute(__msa_mulv_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_mulv_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let b = i64x2::new(2, 1); #[rustfmt::skip] let r = i64x2::new(2, 2); assert_eq!( r, ::mem::transmute(__msa_mulv_d(::mem::transmute(a), ::mem::transmute(b))) ); } 
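// The following `nloc`/`nlzc` tests cover the per-element bit counts: `__msa_nloc_*` returns the
// number of leading one bits and `__msa_nlzc_*` the number of leading zero bits, which is why an
// i8 of -128 (0b1000_0000) yields 1 for `nloc` and an i8 of 1 yields 7 for `nlzc` in the expected
// vectors below.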
#[simd_test(enable = "msa")] unsafe fn test_msa_nloc_b() { #[rustfmt::skip] let a = i8x16::new( -128, -64, -32, -16, -8, -4, -2, -1, 1, 2, 4, 8, 16, 32, 64, 127 ); #[rustfmt::skip] let r = i8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0 ); assert_eq!(r, ::mem::transmute(__msa_nloc_b(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_nloc_h() { #[rustfmt::skip] let a = i16x8::new( -32768, -16384, -8192, -4096, 4096, 8192, 16384, 32767 ); #[rustfmt::skip] let r = i16x8::new(1, 2, 3, 4, 0, 0, 0, 0); assert_eq!(r, ::mem::transmute(__msa_nloc_h(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_nloc_w() { #[rustfmt::skip] let a = i32x4::new( i32::min_value(), -1073741824, 1073741824, i32::max_value() ); #[rustfmt::skip] let r = i32x4::new(1, 2, 0, 0); assert_eq!(r, ::mem::transmute(__msa_nloc_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_nloc_d() { #[rustfmt::skip] let a = i64x2::new(i64::min_value(), i64::max_value()); #[rustfmt::skip] let r = i64x2::new(1, 0); assert_eq!(r, ::mem::transmute(__msa_nloc_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_nlzc_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); #[rustfmt::skip] let r = i8x16::new( 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3 ); assert_eq!(r, ::mem::transmute(__msa_nlzc_b(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_nlzc_h() { #[rustfmt::skip] let a = i16x8::new( 1, 2, 3, 4, 5, 6, 7, 8 ); #[rustfmt::skip] let r = i16x8::new(15, 14, 14, 13, 13, 13, 13, 12); assert_eq!(r, ::mem::transmute(__msa_nlzc_h(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_nlzc_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let r = i32x4::new(31, 30, 30, 29); assert_eq!(r, ::mem::transmute(__msa_nlzc_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_nlzc_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let r = i64x2::new(63, 62); assert_eq!(r, ::mem::transmute(__msa_nlzc_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_nor_v() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); #[rustfmt::skip] let b = u8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); #[rustfmt::skip] let r = u8x16::new( 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, 239 ); assert_eq!( r, ::mem::transmute(__msa_nor_v(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_nori_b() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); #[rustfmt::skip] let r = u8x16::new( 250, 249, 248, 251, 250, 249, 248, 243, 242, 241, 240, 243, 242, 241, 240, 235 ); assert_eq!(r, ::mem::transmute(__msa_nori_b(::mem::transmute(a), 4))); } #[simd_test(enable = "msa")] unsafe fn test_msa_or_v() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); #[rustfmt::skip] let b = u8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); #[rustfmt::skip] let r = u8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); assert_eq!( r, ::mem::transmute(__msa_or_v(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_ori_b() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); #[rustfmt::skip] let r 
= u8x16::new( 5, 6, 7, 4, 5, 6, 7, 12, 13, 14, 15, 12, 13, 14, 15, 20 ); assert_eq!(r, ::mem::transmute(__msa_ori_b(::mem::transmute(a), 4))); } #[simd_test(enable = "msa")] unsafe fn test_msa_pckev_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i8x16::new( 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i8x16::new( 4, 2, 4, 2, 4, 2, 4, 2, 1, 3, 1, 3, 1, 3, 1, 3 ); assert_eq!( r, ::mem::transmute(__msa_pckev_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_pckev_h() { #[rustfmt::skip] let a = i16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let b = i16x8::new(4, 3, 2, 1, 4, 3, 2, 1); #[rustfmt::skip] let r = i16x8::new(4, 2, 4, 2, 1, 3, 1, 3); assert_eq!( r, ::mem::transmute(__msa_pckev_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_pckev_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = i32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = i32x4::new(4, 2, 1, 3); assert_eq!( r, ::mem::transmute(__msa_pckev_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_pckev_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let b = i64x2::new(4, 3); #[rustfmt::skip] let r = i64x2::new(4, 1); assert_eq!( r, ::mem::transmute(__msa_pckev_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_pckod_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i8x16::new( 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i8x16::new( 3, 1, 3, 1, 3, 1, 3, 1, 2, 4, 2, 4, 2, 4, 2, 4 ); assert_eq!( r, ::mem::transmute(__msa_pckod_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_pckod_h() { #[rustfmt::skip] let a = i16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let b = i16x8::new(4, 3, 2, 1, 4, 3, 2, 1); #[rustfmt::skip] let r = i16x8::new(3, 1, 3, 1, 2, 4, 2, 4); assert_eq!( r, ::mem::transmute(__msa_pckod_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_pckod_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = i32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = i32x4::new(3, 1, 2, 4); assert_eq!( r, ::mem::transmute(__msa_pckod_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_pckod_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let b = i64x2::new(4, 3); #[rustfmt::skip] let r = i64x2::new(3, 2); assert_eq!( r, ::mem::transmute(__msa_pckod_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_pcnt_b() { #[rustfmt::skip] let a = i8x16::new( -128, -64, -32, -16, -8, -4, -2, -1, 1, 2, 4, 8, 16, 32, 64, 127 ); #[rustfmt::skip] let r = i8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 1, 1, 1, 1, 1, 1, 1, 7 ); assert_eq!(r, ::mem::transmute(__msa_pcnt_b(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_pcnt_h() { #[rustfmt::skip] let a = i16x8::new( -32768, -16384, -8192, -4096, 4096, 8192, 16384, 32767 ); #[rustfmt::skip] let r = i16x8::new(1, 2, 3, 4, 1, 1, 1, 15); assert_eq!(r, ::mem::transmute(__msa_pcnt_h(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_pcnt_w() { #[rustfmt::skip] let a = i32x4::new( i32::min_value(), -1073741824, 
1073741824, i32::max_value() ); #[rustfmt::skip] let r = i32x4::new(1, 2, 1, 31); assert_eq!(r, ::mem::transmute(__msa_pcnt_w(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_pcnt_d() { #[rustfmt::skip] let a = i64x2::new(-2147483648, 2147483647); #[rustfmt::skip] let r = i64x2::new(33, 31); assert_eq!(r, ::mem::transmute(__msa_pcnt_d(::mem::transmute(a)))); } #[simd_test(enable = "msa")] unsafe fn test_msa_sat_s_b() { #[rustfmt::skip] let a = i8x16::new( i8::max_value(), 105, 30, 1, i8::max_value(), 105, 30, 1, i8::max_value(), 105, 30, 1, i8::max_value(), 105, 30, 1 ); #[rustfmt::skip] let r = i8x16::new( 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3, 1, 3, 3, 3, 1 ); assert_eq!(r, ::mem::transmute(__msa_sat_s_b(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_sat_s_h() { #[rustfmt::skip] let a = i16x8::new( i16::max_value(), 1155, 155, 1, i16::max_value(), 1155, 155, 1 ); #[rustfmt::skip] let r = i16x8::new(127, 127, 127, 1, 127, 127, 127, 1); assert_eq!(r, ::mem::transmute(__msa_sat_s_h(::mem::transmute(a), 7))); } #[simd_test(enable = "msa")] unsafe fn test_msa_sat_s_w() { #[rustfmt::skip] let a = i32x4::new(i32::max_value(), 111111155, i32::max_value(), 1); #[rustfmt::skip] let r = i32x4::new(131071, 131071, 131071, 1); assert_eq!(r, ::mem::transmute(__msa_sat_s_w(::mem::transmute(a), 17))); } #[simd_test(enable = "msa")] unsafe fn test_msa_sat_s_d() { #[rustfmt::skip] let a = i64x2::new(i64::max_value(), 1); #[rustfmt::skip] let r = i64x2::new(137438953471, 1); assert_eq!(r, ::mem::transmute(__msa_sat_s_d(::mem::transmute(a), 37))); } #[simd_test(enable = "msa")] unsafe fn test_msa_sat_u_b() { #[rustfmt::skip] let a = u8x16::new( u8::max_value(), 105, 30, 1, u8::max_value(), 105, 30, 1, u8::max_value(), 105, 30, 1, u8::max_value(), 105, 30, 1 ); #[rustfmt::skip] let r = u8x16::new( 7, 7, 7, 1, 7, 7, 7, 1, 7, 7, 7, 1, 7, 7, 7, 1 ); assert_eq!(r, ::mem::transmute(__msa_sat_u_b(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_sat_u_h() { #[rustfmt::skip] let a = u16x8::new( u16::max_value(), 1155, 155, 1, u16::max_value(), 1155, 155, 1 ); #[rustfmt::skip] let r = u16x8::new(255, 255, 155, 1, 255, 255, 155, 1); assert_eq!(r, ::mem::transmute(__msa_sat_u_h(::mem::transmute(a), 7))); } #[simd_test(enable = "msa")] unsafe fn test_msa_sat_u_w() { #[rustfmt::skip] let a = u32x4::new(u32::max_value(), 111111155, u32::max_value(), 1); #[rustfmt::skip] let r = u32x4::new(262143, 262143, 262143, 1); assert_eq!(r, ::mem::transmute(__msa_sat_u_w(::mem::transmute(a), 17))); } #[simd_test(enable = "msa")] unsafe fn test_msa_sat_u_d() { #[rustfmt::skip] let a = u64x2::new(u64::max_value(), 1); #[rustfmt::skip] let r = u64x2::new(274877906943, 1); assert_eq!(r, ::mem::transmute(__msa_sat_u_d(::mem::transmute(a), 37))); } #[simd_test(enable = "msa")] unsafe fn test_msa_shf_b() { #[rustfmt::skip] let a = i8x16::new( 11, 12, 3, 4, 11, 12, 3, 4, 11, 12, 3, 4, 11, 12, 3, 4 ); #[rustfmt::skip] let r = i8x16::new( 11, 3, 4, 12, 11, 3, 4, 12, 11, 3, 4, 12, 11, 3, 4, 12 ); assert_eq!(r, ::mem::transmute(__msa_shf_b(::mem::transmute(a), 120))); } #[simd_test(enable = "msa")] unsafe fn test_msa_shf_h() { #[rustfmt::skip] let a = i16x8::new( 11, 12, 13, 14, 11, 12, 13, 14 ); #[rustfmt::skip] let r = i16x8::new(11, 14, 12, 13, 11, 14, 12, 13); assert_eq!(r, ::mem::transmute(__msa_shf_h(::mem::transmute(a), 156))); } #[simd_test(enable = "msa")] unsafe fn test_msa_shf_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let r = 
i32x4::new(1, 3, 2, 4); assert_eq!(r, ::mem::transmute(__msa_shf_w(::mem::transmute(a), 216))); } #[simd_test(enable = "msa")] unsafe fn test_msa_sld_b() { #[rustfmt::skip] let a = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); #[rustfmt::skip] let b = i8x16::new( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ); #[rustfmt::skip] let r = i8x16::new( 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4 ); assert_eq!( r, ::mem::transmute(__msa_sld_b(::mem::transmute(a), ::mem::transmute(b), 5)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sld_h() { #[rustfmt::skip] let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); #[rustfmt::skip] let b = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); // let c = 5 as i32; let r = i16x8::new(9, 10, 11, 0, 13, 14, 15, 4); assert_eq!( r, ::mem::transmute(__msa_sld_h(::mem::transmute(a), ::mem::transmute(b), 2)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sld_w() { #[rustfmt::skip] let a = i32x4::new(0, 1, 2, 3); #[rustfmt::skip] let b = i32x4::new(4, 5, 6, 7); #[rustfmt::skip] let r = i32x4::new(4, 5, 6, 7); assert_eq!( r, ::mem::transmute(__msa_sld_w(::mem::transmute(a), ::mem::transmute(b), 4)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sld_d() { #[rustfmt::skip] let a = i64x2::new(0, 1); #[rustfmt::skip] let b = i64x2::new(2, 3); #[rustfmt::skip] let r = i64x2::new(2, 3); assert_eq!( r, ::mem::transmute(__msa_sld_d(::mem::transmute(a), ::mem::transmute(b), 2)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sldi_b() { #[rustfmt::skip] let a = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); #[rustfmt::skip] let b = i8x16::new( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ); #[rustfmt::skip] let r = i8x16::new( 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4 ); assert_eq!( r, ::mem::transmute(__msa_sldi_b(::mem::transmute(a), ::mem::transmute(b), 5)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sldi_h() { #[rustfmt::skip] let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); #[rustfmt::skip] let b = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15); // let c = 5 as i32; let r = i16x8::new(9, 10, 11, 0, 13, 14, 15, 4); assert_eq!( r, ::mem::transmute(__msa_sldi_h(::mem::transmute(a), ::mem::transmute(b), 2)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sldi_w() { #[rustfmt::skip] let a = i32x4::new(0, 1, 2, 3); #[rustfmt::skip] let b = i32x4::new(4, 5, 6, 7); #[rustfmt::skip] let r = i32x4::new(4, 5, 6, 7); assert_eq!( r, ::mem::transmute(__msa_sldi_w(::mem::transmute(a), ::mem::transmute(b), 4)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sldi_d() { #[rustfmt::skip] let a = i64x2::new(0, 1); #[rustfmt::skip] let b = i64x2::new(2, 3); #[rustfmt::skip] let r = i64x2::new(2, 3); assert_eq!( r, ::mem::transmute(__msa_sldi_d(::mem::transmute(a), ::mem::transmute(b), 2)) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sll_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i8x16::new( 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i8x16::new( 16, 16, 12, 8, 16, 16, 12, 8, 16, 16, 12, 8, 16, 16, 12, 8 ); assert_eq!( r, ::mem::transmute(__msa_sll_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sll_h() { #[rustfmt::skip] let a = i16x8::new(1, 2, 3, 4, 1, 2, 3, 4); #[rustfmt::skip] let b = i16x8::new(4, 3, 2, 1, 4, 3, 2, 1); #[rustfmt::skip] let r = i16x8::new(16, 16, 12, 8, 16, 16, 12, 8); assert_eq!( r, 
::mem::transmute(__msa_sll_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sll_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = i32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = i32x4::new(16, 16, 12, 8); assert_eq!( r, ::mem::transmute(__msa_sll_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sll_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let b = i64x2::new(4, 3); #[rustfmt::skip] let r = i64x2::new(16, 16); assert_eq!( r, ::mem::transmute(__msa_sll_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_slli_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let r = i8x16::new( 4, 8, 12, 16, 4, 8, 12, 16, 4, 8, 12, 16, 4, 8, 12, 16 ); assert_eq!(r, ::mem::transmute(__msa_slli_b(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_slli_h() { #[rustfmt::skip] let a = i16x8::new( 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let r = i16x8::new(4, 8, 12, 16, 4, 8, 12, 16); assert_eq!(r, ::mem::transmute(__msa_slli_h(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_slli_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let r = i32x4::new(4, 8, 12, 16); assert_eq!(r, ::mem::transmute(__msa_slli_w(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_slli_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let r = i64x2::new(2, 4); assert_eq!(r, ::mem::transmute(__msa_slli_d(::mem::transmute(a), 1))); } #[simd_test(enable = "msa")] unsafe fn test_msa_splat_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let r = i8x16::new( 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 ); assert_eq!(r, ::mem::transmute(__msa_splat_b(::mem::transmute(a), 3))); } #[simd_test(enable = "msa")] unsafe fn test_msa_splat_h() { #[rustfmt::skip] let a = i16x8::new( 1, 2, 3, 4, 1, 2, 3, 4, ); #[rustfmt::skip] let r = i16x8::new(4, 4, 4, 4, 4, 4, 4, 4); assert_eq!(r, ::mem::transmute(__msa_splat_h(::mem::transmute(a), 3))); } #[simd_test(enable = "msa")] unsafe fn test_msa_splat_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let r = i32x4::new(4, 4, 4, 4); assert_eq!(r, ::mem::transmute(__msa_splat_w(::mem::transmute(a), 3))); } #[simd_test(enable = "msa")] unsafe fn test_msa_splat_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let r = i64x2::new(2, 2); assert_eq!(r, ::mem::transmute(__msa_splat_d(::mem::transmute(a), 3))); } #[simd_test(enable = "msa")] unsafe fn test_msa_splati_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let r = i8x16::new( 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 ); assert_eq!(r, ::mem::transmute(__msa_splati_b(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_splati_h() { #[rustfmt::skip] let a = i16x8::new( 1, 2, 3, 4, 1, 2, 3, 4, ); #[rustfmt::skip] let r = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); assert_eq!(r, ::mem::transmute(__msa_splati_h(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_splati_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let r = i32x4::new(3, 3, 3, 3); assert_eq!(r, ::mem::transmute(__msa_splati_w(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn 
test_msa_splati_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let r = i64x2::new(2, 2); assert_eq!(r, ::mem::transmute(__msa_splati_d(::mem::transmute(a), 1))); } #[simd_test(enable = "msa")] unsafe fn test_msa_sra_b() { #[rustfmt::skip] let a = i8x16::new( -128, -64, -32, -16, -8, -4, -2, -1, 1, 2, 4, 8, 16, 32, 64, 127 ); #[rustfmt::skip] let b = i8x16::new( 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i8x16::new( -128, -1, -1, -1, -1, -1, -1, -1, 1, 0, 0, 0, 1, 4, 16, 63 ); assert_eq!( r, ::mem::transmute(__msa_sra_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sra_h() { #[rustfmt::skip] let a = i16x8::new( -32768, -16384, -8192, -4096, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i16x8::new( 15, 14, 13, 12, 12, 13, 14, 15 ); #[rustfmt::skip] let r = i16x8::new( -1, -1, -1, -1, 0, 0, 0, 0 ); assert_eq!( r, ::mem::transmute(__msa_sra_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sra_w() { #[rustfmt::skip] let a = i32x4::new(i32::min_value(), -1073741824, 1, 2); #[rustfmt::skip] let b = i32x4::new(16, 15, 16, 15); #[rustfmt::skip] let r = i32x4::new(-32768, -32768, 0, 0); assert_eq!( r, ::mem::transmute(__msa_sra_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_sra_d() { #[rustfmt::skip] let a = i64x2::new(i64::min_value(), i64::max_value()); #[rustfmt::skip] let b = i64x2::new(32, 31); #[rustfmt::skip] let r = i64x2::new(-2147483648, 4294967295); assert_eq!( r, ::mem::transmute(__msa_sra_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_srai_b() { #[rustfmt::skip] let a = i8x16::new( i8::max_value(), 125, 55, 1, i8::max_value(), 125, 55, 1, i8::max_value(), 125, 55, 1, i8::max_value(), 125, 55, 1 ); #[rustfmt::skip] let r = i8x16::new( 31, 31, 13, 0, 31, 31, 13, 0, 31, 31, 13, 0, 31, 31, 13, 0 ); assert_eq!(r, ::mem::transmute(__msa_srai_b(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srai_h() { #[rustfmt::skip] let a = i16x8::new( i16::max_value(), 125, 55, 1, i16::max_value(), 125, 55, 1 ); #[rustfmt::skip] let r = i16x8::new(8191, 31, 13, 0, 8191, 31, 13, 0); assert_eq!(r, ::mem::transmute(__msa_srai_h(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srai_w() { #[rustfmt::skip] let a = i32x4::new(i32::max_value(), 125, 55, 1); let r = i32x4::new(536870911, 31, 13, 0); assert_eq!(r, ::mem::transmute(__msa_srai_w(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srai_d() { #[rustfmt::skip] let a = i64x2::new(i64::max_value(), 55); #[rustfmt::skip] let r = i64x2::new(2305843009213693951, 13); assert_eq!(r, ::mem::transmute(__msa_srai_d(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srar_b() { #[rustfmt::skip] let a = i8x16::new( -128, -64, -32, -16, -8, -4, -2, -1, 1, 2, 4, 8, 16, 32, 64, 127 ); #[rustfmt::skip] let b = i8x16::new( 4, 3, 2, 1, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i8x16::new( -8, -8, -8, -8, 0, 0, 0, 0, 1, 0, 0, 0, 1, 4, 16, 64 ); assert_eq!( r, ::mem::transmute(__msa_srar_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_srar_h() { #[rustfmt::skip] let a = i16x8::new( i16::min_value(), -16384, -8192, -4096, 150, 50, 25, 15 ); #[rustfmt::skip] let b = i16x8::new( 4, 3, 2, 1, 1, 2, 3, 4 ); #[rustfmt::skip] let r = i16x8::new( -2048, -2048, -2048, 
-2048, 75, 13, 3, 1 ); assert_eq!( r, ::mem::transmute(__msa_srar_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_srar_w() { #[rustfmt::skip] let a = i32x4::new(i32::min_value(), -1073741824, 100, 50); #[rustfmt::skip] let b = i32x4::new(16, 15, 1, 2); #[rustfmt::skip] let r = i32x4::new(-32768, -32768, 50, 13); assert_eq!( r, ::mem::transmute(__msa_srar_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_srar_d() { #[rustfmt::skip] let a = i64x2::new(i64::min_value(), i64::max_value()); #[rustfmt::skip] let b = i64x2::new(32, 31); #[rustfmt::skip] let r = i64x2::new(-2147483648, 4294967296); assert_eq!( r, ::mem::transmute(__msa_srar_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_srari_b() { #[rustfmt::skip] let a = i8x16::new( 125, i8::max_value(), 55, 1, 125, i8::max_value(), 55, 1, 125, i8::max_value(), 55, 1, 125, i8::max_value(), 55, 1 ); #[rustfmt::skip] let r = i8x16::new( 31, 32, 14, 0, 31, 32, 14, 0, 31, 32, 14, 0, 31, 32, 14, 0 ); assert_eq!(r, ::mem::transmute(__msa_srari_b(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srari_h() { #[rustfmt::skip] let a = i16x8::new(2155, 1155, 155, 1, 2155, 1155, 155, 1); #[rustfmt::skip] let r = i16x8::new(539, 289, 39, 0, 539, 289, 39, 0); assert_eq!(r, ::mem::transmute(__msa_srari_h(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srari_w() { #[rustfmt::skip] let a = i32x4::new(211111155, 111111155, 11111155, 1); #[rustfmt::skip] let r = i32x4::new(52777789, 27777789, 2777789, 0); assert_eq!(r, ::mem::transmute(__msa_srari_w(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srari_d() { #[rustfmt::skip] let a = i64x2::new(211111111155, 111111111155); #[rustfmt::skip] let r = i64x2::new(52777777789, 27777777789); assert_eq!(r, ::mem::transmute(__msa_srari_d(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srl_b() { #[rustfmt::skip] let a = i8x16::new( -128, -64, -32, -16, -8, -4, -2, -1, 1, 2, 4, 8, 16, 32, 64, 127 ); #[rustfmt::skip] let b = i8x16::new( 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i8x16::new( -128, 1, 3, 7, 15, 31, 63, 127, 1, 0, 0, 0, 1, 4, 16, 63 ); assert_eq!( r, ::mem::transmute(__msa_srl_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_srl_h() { #[rustfmt::skip] let a = i16x8::new( -32768, -16384, -8192, -4096, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i16x8::new( 15, 14, 13, 12, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i16x8::new(1, 3, 7, 15, 0, 0, 0, 2); assert_eq!( r, ::mem::transmute(__msa_srl_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_srl_w() { #[rustfmt::skip] let a = i32x4::new(i32::min_value(), -1073741824, 1, 2); #[rustfmt::skip] let b = i32x4::new(16, 15, 16, 15); #[rustfmt::skip] let r = i32x4::new(32768, 98304, 0, 0); assert_eq!( r, ::mem::transmute(__msa_srl_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_srl_d() { #[rustfmt::skip] let a = i64x2::new(i64::min_value(), i64::max_value()); #[rustfmt::skip] let b = i64x2::new(32, 31); #[rustfmt::skip] let r = i64x2::new(2147483648, 4294967295); assert_eq!( r, ::mem::transmute(__msa_srl_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_srli_b() { #[rustfmt::skip] let a = i8x16::new( 
25, 50, 100, 127, 25, 50, 100, 127, 25, 50, 100, 127, 25, 50, 100, 127 ); #[rustfmt::skip] let r = i8x16::new( 6, 12, 25, 31, 6, 12, 25, 31, 6, 12, 25, 31, 6, 12, 25, 31 ); assert_eq!(r, ::mem::transmute(__msa_srli_b(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srli_h() { #[rustfmt::skip] let a = i16x8::new( i16::max_value(), 3276, 100, 127, i16::max_value(), 3276, 100, 127 ); #[rustfmt::skip] let r = i16x8::new( 8191, 819, 25, 31, 8191, 819, 25, 31 ); assert_eq!(r, ::mem::transmute(__msa_srli_h(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srli_w() { #[rustfmt::skip] let a = i32x4::new(100, i32::max_value(), 100, i32::max_value()); #[rustfmt::skip] let r = i32x4::new(25, 536870911, 25, 536870911); assert_eq!(r, ::mem::transmute(__msa_srli_w(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srli_d() { #[rustfmt::skip] let a = i64x2::new(100, i64::max_value()); #[rustfmt::skip] let r = i64x2::new(50, 4611686018427387903); assert_eq!(r, ::mem::transmute(__msa_srli_d(::mem::transmute(a), 1))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srlr_b() { #[rustfmt::skip] let a = i8x16::new( -128, -64, -32, -16, -8, -4, -2, -1, 1, 2, 4, 8, 16, 32, 64, 127 ); #[rustfmt::skip] let b = i8x16::new( 8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i8x16::new( -128, 2, 4, 8, 16, 32, 64, -128, 1, 0, 0, 0, 1, 4, 16, 64 ); assert_eq!( r, ::mem::transmute(__msa_srlr_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_srlr_h() { #[rustfmt::skip] let a = i16x8::new( -32768, -16384, -8192, -4096, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i16x8::new( 15, 14, 13, 12, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i16x8::new(1, 3, 7, 15, 0, 0, 1, 2); assert_eq!( r, ::mem::transmute(__msa_srlr_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_srlr_w() { #[rustfmt::skip] let a = i32x4::new(i32::min_value(), -1073741824, 1, 2); #[rustfmt::skip] let b = i32x4::new(16, 15, 16, 15); let r = i32x4::new(32768, 98304, 0, 0); assert_eq!( r, ::mem::transmute(__msa_srlr_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_srlr_d() { #[rustfmt::skip] let a = i64x2::new(i64::min_value(), i64::max_value()); #[rustfmt::skip] let b = i64x2::new(32, 31); #[rustfmt::skip] let r = i64x2::new(2147483648, 4294967296); assert_eq!( r, ::mem::transmute(__msa_srlr_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_srlri_b() { #[rustfmt::skip] let a = i8x16::new( 25, 50, 100, i8::max_value(), 25, 50, 100, i8::max_value(), 25, 50, 100, i8::max_value(), 25, 50, 100, i8::max_value() ); #[rustfmt::skip] let r = i8x16::new( 6, 13, 25, 32, 6, 13, 25, 32, 6, 13, 25, 32, 6, 13, 25, 32 ); assert_eq!(r, ::mem::transmute(__msa_srlri_b(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srlri_h() { #[rustfmt::skip] let a = i16x8::new( i16::max_value(), 3276, 100, 127, i16::max_value(), 3276, 100, 127 ); let r = i16x8::new(8192, 819, 25, 32, 8192, 819, 25, 32); assert_eq!(r, ::mem::transmute(__msa_srlri_h(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn test_msa_srlri_w() { #[rustfmt::skip] let a = i32x4::new(100, 150, 200, i32::max_value()); #[rustfmt::skip] let r = i32x4::new(25, 38, 50, 536870912); assert_eq!(r, ::mem::transmute(__msa_srlri_w(::mem::transmute(a), 2))); } #[simd_test(enable = "msa")] unsafe fn 
test_msa_srlri_d() { #[rustfmt::skip] let a = i64x2::new(100, i64::max_value()); #[rustfmt::skip] let r = i64x2::new(50, 4611686018427387904); assert_eq!(r, ::mem::transmute(__msa_srlri_d(::mem::transmute(a), 1))); } #[simd_test(enable = "msa")] unsafe fn test_msa_st_b() { #[rustfmt::skip] let a = i8x16::new( 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 ); #[rustfmt::skip] let mut arr : [i8; 16] = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ]; #[rustfmt::skip] let r : [i8; 16] = [ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 ]; __msa_st_b(::mem::transmute(a), arr.as_mut_ptr() as *mut u8, 0); assert_eq!(arr, r); } #[simd_test(enable = "msa")] unsafe fn test_msa_st_h() { #[rustfmt::skip] let a = i16x8::new(13, 14, 15, 16, 17, 18, 19, 20); let mut arr: [i16; 8] = [0, 0, 0, 0, 0, 0, 0, 0]; #[rustfmt::skip] let r : [i16; 8] = [13, 14, 15, 16, 17, 18, 19, 20]; __msa_st_h(::mem::transmute(a), arr.as_mut_ptr() as *mut u8, 0); assert_eq!(arr, r); } #[simd_test(enable = "msa")] unsafe fn test_msa_st_w() { #[rustfmt::skip] let a = i32x4::new(13, 14, 15, 16); let mut arr: [i32; 4] = [0, 0, 0, 0]; #[rustfmt::skip] let r : [i32; 4] = [13, 14, 15, 16]; __msa_st_w(::mem::transmute(a), arr.as_mut_ptr() as *mut u8, 0); assert_eq!(arr, r); } #[simd_test(enable = "msa")] unsafe fn test_msa_st_d() { #[rustfmt::skip] let a = i64x2::new(13, 14); let mut arr: [i64; 2] = [0, 0]; #[rustfmt::skip] let r : [i64; 2] = [13, 14]; __msa_st_d(::mem::transmute(a), arr.as_mut_ptr() as *mut u8, 0); assert_eq!(arr, r); } #[simd_test(enable = "msa")] unsafe fn test_msa_subs_s_b() { #[rustfmt::skip] let a = i8x16::new( i8::min_value(), -2, -3, -4, i8::min_value(), -2, -3, -4, i8::min_value(), -2, -3, -4, i8::min_value(), -2, -3, -4 ); #[rustfmt::skip] let b = i8x16::new( 6, -7, 8, -9, 6, -7, 8, -9, 6, -7, 8, -9, 6, -7, 8, -9 ); #[rustfmt::skip] let r = i8x16::new( i8::min_value(), 5, -11, 5, i8::min_value(), 5, -11, 5, i8::min_value(), 5, -11, 5, i8::min_value(), 5, -11, 5 ); assert_eq!( r, ::mem::transmute(__msa_subs_s_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subs_s_h() { #[rustfmt::skip] let a = i16x8::new( i16::min_value(), -2, -3, -4, i16::min_value(), -2, -3, -4 ); #[rustfmt::skip] let b = i16x8::new(6, -7, 8, -9, 6, -7, 8, -9); #[rustfmt::skip] let r = i16x8::new( i16::min_value(), 5, -11, 5, i16::min_value(), 5, -11, 5 ); assert_eq!( r, ::mem::transmute(__msa_subs_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subs_s_w() { #[rustfmt::skip] let a = i32x4::new(i32::min_value(), -2, -3, -4); #[rustfmt::skip] let b = i32x4::new(6, -7, 8, -9); #[rustfmt::skip] let r = i32x4::new(i32::min_value(), 5, -11, 5); assert_eq!( r, ::mem::transmute(__msa_subs_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subs_s_d() { #[rustfmt::skip] let a = i64x2::new(i64::min_value(), -2); #[rustfmt::skip] let b = i64x2::new(6, -7); #[rustfmt::skip] let r = i64x2::new(i64::min_value(), 5); assert_eq!( r, ::mem::transmute(__msa_subs_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subs_u_b() { #[rustfmt::skip] let a = u8x16::new( u8::max_value(), 2, 3, 4, u8::max_value(), 2, 3, 4, u8::max_value(), 2, 3, 4, u8::max_value(), 2, 3, 4 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9, 6, 7, 8, 9 ); #[rustfmt::skip] let r = u8x16::new( 249, 0, 0, 0, 249, 0, 0, 0, 249, 0, 0, 0, 
249, 0, 0, 0 ); assert_eq!( r, ::mem::transmute(__msa_subs_u_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subs_u_h() { #[rustfmt::skip] let a = u16x8::new( u16::max_value(), 2, 3, 4, u16::max_value(), 2, 3, 4 ); #[rustfmt::skip] let b = u16x8::new(6, 7, 8, 9, 6, 7, 8, 9); #[rustfmt::skip] let r = u16x8::new(65529, 0, 0, 0, 65529, 0, 0, 0); assert_eq!( r, ::mem::transmute(__msa_subs_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subs_u_w() { #[rustfmt::skip] let a = u32x4::new(u32::max_value(), 2, 3, 4); #[rustfmt::skip] let b = u32x4::new(6, 7, 8, 9); #[rustfmt::skip] let r = u32x4::new(4294967289, 0, 0, 0); assert_eq!( r, ::mem::transmute(__msa_subs_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subs_u_d() { #[rustfmt::skip] let a = u64x2::new(u64::max_value(), 2); #[rustfmt::skip] let b = u64x2::new(6, 7); #[rustfmt::skip] let r = u64x2::new(18446744073709551609, 0); assert_eq!( r, ::mem::transmute(__msa_subs_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subsus_u_b() { #[rustfmt::skip] let a = u8x16::new( u8::max_value(), 2, 3, 4, u8::max_value(), 2, 3, 4, u8::max_value(), 2, 3, 4, u8::max_value(), 2, 3, 4 ); #[rustfmt::skip] let b = i8x16::new( -6, -7, -8, -9, -6, -7, -8, -9, -6, -7, -8, -9, -6, -7, -8, -9 ); #[rustfmt::skip] let r = u8x16::new( 255, 9, 11, 13, 255, 9, 11, 13, 255, 9, 11, 13, 255, 9, 11, 13 ); assert_eq!( r, ::mem::transmute(__msa_subsus_u_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subsus_u_h() { #[rustfmt::skip] let a = u16x8::new( u16::max_value(), 2, 3, 4, u16::max_value(), 2, 3, 4 ); #[rustfmt::skip] let b = i16x8::new(-6, -7, -8, -9, -6, -7, -8, -9); #[rustfmt::skip] let r = u16x8::new(65535, 9, 11, 13, 65535, 9, 11, 13); assert_eq!( r, ::mem::transmute(__msa_subsus_u_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subsus_u_w() { #[rustfmt::skip] let a = u32x4::new(u32::max_value(), 2, 3, 4); #[rustfmt::skip] let b = i32x4::new(-6, -7, -8, -9); #[rustfmt::skip] let r = u32x4::new(4294967295, 9, 11, 13); assert_eq!( r, ::mem::transmute(__msa_subsus_u_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subsus_u_d() { #[rustfmt::skip] let a = u64x2::new(u64::max_value(), 2); #[rustfmt::skip] let b = i64x2::new(-6, -7); #[rustfmt::skip] let r = u64x2::new(18446744073709551615, 9); assert_eq!( r, ::mem::transmute(__msa_subsus_u_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subsuu_s_b() { #[rustfmt::skip] let a = u8x16::new( u8::max_value(), 2, 3, 4, u8::max_value(), 2, 3, 4, u8::max_value(), 2, 3, 4, u8::max_value(), 2, 3, 4 ); #[rustfmt::skip] let b = u8x16::new( 6, 7, 8, u8::max_value(), 6, 7, 8, u8::max_value(), 6, 7, 8, u8::max_value(), 6, 7, 8, u8::max_value() ); #[rustfmt::skip] let r = i8x16::new( 127, -5, -5, -128, 127, -5, -5, -128, 127, -5, -5, -128, 127, -5, -5, -128 ); assert_eq!( r, ::mem::transmute(__msa_subsuu_s_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subsuu_s_h() { #[rustfmt::skip] let a = u16x8::new( u16::max_value(), 2, 3, 4, u16::max_value(), 2, 3, 4 ); #[rustfmt::skip] let b = u16x8::new(6, 7, 8, 65535, 6, 7, 8, 65535); #[rustfmt::skip] let r = i16x8::new(32767, -5, -5, -32768, 32767, 
-5, -5, -32768); assert_eq!( r, ::mem::transmute(__msa_subsuu_s_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subsuu_s_w() { #[rustfmt::skip] let a = u32x4::new(u32::max_value(), 2, 3, 4); #[rustfmt::skip] let b = u32x4::new(6, 7, 8, 4294967295); #[rustfmt::skip] let r = i32x4::new(2147483647, -5, -5, -2147483648); assert_eq!( r, ::mem::transmute(__msa_subsuu_s_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subsuu_s_d() { #[rustfmt::skip] let a = u64x2::new(u64::max_value(), 2); #[rustfmt::skip] let b = u64x2::new(6, 7); #[rustfmt::skip] let r = i64x2::new(i64::max_value(), -5); assert_eq!( r, ::mem::transmute(__msa_subsuu_s_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subv_b() { #[rustfmt::skip] let a = i8x16::new( i8::min_value(), -2, -3, -4, i8::min_value(), -2, -3, -4, i8::min_value(), -2, -3, -4, i8::min_value(), -2, -3, -4 ); #[rustfmt::skip] let b = i8x16::new( 6, -7, 8, -9, 6, -7, 8, -9, 6, -7, 8, -9, 6, -7, 8, -9 ); #[rustfmt::skip] let r = i8x16::new( 122, 5, -11, 5, 122, 5, -11, 5, 122, 5, -11, 5, 122, 5, -11, 5 ); assert_eq!( r, ::mem::transmute(__msa_subv_b(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subv_h() { #[rustfmt::skip] let a = i16x8::new( i16::min_value(), -2, -3, -4, i16::min_value(), -2, -3, -4 ); #[rustfmt::skip] let b = i16x8::new(6, -7, 8, -9, 6, -7, 8, -9); #[rustfmt::skip] let r = i16x8::new(32762, 5, -11, 5, 32762, 5, -11, 5); assert_eq!( r, ::mem::transmute(__msa_subv_h(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subv_w() { #[rustfmt::skip] let a = i32x4::new(i32::min_value(), -2, -3, -4); #[rustfmt::skip] let b = i32x4::new(6, -7, 8, -9); #[rustfmt::skip] let r = i32x4::new(2147483642, 5, -11, 5); assert_eq!( r, ::mem::transmute(__msa_subv_w(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subv_d() { #[rustfmt::skip] let a = i64x2::new(i64::max_value(), -2); #[rustfmt::skip] let b = i64x2::new(6, -7); #[rustfmt::skip] let r = i64x2::new(9223372036854775801, 5); assert_eq!( r, ::mem::transmute(__msa_subv_d(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_subvi_b() { #[rustfmt::skip] let a = i8x16::new( 100, i8::max_value(), 50, i8::min_value(), 100, i8::max_value(), 50, i8::min_value(), 100, i8::max_value(), 50, i8::min_value(), 100, i8::max_value(), 50, i8::min_value() ); #[rustfmt::skip] let r = i8x16::new( 95, 122, 45, 123, 95, 122, 45, 123, 95, 122, 45, 123, 95, 122, 45, 123 ); assert_eq!(r, ::mem::transmute(__msa_subvi_b(::mem::transmute(a), 5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_subvi_h() { #[rustfmt::skip] let a = i16x8::new( i16::max_value(), 3276, -100, i16::min_value(), i16::max_value(), 3276, -100, i16::min_value() ); #[rustfmt::skip] let r = i16x8::new( 32762, 3271, -105, 32763, 32762, 3271, -105, 32763 ); assert_eq!(r, ::mem::transmute(__msa_subvi_h(::mem::transmute(a), 5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_subvi_w() { #[rustfmt::skip] let a = i32x4::new(100, 150, 200, i32::max_value()); #[rustfmt::skip] let r = i32x4::new(95, 145, 195, 2147483642); assert_eq!(r, ::mem::transmute(__msa_subvi_w(::mem::transmute(a), 5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_subvi_d() { #[rustfmt::skip] let a = i64x2::new(100, i64::max_value()); #[rustfmt::skip] let r = 
i64x2::new(95, 9223372036854775802); assert_eq!(r, ::mem::transmute(__msa_subvi_d(::mem::transmute(a), 5))); } #[simd_test(enable = "msa")] unsafe fn test_msa_vshf_b() { #[rustfmt::skip] let a = i8x16::new( 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i8x16::new( 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let c = i8x16::new( 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let r = i8x16::new( 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4 ); assert_eq!( r, ::mem::transmute(__msa_vshf_b( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_vshf_h() { #[rustfmt::skip] let a = i16x8::new( 1, 2, 3, 4, 1, 2, 3, 4 ); #[rustfmt::skip] let b = i16x8::new( 4, 3, 2, 1, 4, 3, 2, 1 ); #[rustfmt::skip] let c = i16x8::new( 4, 3, 2, 1, 4, 3, 2, 1 ); let r = i16x8::new(3, 2, 1, 4, 3, 2, 1, 4); assert_eq!( r, ::mem::transmute(__msa_vshf_h( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_vshf_w() { #[rustfmt::skip] let a = i32x4::new(1, 2, 3, 4); #[rustfmt::skip] let b = i32x4::new(4, 3, 2, 1); #[rustfmt::skip] let c = i32x4::new(4, 3, 2, 1); #[rustfmt::skip] let r = i32x4::new(3, 2, 1, 4); assert_eq!( r, ::mem::transmute(__msa_vshf_w( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_vshf_d() { #[rustfmt::skip] let a = i64x2::new(1, 2); #[rustfmt::skip] let b = i64x2::new(4, 3); #[rustfmt::skip] let c = i64x2::new(4, 3); #[rustfmt::skip] let r = i64x2::new(3, 4); assert_eq!( r, ::mem::transmute(__msa_vshf_d( ::mem::transmute(a), ::mem::transmute(b), ::mem::transmute(c) )) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_xor_v() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); #[rustfmt::skip] let b = u8x16::new( 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 ); #[rustfmt::skip] let r = u8x16::new( 17, 13, 13, 9, 9, 13, 13, 1, 1, 13, 13, 9, 9, 13, 13, 17 ); assert_eq!( r, ::mem::transmute(__msa_xor_v(::mem::transmute(a), ::mem::transmute(b))) ); } #[simd_test(enable = "msa")] unsafe fn test_msa_xori_b() { #[rustfmt::skip] let a = u8x16::new( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); #[rustfmt::skip] let r = u8x16::new( 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 20 ); assert_eq!(r, ::mem::transmute(__msa_xori_b(::mem::transmute(a), 4))); } } core_arch-0.1.5/src/mips/msa/macros.rs010064400007650000024000004473451345305473200160610ustar0000000000000000//! Utility macros. //immediate value: -4096:4088 macro_rules! 
constify_imm_s13 { ($imm_s13:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm_s13) & 0b1_1111_1111_1111 { 0 => $expand!(0), 8 => $expand!(8), 16 => $expand!(16), 24 => $expand!(24), 32 => $expand!(32), 40 => $expand!(40), 48 => $expand!(48), 56 => $expand!(56), 64 => $expand!(64), 72 => $expand!(72), 80 => $expand!(80), 88 => $expand!(88), 96 => $expand!(96), 104 => $expand!(104), 112 => $expand!(112), 120 => $expand!(120), 128 => $expand!(128), 136 => $expand!(136), 144 => $expand!(144), 152 => $expand!(152), 160 => $expand!(160), 168 => $expand!(168), 176 => $expand!(176), 184 => $expand!(184), 192 => $expand!(192), 200 => $expand!(200), 208 => $expand!(208), 216 => $expand!(216), 224 => $expand!(224), 232 => $expand!(232), 240 => $expand!(240), 248 => $expand!(248), 256 => $expand!(256), 264 => $expand!(264), 272 => $expand!(272), 280 => $expand!(280), 288 => $expand!(288), 296 => $expand!(296), 304 => $expand!(304), 312 => $expand!(312), 320 => $expand!(320), 328 => $expand!(328), 336 => $expand!(336), 344 => $expand!(344), 352 => $expand!(352), 360 => $expand!(360), 368 => $expand!(368), 376 => $expand!(376), 384 => $expand!(384), 392 => $expand!(392), 400 => $expand!(400), 408 => $expand!(408), 416 => $expand!(416), 424 => $expand!(424), 432 => $expand!(432), 440 => $expand!(440), 448 => $expand!(448), 456 => $expand!(456), 464 => $expand!(464), 472 => $expand!(472), 480 => $expand!(480), 488 => $expand!(488), 496 => $expand!(496), 504 => $expand!(504), 512 => $expand!(512), 520 => $expand!(520), 528 => $expand!(528), 536 => $expand!(536), 544 => $expand!(544), 552 => $expand!(552), 560 => $expand!(560), 568 => $expand!(568), 576 => $expand!(576), 584 => $expand!(584), 592 => $expand!(592), 600 => $expand!(600), 608 => $expand!(608), 616 => $expand!(616), 624 => $expand!(624), 632 => $expand!(632), 640 => $expand!(640), 648 => $expand!(648), 656 => $expand!(656), 664 => $expand!(664), 672 => $expand!(672), 680 => $expand!(680), 688 => $expand!(688), 696 => $expand!(696), 704 => $expand!(704), 712 => $expand!(712), 720 => $expand!(720), 728 => $expand!(728), 736 => $expand!(736), 744 => $expand!(744), 752 => $expand!(752), 760 => $expand!(760), 768 => $expand!(768), 776 => $expand!(776), 784 => $expand!(784), 792 => $expand!(792), 800 => $expand!(800), 808 => $expand!(808), 816 => $expand!(816), 824 => $expand!(824), 832 => $expand!(832), 840 => $expand!(840), 848 => $expand!(848), 856 => $expand!(856), 864 => $expand!(864), 872 => $expand!(872), 880 => $expand!(880), 888 => $expand!(888), 896 => $expand!(896), 904 => $expand!(904), 912 => $expand!(912), 920 => $expand!(920), 928 => $expand!(928), 936 => $expand!(936), 944 => $expand!(944), 952 => $expand!(952), 960 => $expand!(960), 968 => $expand!(968), 976 => $expand!(976), 984 => $expand!(984), 992 => $expand!(992), 1000 => $expand!(1000), 1008 => $expand!(1008), 1016 => $expand!(1016), 1024 => $expand!(1024), 1032 => $expand!(1032), 1040 => $expand!(1040), 1048 => $expand!(1048), 1056 => $expand!(1056), 1064 => $expand!(1064), 1072 => $expand!(1072), 1080 => $expand!(1080), 1088 => $expand!(1088), 1096 => $expand!(1096), 1104 => $expand!(1104), 1112 => $expand!(1112), 1120 => $expand!(1120), 1128 => $expand!(1128), 1136 => $expand!(1136), 1144 => $expand!(1144), 1152 => $expand!(1152), 1160 => $expand!(1160), 1168 => $expand!(1168), 1176 => $expand!(1176), 1184 => $expand!(1184), 1192 => $expand!(1192), 1200 => $expand!(1200), 1208 => $expand!(1208), 1216 => $expand!(1216), 1224 => $expand!(1224), 1232 => 
$expand!(1232), 1240 => $expand!(1240), 1248 => $expand!(1248), 1256 => $expand!(1256), 1264 => $expand!(1264), 1272 => $expand!(1272), 1280 => $expand!(1280), 1288 => $expand!(1288), 1296 => $expand!(1296), 1304 => $expand!(1304), 1312 => $expand!(1312), 1320 => $expand!(1320), 1328 => $expand!(1328), 1336 => $expand!(1336), 1344 => $expand!(1344), 1352 => $expand!(1352), 1360 => $expand!(1360), 1368 => $expand!(1368), 1376 => $expand!(1376), 1384 => $expand!(1384), 1392 => $expand!(1392), 1400 => $expand!(1400), 1408 => $expand!(1408), 1416 => $expand!(1416), 1424 => $expand!(1424), 1432 => $expand!(1432), 1440 => $expand!(1440), 1448 => $expand!(1448), 1456 => $expand!(1456), 1464 => $expand!(1464), 1472 => $expand!(1472), 1480 => $expand!(1480), 1488 => $expand!(1488), 1496 => $expand!(1496), 1504 => $expand!(1504), 1512 => $expand!(1512), 1520 => $expand!(1520), 1528 => $expand!(1528), 1536 => $expand!(1536), 1544 => $expand!(1544), 1552 => $expand!(1552), 1560 => $expand!(1560), 1568 => $expand!(1568), 1576 => $expand!(1576), 1584 => $expand!(1584), 1592 => $expand!(1592), 1600 => $expand!(1600), 1608 => $expand!(1608), 1616 => $expand!(1616), 1624 => $expand!(1624), 1632 => $expand!(1632), 1640 => $expand!(1640), 1648 => $expand!(1648), 1656 => $expand!(1656), 1664 => $expand!(1664), 1672 => $expand!(1672), 1680 => $expand!(1680), 1688 => $expand!(1688), 1696 => $expand!(1696), 1704 => $expand!(1704), 1712 => $expand!(1712), 1720 => $expand!(1720), 1728 => $expand!(1728), 1736 => $expand!(1736), 1744 => $expand!(1744), 1752 => $expand!(1752), 1760 => $expand!(1760), 1768 => $expand!(1768), 1776 => $expand!(1776), 1784 => $expand!(1784), 1792 => $expand!(1792), 1800 => $expand!(1800), 1808 => $expand!(1808), 1816 => $expand!(1816), 1824 => $expand!(1824), 1832 => $expand!(1832), 1840 => $expand!(1840), 1848 => $expand!(1848), 1856 => $expand!(1856), 1864 => $expand!(1864), 1872 => $expand!(1872), 1880 => $expand!(1880), 1888 => $expand!(1888), 1896 => $expand!(1896), 1904 => $expand!(1904), 1912 => $expand!(1912), 1920 => $expand!(1920), 1928 => $expand!(1928), 1936 => $expand!(1936), 1944 => $expand!(1944), 1952 => $expand!(1952), 1960 => $expand!(1960), 1968 => $expand!(1968), 1976 => $expand!(1976), 1984 => $expand!(1984), 1992 => $expand!(1992), 2000 => $expand!(2000), 2008 => $expand!(2008), 2016 => $expand!(2016), 2024 => $expand!(2024), 2032 => $expand!(2032), 2040 => $expand!(2040), 2048 => $expand!(2048), 2056 => $expand!(2056), 2064 => $expand!(2064), 2072 => $expand!(2072), 2080 => $expand!(2080), 2088 => $expand!(2088), 2096 => $expand!(2096), 2104 => $expand!(2104), 2112 => $expand!(2112), 2120 => $expand!(2120), 2128 => $expand!(2128), 2136 => $expand!(2136), 2144 => $expand!(2144), 2152 => $expand!(2152), 2160 => $expand!(2160), 2168 => $expand!(2168), 2176 => $expand!(2176), 2184 => $expand!(2184), 2192 => $expand!(2192), 2200 => $expand!(2200), 2208 => $expand!(2208), 2216 => $expand!(2216), 2224 => $expand!(2224), 2232 => $expand!(2232), 2240 => $expand!(2240), 2248 => $expand!(2248), 2256 => $expand!(2256), 2264 => $expand!(2264), 2272 => $expand!(2272), 2280 => $expand!(2280), 2288 => $expand!(2288), 2296 => $expand!(2296), 2304 => $expand!(2304), 2312 => $expand!(2312), 2320 => $expand!(2320), 2328 => $expand!(2328), 2336 => $expand!(2336), 2344 => $expand!(2344), 2352 => $expand!(2352), 2360 => $expand!(2360), 2368 => $expand!(2368), 2376 => $expand!(2376), 2384 => $expand!(2384), 2392 => $expand!(2392), 2400 => $expand!(2400), 2408 => $expand!(2408), 2416 => 
$expand!(2416), 2424 => $expand!(2424), 2432 => $expand!(2432), 2440 => $expand!(2440), 2448 => $expand!(2448), 2456 => $expand!(2456), 2464 => $expand!(2464), 2472 => $expand!(2472), 2480 => $expand!(2480), 2488 => $expand!(2488), 2496 => $expand!(2496), 2504 => $expand!(2504), 2512 => $expand!(2512), 2520 => $expand!(2520), 2528 => $expand!(2528), 2536 => $expand!(2536), 2544 => $expand!(2544), 2552 => $expand!(2552), 2560 => $expand!(2560), 2568 => $expand!(2568), 2576 => $expand!(2576), 2584 => $expand!(2584), 2592 => $expand!(2592), 2600 => $expand!(2600), 2608 => $expand!(2608), 2616 => $expand!(2616), 2624 => $expand!(2624), 2632 => $expand!(2632), 2640 => $expand!(2640), 2648 => $expand!(2648), 2656 => $expand!(2656), 2664 => $expand!(2664), 2672 => $expand!(2672), 2680 => $expand!(2680), 2688 => $expand!(2688), 2696 => $expand!(2696), 2704 => $expand!(2704), 2712 => $expand!(2712), 2720 => $expand!(2720), 2728 => $expand!(2728), 2736 => $expand!(2736), 2744 => $expand!(2744), 2752 => $expand!(2752), 2760 => $expand!(2760), 2768 => $expand!(2768), 2776 => $expand!(2776), 2784 => $expand!(2784), 2792 => $expand!(2792), 2800 => $expand!(2800), 2808 => $expand!(2808), 2816 => $expand!(2816), 2824 => $expand!(2824), 2832 => $expand!(2832), 2840 => $expand!(2840), 2848 => $expand!(2848), 2856 => $expand!(2856), 2864 => $expand!(2864), 2872 => $expand!(2872), 2880 => $expand!(2880), 2888 => $expand!(2888), 2896 => $expand!(2896), 2904 => $expand!(2904), 2912 => $expand!(2912), 2920 => $expand!(2920), 2928 => $expand!(2928), 2936 => $expand!(2936), 2944 => $expand!(2944), 2952 => $expand!(2952), 2960 => $expand!(2960), 2968 => $expand!(2968), 2976 => $expand!(2976), 2984 => $expand!(2984), 2992 => $expand!(2992), 3000 => $expand!(3000), 3008 => $expand!(3008), 3016 => $expand!(3016), 3024 => $expand!(3024), 3032 => $expand!(3032), 3040 => $expand!(3040), 3048 => $expand!(3048), 3056 => $expand!(3056), 3064 => $expand!(3064), 3072 => $expand!(3072), 3080 => $expand!(3080), 3088 => $expand!(3088), 3096 => $expand!(3096), 3104 => $expand!(3104), 3112 => $expand!(3112), 3120 => $expand!(3120), 3128 => $expand!(3128), 3136 => $expand!(3136), 3144 => $expand!(3144), 3152 => $expand!(3152), 3160 => $expand!(3160), 3168 => $expand!(3168), 3176 => $expand!(3176), 3184 => $expand!(3184), 3192 => $expand!(3192), 3200 => $expand!(3200), 3208 => $expand!(3208), 3216 => $expand!(3216), 3224 => $expand!(3224), 3232 => $expand!(3232), 3240 => $expand!(3240), 3248 => $expand!(3248), 3256 => $expand!(3256), 3264 => $expand!(3264), 3272 => $expand!(3272), 3280 => $expand!(3280), 3288 => $expand!(3288), 3296 => $expand!(3296), 3304 => $expand!(3304), 3312 => $expand!(3312), 3320 => $expand!(3320), 3328 => $expand!(3328), 3336 => $expand!(3336), 3344 => $expand!(3344), 3352 => $expand!(3352), 3360 => $expand!(3360), 3368 => $expand!(3368), 3376 => $expand!(3376), 3384 => $expand!(3384), 3392 => $expand!(3392), 3400 => $expand!(3400), 3408 => $expand!(3408), 3416 => $expand!(3416), 3424 => $expand!(3424), 3432 => $expand!(3432), 3440 => $expand!(3440), 3448 => $expand!(3448), 3456 => $expand!(3456), 3464 => $expand!(3464), 3472 => $expand!(3472), 3480 => $expand!(3480), 3488 => $expand!(3488), 3496 => $expand!(3496), 3504 => $expand!(3504), 3512 => $expand!(3512), 3520 => $expand!(3520), 3528 => $expand!(3528), 3536 => $expand!(3536), 3544 => $expand!(3544), 3552 => $expand!(3552), 3560 => $expand!(3560), 3568 => $expand!(3568), 3576 => $expand!(3576), 3584 => $expand!(3584), 3592 => $expand!(3592), 3600 => 
$expand!(3600), 3608 => $expand!(3608), 3616 => $expand!(3616), 3624 => $expand!(3624), 3632 => $expand!(3632), 3640 => $expand!(3640), 3648 => $expand!(3648), 3656 => $expand!(3656), 3664 => $expand!(3664), 3672 => $expand!(3672), 3680 => $expand!(3680), 3688 => $expand!(3688), 3696 => $expand!(3696), 3704 => $expand!(3704), 3712 => $expand!(3712), 3720 => $expand!(3720), 3728 => $expand!(3728), 3736 => $expand!(3736), 3744 => $expand!(3744), 3752 => $expand!(3752), 3760 => $expand!(3760), 3768 => $expand!(3768), 3776 => $expand!(3776), 3784 => $expand!(3784), 3792 => $expand!(3792), 3700 => $expand!(3700), 3808 => $expand!(3808), 3816 => $expand!(3816), 3824 => $expand!(3824), 3832 => $expand!(3832), 3840 => $expand!(3840), 3848 => $expand!(3848), 3856 => $expand!(3856), 3864 => $expand!(3864), 3872 => $expand!(3872), 3880 => $expand!(3880), 3888 => $expand!(3888), 3896 => $expand!(3896), 3904 => $expand!(3904), 3912 => $expand!(3912), 3920 => $expand!(3920), 3928 => $expand!(3928), 3936 => $expand!(3936), 3944 => $expand!(3944), 3952 => $expand!(3952), 3960 => $expand!(3960), 3968 => $expand!(3968), 3976 => $expand!(3976), 3984 => $expand!(3984), 3992 => $expand!(3992), 4000 => $expand!(4000), 4008 => $expand!(4008), 4016 => $expand!(4016), 4024 => $expand!(4024), 4032 => $expand!(4032), 4040 => $expand!(4040), 4048 => $expand!(4048), 4056 => $expand!(4056), 4064 => $expand!(4064), 4072 => $expand!(4072), 4080 => $expand!(4080), 4096 => $expand!(-4096), 4104 => $expand!(-4088), 4112 => $expand!(-4080), 4120 => $expand!(-4072), 4128 => $expand!(-4064), 4136 => $expand!(-4056), 4144 => $expand!(-4048), 4152 => $expand!(-4040), 4160 => $expand!(-4032), 4168 => $expand!(-4024), 4176 => $expand!(-4016), 4184 => $expand!(-4008), 4192 => $expand!(-4000), 4200 => $expand!(-3992), 4208 => $expand!(-3984), 4216 => $expand!(-3976), 4224 => $expand!(-3968), 4232 => $expand!(-3960), 4240 => $expand!(-3952), 4248 => $expand!(-3944), 4256 => $expand!(-3936), 4264 => $expand!(-3928), 4272 => $expand!(-3920), 4280 => $expand!(-3912), 4288 => $expand!(-3904), 4296 => $expand!(-3896), 4304 => $expand!(-3888), 4312 => $expand!(-3880), 4320 => $expand!(-3872), 4328 => $expand!(-3864), 4336 => $expand!(-3856), 4344 => $expand!(-3848), 4352 => $expand!(-3840), 4360 => $expand!(-3832), 4368 => $expand!(-3824), 4376 => $expand!(-3816), 4384 => $expand!(-3808), 4392 => $expand!(-3800), 4400 => $expand!(-3792), 4408 => $expand!(-3784), 4416 => $expand!(-3776), 4424 => $expand!(-3768), 4432 => $expand!(-3760), 4440 => $expand!(-3752), 4448 => $expand!(-3744), 4456 => $expand!(-3736), 4464 => $expand!(-3728), 4472 => $expand!(-3720), 4480 => $expand!(-3712), 4488 => $expand!(-3704), 4496 => $expand!(-3696), 4504 => $expand!(-3688), 4512 => $expand!(-3680), 4520 => $expand!(-3672), 4528 => $expand!(-3664), 4536 => $expand!(-3656), 4544 => $expand!(-3648), 4552 => $expand!(-3640), 4560 => $expand!(-3632), 4568 => $expand!(-3624), 4576 => $expand!(-3616), 4584 => $expand!(-3608), 4592 => $expand!(-3600), 4600 => $expand!(-3592), 4608 => $expand!(-3584), 4616 => $expand!(-3576), 4624 => $expand!(-3568), 4632 => $expand!(-3560), 4640 => $expand!(-3552), 4648 => $expand!(-3544), 4656 => $expand!(-3536), 4664 => $expand!(-3528), 4672 => $expand!(-3520), 4680 => $expand!(-3512), 4688 => $expand!(-3504), 4696 => $expand!(-3496), 4704 => $expand!(-3488), 4712 => $expand!(-3480), 4720 => $expand!(-3472), 4728 => $expand!(-3464), 4736 => $expand!(-3456), 4744 => $expand!(-3448), 4752 => $expand!(-3440), 4760 => 
$expand!(-3432), 4768 => $expand!(-3424), 4776 => $expand!(-3416), 4784 => $expand!(-3408), 4792 => $expand!(-3400), 4800 => $expand!(-3392), 4808 => $expand!(-3384), 4816 => $expand!(-3376), 4824 => $expand!(-3368), 4832 => $expand!(-3360), 4840 => $expand!(-3352), 4848 => $expand!(-3344), 4856 => $expand!(-3336), 4864 => $expand!(-3328), 4872 => $expand!(-3320), 4880 => $expand!(-3312), 4888 => $expand!(-3304), 4896 => $expand!(-3296), 4904 => $expand!(-3288), 4912 => $expand!(-3280), 4920 => $expand!(-3272), 4928 => $expand!(-3264), 4936 => $expand!(-3256), 4944 => $expand!(-3248), 4952 => $expand!(-3240), 4960 => $expand!(-3232), 4968 => $expand!(-3224), 4976 => $expand!(-3216), 4984 => $expand!(-3208), 4992 => $expand!(-3200), 5000 => $expand!(-3192), 5008 => $expand!(-3184), 5016 => $expand!(-3176), 5024 => $expand!(-3168), 5032 => $expand!(-3160), 5040 => $expand!(-3152), 5048 => $expand!(-3144), 5056 => $expand!(-3136), 5064 => $expand!(-3128), 5072 => $expand!(-3120), 5080 => $expand!(-3112), 5088 => $expand!(-3104), 5096 => $expand!(-3096), 5104 => $expand!(-3088), 5112 => $expand!(-3080), 5120 => $expand!(-3072), 5128 => $expand!(-3064), 5136 => $expand!(-3056), 5144 => $expand!(-3048), 5152 => $expand!(-3040), 5160 => $expand!(-3032), 5168 => $expand!(-3024), 5176 => $expand!(-3016), 5184 => $expand!(-3008), 5192 => $expand!(-3000), 5200 => $expand!(-2992), 5208 => $expand!(-2984), 5216 => $expand!(-2976), 5224 => $expand!(-2968), 5232 => $expand!(-2960), 5240 => $expand!(-2952), 5248 => $expand!(-2944), 5256 => $expand!(-2936), 5264 => $expand!(-2928), 5272 => $expand!(-2920), 5280 => $expand!(-2912), 5288 => $expand!(-2904), 5296 => $expand!(-2896), 5304 => $expand!(-2888), 5312 => $expand!(-2880), 5320 => $expand!(-2872), 5328 => $expand!(-2864), 5336 => $expand!(-2856), 5344 => $expand!(-2848), 5352 => $expand!(-2840), 5360 => $expand!(-2832), 5368 => $expand!(-2824), 5376 => $expand!(-2816), 5384 => $expand!(-2808), 5392 => $expand!(-2800), 5400 => $expand!(-2792), 5408 => $expand!(-2784), 5416 => $expand!(-2776), 5424 => $expand!(-2768), 5432 => $expand!(-2760), 5440 => $expand!(-2752), 5448 => $expand!(-2744), 5456 => $expand!(-2736), 5464 => $expand!(-2728), 5472 => $expand!(-2720), 5480 => $expand!(-2712), 5488 => $expand!(-2704), 5496 => $expand!(-2696), 5504 => $expand!(-2688), 5512 => $expand!(-2680), 5520 => $expand!(-2672), 5528 => $expand!(-2664), 5536 => $expand!(-2656), 5544 => $expand!(-2648), 5552 => $expand!(-2640), 5560 => $expand!(-2632), 5568 => $expand!(-2624), 5576 => $expand!(-2616), 5584 => $expand!(-2608), 5592 => $expand!(-2600), 5600 => $expand!(-2592), 5608 => $expand!(-2584), 5616 => $expand!(-2576), 5624 => $expand!(-2568), 5632 => $expand!(-2560), 5640 => $expand!(-2552), 5648 => $expand!(-2544), 5656 => $expand!(-2536), 5664 => $expand!(-2528), 5672 => $expand!(-2520), 5680 => $expand!(-2512), 5688 => $expand!(-2504), 5696 => $expand!(-2496), 5704 => $expand!(-2488), 5712 => $expand!(-2480), 5720 => $expand!(-2472), 5728 => $expand!(-2464), 5736 => $expand!(-2456), 5744 => $expand!(-2448), 5752 => $expand!(-2440), 5760 => $expand!(-2432), 5768 => $expand!(-2424), 5776 => $expand!(-2416), 5784 => $expand!(-2408), 5792 => $expand!(-2400), 5800 => $expand!(-2392), 5808 => $expand!(-2384), 5816 => $expand!(-2376), 5824 => $expand!(-2368), 5832 => $expand!(-2360), 5840 => $expand!(-2352), 5848 => $expand!(-2344), 5856 => $expand!(-2336), 5864 => $expand!(-2328), 5872 => $expand!(-2320), 5880 => $expand!(-2312), 5888 => $expand!(-2304), 5896 => 
$expand!(-2296), 5904 => $expand!(-2288), 5912 => $expand!(-2280), 5920 => $expand!(-2272), 5928 => $expand!(-2264), 5936 => $expand!(-2256), 5944 => $expand!(-2248), 5952 => $expand!(-2240), 5960 => $expand!(-2232), 5968 => $expand!(-2224), 5976 => $expand!(-2216), 5984 => $expand!(-2208), 5992 => $expand!(-2200), 6000 => $expand!(-2192), 6008 => $expand!(-2184), 6016 => $expand!(-2176), 6024 => $expand!(-2168), 6032 => $expand!(-2160), 6040 => $expand!(-2152), 6048 => $expand!(-2144), 6056 => $expand!(-2136), 6064 => $expand!(-2128), 6072 => $expand!(-2120), 6080 => $expand!(-2112), 6088 => $expand!(-2104), 6096 => $expand!(-2096), 6104 => $expand!(-2088), 6112 => $expand!(-2080), 6120 => $expand!(-2072), 6128 => $expand!(-2064), 6136 => $expand!(-2056), 6144 => $expand!(-2048), 6152 => $expand!(-2040), 6160 => $expand!(-2032), 6168 => $expand!(-2024), 6176 => $expand!(-2016), 6184 => $expand!(-2008), 6192 => $expand!(-2000), 6200 => $expand!(-1992), 6208 => $expand!(-1984), 6216 => $expand!(-1976), 6224 => $expand!(-1968), 6232 => $expand!(-1960), 6240 => $expand!(-1952), 6248 => $expand!(-1944), 6256 => $expand!(-1936), 6264 => $expand!(-1928), 6272 => $expand!(-1920), 6280 => $expand!(-1912), 6288 => $expand!(-1904), 6296 => $expand!(-1896), 6304 => $expand!(-1888), 6312 => $expand!(-1880), 6320 => $expand!(-1872), 6328 => $expand!(-1864), 6336 => $expand!(-1856), 6344 => $expand!(-1848), 6352 => $expand!(-1840), 6360 => $expand!(-1832), 6368 => $expand!(-1824), 6376 => $expand!(-1816), 6384 => $expand!(-1808), 6392 => $expand!(-1800), 6400 => $expand!(-1792), 6408 => $expand!(-1784), 6416 => $expand!(-1776), 6424 => $expand!(-1768), 6432 => $expand!(-1760), 6440 => $expand!(-1752), 6448 => $expand!(-1744), 6456 => $expand!(-1736), 6464 => $expand!(-1728), 6472 => $expand!(-1720), 6480 => $expand!(-1712), 6488 => $expand!(-1704), 6496 => $expand!(-1696), 6504 => $expand!(-1688), 6512 => $expand!(-1680), 6520 => $expand!(-1672), 6528 => $expand!(-1664), 6536 => $expand!(-1656), 6544 => $expand!(-1648), 6552 => $expand!(-1640), 6560 => $expand!(-1632), 6568 => $expand!(-1624), 6576 => $expand!(-1616), 6584 => $expand!(-1608), 6592 => $expand!(-1600), 6600 => $expand!(-1592), 6608 => $expand!(-1584), 6616 => $expand!(-1576), 6624 => $expand!(-1568), 6632 => $expand!(-1560), 6640 => $expand!(-1552), 6648 => $expand!(-1544), 6656 => $expand!(-1536), 6664 => $expand!(-1528), 6672 => $expand!(-1520), 6680 => $expand!(-1512), 6688 => $expand!(-1504), 6696 => $expand!(-1496), 6704 => $expand!(-1488), 6712 => $expand!(-1480), 6720 => $expand!(-1472), 6728 => $expand!(-1464), 6736 => $expand!(-1456), 6744 => $expand!(-1448), 6752 => $expand!(-1440), 6760 => $expand!(-1432), 6768 => $expand!(-1424), 6776 => $expand!(-1416), 6784 => $expand!(-1408), 6792 => $expand!(-1400), 6800 => $expand!(-1392), 6808 => $expand!(-1384), 6816 => $expand!(-1376), 6824 => $expand!(-1368), 6832 => $expand!(-1360), 6840 => $expand!(-1352), 6848 => $expand!(-1344), 6856 => $expand!(-1336), 6864 => $expand!(-1328), 6872 => $expand!(-1320), 6880 => $expand!(-1312), 6888 => $expand!(-1304), 6896 => $expand!(-1296), 6904 => $expand!(-1288), 6912 => $expand!(-1280), 6920 => $expand!(-1272), 6928 => $expand!(-1264), 6936 => $expand!(-1256), 6944 => $expand!(-1248), 6952 => $expand!(-1240), 6960 => $expand!(-1232), 6968 => $expand!(-1224), 6976 => $expand!(-1216), 6984 => $expand!(-1208), 6992 => $expand!(-1200), 7000 => $expand!(-1192), 7008 => $expand!(-1184), 7016 => $expand!(-1176), 7024 => $expand!(-1168), 7032 =>
$expand!(-1160), 7040 => $expand!(-1152), 7048 => $expand!(-1144), 7056 => $expand!(-1136), 7064 => $expand!(-1128), 7072 => $expand!(-1120), 7080 => $expand!(-1112), 7088 => $expand!(-1104), 7096 => $expand!(-1096), 7104 => $expand!(-1088), 7112 => $expand!(-1080), 7120 => $expand!(-1072), 7128 => $expand!(-1064), 7136 => $expand!(-1056), 7144 => $expand!(-1048), 7152 => $expand!(-1040), 7160 => $expand!(-1032), 7168 => $expand!(-1024), 7176 => $expand!(-1016), 7184 => $expand!(-1008), 7192 => $expand!(-1000), 7200 => $expand!(-992), 7208 => $expand!(-984), 7216 => $expand!(-976), 7224 => $expand!(-968), 7232 => $expand!(-960), 7240 => $expand!(-952), 7248 => $expand!(-944), 7256 => $expand!(-936), 7264 => $expand!(-928), 7272 => $expand!(-920), 7280 => $expand!(-912), 7288 => $expand!(-904), 7296 => $expand!(-896), 7304 => $expand!(-888), 7312 => $expand!(-880), 7320 => $expand!(-872), 7328 => $expand!(-864), 7336 => $expand!(-856), 7344 => $expand!(-848), 7352 => $expand!(-840), 7360 => $expand!(-832), 7368 => $expand!(-824), 7376 => $expand!(-816), 7384 => $expand!(-808), 7392 => $expand!(-800), 7400 => $expand!(-792), 7408 => $expand!(-784), 7416 => $expand!(-776), 7424 => $expand!(-768), 7432 => $expand!(-760), 7440 => $expand!(-752), 7448 => $expand!(-744), 7456 => $expand!(-736), 7464 => $expand!(-728), 7472 => $expand!(-720), 7480 => $expand!(-712), 7488 => $expand!(-704), 7496 => $expand!(-696), 7504 => $expand!(-688), 7512 => $expand!(-680), 7520 => $expand!(-672), 7528 => $expand!(-664), 7536 => $expand!(-656), 7544 => $expand!(-648), 7552 => $expand!(-640), 7560 => $expand!(-632), 7568 => $expand!(-624), 7576 => $expand!(-616), 7584 => $expand!(-608), 7592 => $expand!(-600), 7600 => $expand!(-592), 7608 => $expand!(-584), 7616 => $expand!(-576), 7624 => $expand!(-568), 7632 => $expand!(-560), 7640 => $expand!(-552), 7648 => $expand!(-544), 7656 => $expand!(-536), 7664 => $expand!(-528), 7672 => $expand!(-520), 7680 => $expand!(-512), 7688 => $expand!(-504), 7696 => $expand!(-496), 7704 => $expand!(-488), 7712 => $expand!(-480), 7720 => $expand!(-472), 7728 => $expand!(-464), 7736 => $expand!(-456), 7744 => $expand!(-448), 7752 => $expand!(-440), 7760 => $expand!(-432), 7768 => $expand!(-424), 7776 => $expand!(-416), 7784 => $expand!(-408), 7792 => $expand!(-400), 7800 => $expand!(-392), 7808 => $expand!(-384), 7816 => $expand!(-376), 7824 => $expand!(-368), 7832 => $expand!(-360), 7840 => $expand!(-352), 7848 => $expand!(-344), 7856 => $expand!(-336), 7864 => $expand!(-328), 7872 => $expand!(-320), 7880 => $expand!(-312), 7888 => $expand!(-304), 7896 => $expand!(-296), 7904 => $expand!(-288), 7912 => $expand!(-280), 7920 => $expand!(-272), 7928 => $expand!(-264), 7936 => $expand!(-256), 7944 => $expand!(-248), 7952 => $expand!(-240), 7960 => $expand!(-232), 7968 => $expand!(-224), 7976 => $expand!(-216), 7984 => $expand!(-208), 7992 => $expand!(-200), 8000 => $expand!(-192), 8008 => $expand!(-184), 8016 => $expand!(-176), 8024 => $expand!(-168), 8032 => $expand!(-160), 8040 => $expand!(-152), 8048 => $expand!(-144), 8056 => $expand!(-136), 8064 => $expand!(-128), 8072 => $expand!(-120), 8080 => $expand!(-112), 8088 => $expand!(-104), 8096 => $expand!(-96), 8104 => $expand!(-88), 8112 => $expand!(-80), 8120 => $expand!(-72), 8128 => $expand!(-64), 8136 => $expand!(-56), 8144 => $expand!(-48), 8152 => $expand!(-40), 8160 => $expand!(-32), 8168 => $expand!(-24), 8176 => $expand!(-16), 8184 => $expand!(-8), _ => $expand!(4088), } }; } //immediate value: -2048:2044 macro_rules!
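// NOTE (illustrative sketch, not part of the upstream file): the
// `constify_imm_s13!` matcher that closes above, like the other
// `constify_imm_s*` macros in this file, exists so that an intrinsic wrapper
// can turn a run-time immediate argument into a compile-time literal. The
// caller defines a tiny callback macro and passes its name as `$expand`;
// the names in the sketch below are hypothetical placeholders:
//
//     macro_rules! call_intrinsic {
//         ($imm:expr) => {
//             unsafe { some_imm_taking_intrinsic(a, $imm) }
//         };
//     }
//     constify_imm_s13!(imm_s13, call_intrinsic)
//
// Because every reachable arm hands `$expand!` a literal, the underlying
// instruction always receives a constant immediate, while out-of-range
// inputs fall through to the `_` arm.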
constify_imm_s12 { ($imm_s12:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm_s12) & 0b1111_1111_1111 { 0 => $expand!(0), 4 => $expand!(4), 8 => $expand!(8), 12 => $expand!(12), 16 => $expand!(16), 20 => $expand!(20), 24 => $expand!(24), 28 => $expand!(28), 32 => $expand!(32), 36 => $expand!(36), 40 => $expand!(40), 44 => $expand!(44), 48 => $expand!(48), 52 => $expand!(52), 56 => $expand!(56), 60 => $expand!(60), 64 => $expand!(64), 68 => $expand!(68), 72 => $expand!(72), 76 => $expand!(76), 80 => $expand!(80), 84 => $expand!(84), 88 => $expand!(88), 92 => $expand!(92), 96 => $expand!(96), 100 => $expand!(100), 104 => $expand!(104), 108 => $expand!(108), 112 => $expand!(112), 116 => $expand!(116), 120 => $expand!(120), 124 => $expand!(124), 128 => $expand!(128), 132 => $expand!(132), 136 => $expand!(136), 140 => $expand!(140), 144 => $expand!(144), 148 => $expand!(148), 152 => $expand!(152), 156 => $expand!(156), 160 => $expand!(160), 164 => $expand!(164), 168 => $expand!(168), 172 => $expand!(172), 176 => $expand!(176), 180 => $expand!(180), 184 => $expand!(184), 188 => $expand!(188), 192 => $expand!(192), 196 => $expand!(196), 200 => $expand!(200), 204 => $expand!(204), 208 => $expand!(208), 212 => $expand!(212), 216 => $expand!(216), 220 => $expand!(220), 224 => $expand!(224), 228 => $expand!(228), 232 => $expand!(232), 236 => $expand!(236), 240 => $expand!(240), 244 => $expand!(244), 248 => $expand!(248), 252 => $expand!(252), 256 => $expand!(256), 260 => $expand!(260), 264 => $expand!(264), 268 => $expand!(268), 272 => $expand!(272), 276 => $expand!(276), 280 => $expand!(280), 284 => $expand!(284), 288 => $expand!(288), 292 => $expand!(292), 296 => $expand!(296), 300 => $expand!(300), 304 => $expand!(304), 308 => $expand!(308), 312 => $expand!(312), 316 => $expand!(316), 320 => $expand!(320), 324 => $expand!(324), 328 => $expand!(328), 332 => $expand!(332), 336 => $expand!(336), 340 => $expand!(340), 344 => $expand!(344), 348 => $expand!(348), 352 => $expand!(352), 356 => $expand!(356), 360 => $expand!(360), 364 => $expand!(364), 368 => $expand!(368), 372 => $expand!(372), 376 => $expand!(376), 380 => $expand!(380), 384 => $expand!(384), 388 => $expand!(388), 392 => $expand!(392), 396 => $expand!(396), 400 => $expand!(400), 404 => $expand!(404), 408 => $expand!(408), 412 => $expand!(412), 416 => $expand!(416), 420 => $expand!(420), 424 => $expand!(424), 428 => $expand!(428), 432 => $expand!(432), 436 => $expand!(436), 440 => $expand!(440), 444 => $expand!(444), 448 => $expand!(448), 452 => $expand!(452), 456 => $expand!(456), 460 => $expand!(460), 464 => $expand!(464), 468 => $expand!(468), 472 => $expand!(472), 476 => $expand!(476), 480 => $expand!(480), 484 => $expand!(484), 488 => $expand!(488), 492 => $expand!(492), 496 => $expand!(496), 500 => $expand!(500), 504 => $expand!(504), 508 => $expand!(508), 512 => $expand!(512), 516 => $expand!(516), 520 => $expand!(520), 524 => $expand!(524), 528 => $expand!(528), 532 => $expand!(532), 536 => $expand!(536), 540 => $expand!(540), 544 => $expand!(544), 548 => $expand!(548), 552 => $expand!(552), 556 => $expand!(556), 560 => $expand!(560), 564 => $expand!(564), 568 => $expand!(568), 572 => $expand!(572), 576 => $expand!(576), 580 => $expand!(580), 584 => $expand!(584), 588 => $expand!(588), 592 => $expand!(592), 596 => $expand!(596), 600 => $expand!(600), 604 => $expand!(604), 608 => $expand!(608), 612 => $expand!(612), 616 => $expand!(616), 620 => $expand!(620), 624 => $expand!(624), 628 => $expand!(628), 632 => 
$expand!(632), 636 => $expand!(636), 640 => $expand!(640), 644 => $expand!(644), 648 => $expand!(648), 652 => $expand!(652), 656 => $expand!(656), 660 => $expand!(660), 664 => $expand!(664), 668 => $expand!(668), 672 => $expand!(672), 676 => $expand!(676), 680 => $expand!(680), 684 => $expand!(684), 688 => $expand!(688), 692 => $expand!(692), 696 => $expand!(696), 700 => $expand!(700), 704 => $expand!(704), 708 => $expand!(708), 712 => $expand!(712), 716 => $expand!(716), 720 => $expand!(720), 724 => $expand!(724), 728 => $expand!(728), 732 => $expand!(732), 736 => $expand!(736), 740 => $expand!(740), 744 => $expand!(744), 748 => $expand!(748), 752 => $expand!(752), 756 => $expand!(756), 760 => $expand!(760), 764 => $expand!(764), 768 => $expand!(768), 772 => $expand!(772), 776 => $expand!(776), 780 => $expand!(780), 784 => $expand!(784), 788 => $expand!(788), 792 => $expand!(792), 796 => $expand!(796), 800 => $expand!(800), 804 => $expand!(804), 808 => $expand!(808), 812 => $expand!(812), 816 => $expand!(816), 820 => $expand!(820), 824 => $expand!(824), 828 => $expand!(828), 832 => $expand!(832), 836 => $expand!(836), 840 => $expand!(840), 844 => $expand!(844), 848 => $expand!(848), 852 => $expand!(852), 856 => $expand!(856), 860 => $expand!(860), 864 => $expand!(864), 868 => $expand!(868), 872 => $expand!(872), 876 => $expand!(876), 880 => $expand!(880), 884 => $expand!(884), 888 => $expand!(888), 892 => $expand!(892), 896 => $expand!(896), 900 => $expand!(900), 904 => $expand!(904), 908 => $expand!(908), 912 => $expand!(912), 916 => $expand!(916), 920 => $expand!(920), 924 => $expand!(924), 928 => $expand!(928), 932 => $expand!(932), 936 => $expand!(936), 940 => $expand!(940), 944 => $expand!(944), 948 => $expand!(948), 952 => $expand!(952), 956 => $expand!(956), 960 => $expand!(960), 964 => $expand!(964), 968 => $expand!(968), 972 => $expand!(972), 976 => $expand!(976), 980 => $expand!(980), 984 => $expand!(984), 988 => $expand!(988), 992 => $expand!(992), 996 => $expand!(996), 1000 => $expand!(1000), 1004 => $expand!(1004), 1008 => $expand!(1008), 1012 => $expand!(1012), 1016 => $expand!(1016), 1020 => $expand!(1020), 1024 => $expand!(1024), 1028 => $expand!(1028), 1032 => $expand!(1032), 1036 => $expand!(1036), 1040 => $expand!(1040), 1044 => $expand!(1044), 1048 => $expand!(1048), 1052 => $expand!(1052), 1056 => $expand!(1056), 1060 => $expand!(1060), 1064 => $expand!(1064), 1068 => $expand!(1068), 1072 => $expand!(1072), 1076 => $expand!(1076), 1080 => $expand!(1080), 1084 => $expand!(1084), 1088 => $expand!(1088), 1092 => $expand!(1092), 1096 => $expand!(1096), 1100 => $expand!(1100), 1104 => $expand!(1104), 1108 => $expand!(1108), 1112 => $expand!(1112), 1116 => $expand!(1116), 1120 => $expand!(1120), 1124 => $expand!(1124), 1128 => $expand!(1128), 1132 => $expand!(1132), 1136 => $expand!(1136), 1140 => $expand!(1140), 1144 => $expand!(1144), 1148 => $expand!(1148), 1152 => $expand!(1152), 1156 => $expand!(1156), 1160 => $expand!(1160), 1164 => $expand!(1164), 1168 => $expand!(1168), 1172 => $expand!(1172), 1176 => $expand!(1176), 1180 => $expand!(1180), 1184 => $expand!(1184), 1188 => $expand!(1188), 1192 => $expand!(1192), 1196 => $expand!(1196), 1200 => $expand!(1200), 1204 => $expand!(1204), 1208 => $expand!(1208), 1212 => $expand!(1212), 1216 => $expand!(1216), 1220 => $expand!(1220), 1224 => $expand!(1224), 1228 => $expand!(1228), 1232 => $expand!(1232), 1236 => $expand!(1236), 1240 => $expand!(1240), 1244 => $expand!(1244), 1248 => $expand!(1248), 1252 => $expand!(1252), 1256 => $expand!(1256),
1260 => $expand!(1260), 1264 => $expand!(1264), 1268 => $expand!(1268), 1272 => $expand!(1272), 1276 => $expand!(1276), 1280 => $expand!(1280), 1284 => $expand!(1284), 1288 => $expand!(1288), 1292 => $expand!(1292), 1296 => $expand!(1296), 1300 => $expand!(1300), 1304 => $expand!(1304), 1308 => $expand!(1308), 1312 => $expand!(1312), 1316 => $expand!(1316), 1320 => $expand!(1320), 1324 => $expand!(1324), 1328 => $expand!(1328), 1332 => $expand!(1332), 1336 => $expand!(1336), 1340 => $expand!(1340), 1344 => $expand!(1344), 1348 => $expand!(1348), 1352 => $expand!(1352), 1356 => $expand!(1356), 1360 => $expand!(1360), 1364 => $expand!(1364), 1368 => $expand!(1368), 1372 => $expand!(1372), 1376 => $expand!(1376), 1380 => $expand!(1380), 1384 => $expand!(1384), 1388 => $expand!(1388), 1392 => $expand!(1392), 1396 => $expand!(1396), 1400 => $expand!(1400), 1404 => $expand!(1404), 1408 => $expand!(1408), 1412 => $expand!(1412), 1416 => $expand!(1416), 1420 => $expand!(1420), 1424 => $expand!(1424), 1428 => $expand!(1428), 1432 => $expand!(1432), 1436 => $expand!(1436), 1440 => $expand!(1440), 1444 => $expand!(1444), 1448 => $expand!(1448), 1452 => $expand!(1452), 1456 => $expand!(1456), 1460 => $expand!(1460), 1464 => $expand!(1464), 1468 => $expand!(1468), 1472 => $expand!(1472), 1476 => $expand!(1476), 1480 => $expand!(1480), 1484 => $expand!(1484), 1488 => $expand!(1488), 1492 => $expand!(1492), 1496 => $expand!(1496), 1500 => $expand!(1500), 1504 => $expand!(1504), 1508 => $expand!(1508), 1512 => $expand!(1512), 1516 => $expand!(1516), 1520 => $expand!(1520), 1524 => $expand!(1524), 1528 => $expand!(1528), 1532 => $expand!(1532), 1536 => $expand!(1536), 1540 => $expand!(1540), 1544 => $expand!(1544), 1548 => $expand!(1548), 1552 => $expand!(1552), 1556 => $expand!(1556), 1560 => $expand!(1560), 1564 => $expand!(1564), 1568 => $expand!(1568), 1572 => $expand!(1572), 1576 => $expand!(1576), 1580 => $expand!(1580), 1584 => $expand!(1584), 1588 => $expand!(1588), 1592 => $expand!(1592), 1596 => $expand!(1596), 1600 => $expand!(1600), 1604 => $expand!(1604), 1608 => $expand!(1608), 1612 => $expand!(1612), 1616 => $expand!(1616), 1620 => $expand!(1620), 1624 => $expand!(1624), 1628 => $expand!(1628), 1632 => $expand!(1632), 1636 => $expand!(1636), 1640 => $expand!(1640), 1644 => $expand!(1644), 1648 => $expand!(1648), 1652 => $expand!(1652), 1656 => $expand!(1656), 1660 => $expand!(1660), 1664 => $expand!(1664), 1668 => $expand!(1668), 1672 => $expand!(1672), 1676 => $expand!(1676), 1680 => $expand!(1680), 1684 => $expand!(1684), 1688 => $expand!(1688), 1692 => $expand!(1692), 1696 => $expand!(1696), 1700 => $expand!(1700), 1704 => $expand!(1704), 1708 => $expand!(1708), 1712 => $expand!(1712), 1716 => $expand!(1716), 1720 => $expand!(1720), 1724 => $expand!(1724), 1728 => $expand!(1728), 1732 => $expand!(1732), 1736 => $expand!(1736), 1740 => $expand!(1740), 1744 => $expand!(1744), 1748 => $expand!(1748), 1752 => $expand!(1752), 1756 => $expand!(1756), 1760 => $expand!(1760), 1764 => $expand!(1764), 1768 => $expand!(1768), 1772 => $expand!(1772), 1776 => $expand!(1776), 1780 => $expand!(1780), 1784 => $expand!(1784), 1788 => $expand!(1788), 1792 => $expand!(1792), 1796 => $expand!(1796), 1800 => $expand!(1800), 1804 => $expand!(1804), 1808 => $expand!(1808), 1812 => $expand!(1812), 1816 => $expand!(1816), 1820 => $expand!(1820), 1824 => $expand!(1824), 1828 => $expand!(1828), 1832 => $expand!(1832), 1836 => $expand!(1836), 1840 => $expand!(1840), 1844 => $expand!(1844), 1848 => $expand!(1848), 
1852 => $expand!(1852), 1856 => $expand!(1856), 1860 => $expand!(1860), 1864 => $expand!(1864), 1868 => $expand!(1868), 1872 => $expand!(1872), 1876 => $expand!(1876), 1880 => $expand!(1880), 1884 => $expand!(1884), 1888 => $expand!(1888), 1892 => $expand!(1892), 1896 => $expand!(1896), 1900 => $expand!(1900), 1904 => $expand!(1904), 1908 => $expand!(1908), 1912 => $expand!(1912), 1916 => $expand!(1916), 1920 => $expand!(1920), 1924 => $expand!(1924), 1928 => $expand!(1928), 1932 => $expand!(1932), 1936 => $expand!(1936), 1940 => $expand!(1940), 1944 => $expand!(1944), 1948 => $expand!(1948), 1952 => $expand!(1952), 1956 => $expand!(1956), 1960 => $expand!(1960), 1964 => $expand!(1964), 1968 => $expand!(1968), 1972 => $expand!(1972), 1976 => $expand!(1976), 1980 => $expand!(1980), 1984 => $expand!(1984), 1988 => $expand!(1988), 1992 => $expand!(1992), 1996 => $expand!(1996), 2000 => $expand!(2000), 2004 => $expand!(2004), 2008 => $expand!(2008), 2012 => $expand!(2012), 2016 => $expand!(2016), 2020 => $expand!(2020), 2024 => $expand!(2024), 2028 => $expand!(2028), 2032 => $expand!(2032), 2036 => $expand!(2036), 2040 => $expand!(2040), 2048 => $expand!(-2048), 2052 => $expand!(-2044), 2056 => $expand!(-2040), 2060 => $expand!(-2036), 2064 => $expand!(-2032), 2068 => $expand!(-2028), 2072 => $expand!(-2024), 2076 => $expand!(-2020), 2080 => $expand!(-2016), 2084 => $expand!(-2012), 2088 => $expand!(-2008), 2092 => $expand!(-2004), 2096 => $expand!(-2000), 2100 => $expand!(-1996), 2104 => $expand!(-1992), 2108 => $expand!(-1988), 2112 => $expand!(-1984), 2116 => $expand!(-1980), 2120 => $expand!(-1976), 2124 => $expand!(-1972), 2128 => $expand!(-1968), 2132 => $expand!(-1964), 2136 => $expand!(-1960), 2140 => $expand!(-1956), 2144 => $expand!(-1952), 2148 => $expand!(-1948), 2152 => $expand!(-1944), 2156 => $expand!(-1940), 2160 => $expand!(-1936), 2164 => $expand!(-1932), 2168 => $expand!(-1928), 2172 => $expand!(-1924), 2176 => $expand!(-1920), 2180 => $expand!(-1916), 2184 => $expand!(-1912), 2188 => $expand!(-1908), 2192 => $expand!(-1904), 2196 => $expand!(-1900), 2200 => $expand!(-1896), 2204 => $expand!(-1892), 2208 => $expand!(-1888), 2212 => $expand!(-1884), 2216 => $expand!(-1880), 2220 => $expand!(-1876), 2224 => $expand!(-1872), 2228 => $expand!(-1868), 2232 => $expand!(-1864), 2236 => $expand!(-1860), 2240 => $expand!(-1856), 2244 => $expand!(-1852), 2248 => $expand!(-1848), 2252 => $expand!(-1844), 2256 => $expand!(-1840), 2260 => $expand!(-1836), 2264 => $expand!(-1832), 2268 => $expand!(-1828), 2272 => $expand!(-1824), 2276 => $expand!(-1820), 2280 => $expand!(-1816), 2284 => $expand!(-1812), 2288 => $expand!(-1808), 2292 => $expand!(-1804), 2296 => $expand!(-1800), 2300 => $expand!(-1796), 2304 => $expand!(-1792), 2308 => $expand!(-1788), 2312 => $expand!(-1784), 2316 => $expand!(-1780), 2320 => $expand!(-1776), 2324 => $expand!(-1772), 2328 => $expand!(-1768), 2332 => $expand!(-1764), 2336 => $expand!(-1760), 2340 => $expand!(-1756), 2344 => $expand!(-1752), 2348 => $expand!(-1748), 2352 => $expand!(-1744), 2356 => $expand!(-1740), 2360 => $expand!(-1736), 2364 => $expand!(-1732), 2368 => $expand!(-1728), 2372 => $expand!(-1724), 2376 => $expand!(-1720), 2380 => $expand!(-1716), 2384 => $expand!(-1712), 2388 => $expand!(-1708), 2392 => $expand!(-1704), 2396 => $expand!(-1700), 2400 => $expand!(-1696), 2404 => $expand!(-1692), 2408 => $expand!(-1688), 2412 => $expand!(-1684), 2416 => $expand!(-1680), 2420 => $expand!(-1676), 2424 => $expand!(-1672), 2428 => $expand!(-1668), 
2432 => $expand!(-1664), 2436 => $expand!(-1660), 2440 => $expand!(-1656), 2444 => $expand!(-1652), 2448 => $expand!(-1648), 2452 => $expand!(-1644), 2456 => $expand!(-1640), 2460 => $expand!(-1636), 2464 => $expand!(-1632), 2468 => $expand!(-1628), 2472 => $expand!(-1624), 2476 => $expand!(-1620), 2480 => $expand!(-1616), 2484 => $expand!(-1612), 2488 => $expand!(-1608), 2492 => $expand!(-1604), 2496 => $expand!(-1600), 2500 => $expand!(-1596), 2504 => $expand!(-1592), 2508 => $expand!(-1588), 2512 => $expand!(-1584), 2516 => $expand!(-1580), 2520 => $expand!(-1576), 2524 => $expand!(-1572), 2528 => $expand!(-1568), 2532 => $expand!(-1564), 2536 => $expand!(-1560), 2540 => $expand!(-1556), 2544 => $expand!(-1552), 2548 => $expand!(-1548), 2552 => $expand!(-1544), 2556 => $expand!(-1540), 2560 => $expand!(-1536), 2564 => $expand!(-1532), 2568 => $expand!(-1528), 2572 => $expand!(-1524), 2576 => $expand!(-1520), 2580 => $expand!(-1516), 2584 => $expand!(-1512), 2588 => $expand!(-1508), 2592 => $expand!(-1504), 2596 => $expand!(-1500), 2600 => $expand!(-1496), 2604 => $expand!(-1492), 2608 => $expand!(-1488), 2612 => $expand!(-1484), 2616 => $expand!(-1480), 2620 => $expand!(-1476), 2624 => $expand!(-1472), 2628 => $expand!(-1468), 2632 => $expand!(-1464), 2636 => $expand!(-1460), 2640 => $expand!(-1456), 2644 => $expand!(-1452), 2648 => $expand!(-1448), 2652 => $expand!(-1444), 2656 => $expand!(-1440), 2660 => $expand!(-1436), 2664 => $expand!(-1432), 2668 => $expand!(-1428), 2672 => $expand!(-1424), 2676 => $expand!(-1420), 2680 => $expand!(-1416), 2684 => $expand!(-1412), 2688 => $expand!(-1408), 2692 => $expand!(-1404), 2696 => $expand!(-1400), 2700 => $expand!(-1396), 2704 => $expand!(-1392), 2708 => $expand!(-1388), 2712 => $expand!(-1384), 2716 => $expand!(-1380), 2720 => $expand!(-1376), 2724 => $expand!(-1372), 2728 => $expand!(-1368), 2732 => $expand!(-1364), 2736 => $expand!(-1360), 2740 => $expand!(-1356), 2744 => $expand!(-1352), 2748 => $expand!(-1348), 2752 => $expand!(-1344), 2756 => $expand!(-1340), 2760 => $expand!(-1336), 2764 => $expand!(-1332), 2768 => $expand!(-1328), 2772 => $expand!(-1324), 2776 => $expand!(-1320), 2780 => $expand!(-1316), 2784 => $expand!(-1312), 2788 => $expand!(-1308), 2792 => $expand!(-1304), 2796 => $expand!(-1300), 2800 => $expand!(-1296), 2804 => $expand!(-1292), 2808 => $expand!(-1288), 2812 => $expand!(-1284), 2816 => $expand!(-1280), 2820 => $expand!(-1276), 2824 => $expand!(-1272), 2828 => $expand!(-1268), 2832 => $expand!(-1264), 2836 => $expand!(-1260), 2840 => $expand!(-1256), 2844 => $expand!(-1252), 2848 => $expand!(-1248), 2852 => $expand!(-1244), 2856 => $expand!(-1240), 2860 => $expand!(-1236), 2864 => $expand!(-1232), 2868 => $expand!(-1228), 2872 => $expand!(-1224), 2876 => $expand!(-1220), 2880 => $expand!(-1216), 2884 => $expand!(-1212), 2888 => $expand!(-1208), 2892 => $expand!(-1204), 2896 => $expand!(-1200), 2900 => $expand!(-1196), 2904 => $expand!(-1192), 2908 => $expand!(-1188), 2912 => $expand!(-1184), 2916 => $expand!(-1180), 2920 => $expand!(-1176), 2924 => $expand!(-1172), 2928 => $expand!(-1168), 2932 => $expand!(-1164), 2936 => $expand!(-1160), 2940 => $expand!(-1156), 2944 => $expand!(-1152), 2948 => $expand!(-1148), 2952 => $expand!(-1144), 2956 => $expand!(-1140), 2960 => $expand!(-1136), 2964 => $expand!(-1132), 2968 => $expand!(-1128), 2972 => $expand!(-1124), 2976 => $expand!(-1120), 2980 => $expand!(-1116), 2984 => $expand!(-1112), 2988 => $expand!(-1108), 2992 => $expand!(-1104), 2996 => $expand!(-1100), 3000 
=> $expand!(-1096), 3004 => $expand!(-1092), 3008 => $expand!(-1088), 3012 => $expand!(-1084), 3016 => $expand!(-1080), 3020 => $expand!(-1076), 3024 => $expand!(-1072), 3028 => $expand!(-1068), 3032 => $expand!(-1064), 3036 => $expand!(-1060), 3040 => $expand!(-1056), 3044 => $expand!(-1052), 3048 => $expand!(-1048), 3052 => $expand!(-1044), 3056 => $expand!(-1040), 3060 => $expand!(-1036), 3064 => $expand!(-1032), 3068 => $expand!(-1028), 3072 => $expand!(-1024), 3076 => $expand!(-1020), 3080 => $expand!(-1016), 3084 => $expand!(-1012), 3088 => $expand!(-1008), 3092 => $expand!(-1004), 3096 => $expand!(-1000), 3100 => $expand!(-996), 3104 => $expand!(-992), 3108 => $expand!(-988), 3112 => $expand!(-984), 3116 => $expand!(-980), 3120 => $expand!(-976), 3124 => $expand!(-972), 3128 => $expand!(-968), 3132 => $expand!(-964), 3136 => $expand!(-960), 3140 => $expand!(-956), 3144 => $expand!(-952), 3148 => $expand!(-948), 3152 => $expand!(-944), 3156 => $expand!(-940), 3160 => $expand!(-936), 3164 => $expand!(-932), 3168 => $expand!(-928), 3172 => $expand!(-924), 3176 => $expand!(-920), 3180 => $expand!(-916), 3184 => $expand!(-912), 3188 => $expand!(-908), 3192 => $expand!(-904), 3196 => $expand!(-900), 3200 => $expand!(-896), 3204 => $expand!(-892), 3208 => $expand!(-888), 3212 => $expand!(-884), 3216 => $expand!(-880), 3220 => $expand!(-876), 3224 => $expand!(-872), 3228 => $expand!(-868), 3232 => $expand!(-864), 3236 => $expand!(-860), 3240 => $expand!(-856), 3244 => $expand!(-852), 3248 => $expand!(-848), 3252 => $expand!(-844), 3256 => $expand!(-840), 3260 => $expand!(-836), 3264 => $expand!(-832), 3268 => $expand!(-828), 3272 => $expand!(-824), 3276 => $expand!(-820), 3280 => $expand!(-816), 3284 => $expand!(-812), 3288 => $expand!(-808), 3292 => $expand!(-804), 3296 => $expand!(-800), 3300 => $expand!(-796), 3304 => $expand!(-792), 3308 => $expand!(-788), 3312 => $expand!(-784), 3316 => $expand!(-780), 3320 => $expand!(-776), 3324 => $expand!(-772), 3328 => $expand!(-768), 3332 => $expand!(-764), 3336 => $expand!(-760), 3340 => $expand!(-756), 3344 => $expand!(-752), 3348 => $expand!(-748), 3352 => $expand!(-744), 3356 => $expand!(-740), 3360 => $expand!(-736), 3364 => $expand!(-732), 3368 => $expand!(-728), 3372 => $expand!(-724), 3376 => $expand!(-720), 3380 => $expand!(-716), 3384 => $expand!(-712), 3388 => $expand!(-708), 3392 => $expand!(-704), 3396 => $expand!(-700), 3400 => $expand!(-696), 3404 => $expand!(-692), 3408 => $expand!(-688), 3412 => $expand!(-684), 3416 => $expand!(-680), 3420 => $expand!(-676), 3424 => $expand!(-672), 3428 => $expand!(-668), 3432 => $expand!(-664), 3436 => $expand!(-660), 3440 => $expand!(-656), 3444 => $expand!(-652), 3448 => $expand!(-648), 3452 => $expand!(-644), 3456 => $expand!(-640), 3460 => $expand!(-636), 3464 => $expand!(-632), 3468 => $expand!(-628), 3472 => $expand!(-624), 3476 => $expand!(-620), 3480 => $expand!(-616), 3484 => $expand!(-612), 3488 => $expand!(-608), 3492 => $expand!(-604), 3496 => $expand!(-600), 3500 => $expand!(-596), 3504 => $expand!(-592), 3508 => $expand!(-588), 3512 => $expand!(-584), 3516 => $expand!(-580), 3520 => $expand!(-576), 3524 => $expand!(-572), 3528 => $expand!(-568), 3532 => $expand!(-564), 3536 => $expand!(-560), 3540 => $expand!(-556), 3544 => $expand!(-552), 3548 => $expand!(-548), 3552 => $expand!(-544), 3556 => $expand!(-540), 3560 => $expand!(-536), 3564 => $expand!(-532), 3568 => $expand!(-528), 3572 => $expand!(-524), 3576 => $expand!(-520), 3580 => $expand!(-516), 3584 => $expand!(-512), 3588 
=> $expand!(-508), 3592 => $expand!(-504), 3596 => $expand!(-500), 3600 => $expand!(-496), 3604 => $expand!(-492), 3608 => $expand!(-488), 3612 => $expand!(-484), 3616 => $expand!(-480), 3620 => $expand!(-476), 3624 => $expand!(-472), 3628 => $expand!(-468), 3632 => $expand!(-464), 3636 => $expand!(-460), 3640 => $expand!(-456), 3644 => $expand!(-452), 3648 => $expand!(-448), 3652 => $expand!(-444), 3656 => $expand!(-440), 3660 => $expand!(-436), 3664 => $expand!(-432), 3668 => $expand!(-428), 3672 => $expand!(-424), 3676 => $expand!(-420), 3680 => $expand!(-416), 3684 => $expand!(-412), 3688 => $expand!(-408), 3692 => $expand!(-404), 3696 => $expand!(-400), 3700 => $expand!(-396), 3704 => $expand!(-392), 3708 => $expand!(-388), 3712 => $expand!(-384), 3716 => $expand!(-380), 3720 => $expand!(-376), 3724 => $expand!(-372), 3728 => $expand!(-368), 3732 => $expand!(-364), 3736 => $expand!(-360), 3740 => $expand!(-356), 3744 => $expand!(-352), 3748 => $expand!(-348), 3752 => $expand!(-344), 3756 => $expand!(-340), 3760 => $expand!(-336), 3764 => $expand!(-332), 3768 => $expand!(-328), 3772 => $expand!(-324), 3776 => $expand!(-320), 3780 => $expand!(-316), 3784 => $expand!(-312), 3788 => $expand!(-308), 3792 => $expand!(-304), 3796 => $expand!(-300), 3800 => $expand!(-296), 3804 => $expand!(-292), 3808 => $expand!(-288), 3812 => $expand!(-284), 3816 => $expand!(-280), 3820 => $expand!(-276), 3824 => $expand!(-272), 3828 => $expand!(-268), 3832 => $expand!(-264), 3836 => $expand!(-260), 3840 => $expand!(-256), 3844 => $expand!(-252), 3848 => $expand!(-248), 3852 => $expand!(-244), 3856 => $expand!(-240), 3860 => $expand!(-236), 3864 => $expand!(-232), 3868 => $expand!(-228), 3872 => $expand!(-224), 3876 => $expand!(-220), 3880 => $expand!(-216), 3884 => $expand!(-212), 3888 => $expand!(-208), 3892 => $expand!(-204), 3896 => $expand!(-200), 3900 => $expand!(-196), 3904 => $expand!(-192), 3908 => $expand!(-188), 3912 => $expand!(-184), 3916 => $expand!(-180), 3920 => $expand!(-176), 3924 => $expand!(-172), 3928 => $expand!(-168), 3932 => $expand!(-164), 3936 => $expand!(-160), 3940 => $expand!(-156), 3944 => $expand!(-152), 3948 => $expand!(-148), 3952 => $expand!(-144), 3956 => $expand!(-140), 3960 => $expand!(-136), 3964 => $expand!(-132), 3968 => $expand!(-128), 3972 => $expand!(-124), 3976 => $expand!(-120), 3980 => $expand!(-116), 3984 => $expand!(-112), 3988 => $expand!(-108), 3992 => $expand!(-104), 3996 => $expand!(-100), 4000 => $expand!(-96), 4004 => $expand!(-92), 4008 => $expand!(-88), 4012 => $expand!(-84), 4016 => $expand!(-80), 4020 => $expand!(-76), 4024 => $expand!(-72), 4028 => $expand!(-68), 4032 => $expand!(-64), 4036 => $expand!(-60), 4040 => $expand!(-56), 4044 => $expand!(-52), 4048 => $expand!(-48), 4052 => $expand!(-44), 4056 => $expand!(-40), 4060 => $expand!(-36), 4064 => $expand!(-32), 4068 => $expand!(-28), 4072 => $expand!(-24), 4076 => $expand!(-20), 4080 => $expand!(-16), 4084 => $expand!(-12), 4088 => $expand!(-8), 4092 => $expand!(-4), _ => $expand!(2044), } }; } //immediate value: -1024:1022 macro_rules! 
constify_imm_s11 { ($imm_s11:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm_s11) & 0b111_1111_1111 { 0 => $expand!(0), 2 => $expand!(2), 4 => $expand!(4), 6 => $expand!(6), 8 => $expand!(8), 10 => $expand!(10), 12 => $expand!(12), 14 => $expand!(14), 16 => $expand!(16), 18 => $expand!(18), 20 => $expand!(20), 22 => $expand!(22), 24 => $expand!(24), 26 => $expand!(26), 28 => $expand!(28), 30 => $expand!(30), 32 => $expand!(32), 34 => $expand!(34), 36 => $expand!(36), 38 => $expand!(38), 40 => $expand!(40), 42 => $expand!(42), 44 => $expand!(44), 46 => $expand!(46), 48 => $expand!(48), 50 => $expand!(50), 52 => $expand!(52), 54 => $expand!(54), 56 => $expand!(56), 58 => $expand!(58), 60 => $expand!(60), 62 => $expand!(62), 64 => $expand!(64), 66 => $expand!(66), 68 => $expand!(68), 70 => $expand!(70), 72 => $expand!(72), 74 => $expand!(74), 76 => $expand!(76), 78 => $expand!(78), 80 => $expand!(80), 82 => $expand!(82), 84 => $expand!(84), 86 => $expand!(86), 88 => $expand!(88), 90 => $expand!(90), 92 => $expand!(92), 94 => $expand!(94), 96 => $expand!(96), 98 => $expand!(98), 100 => $expand!(100), 102 => $expand!(102), 104 => $expand!(104), 106 => $expand!(106), 108 => $expand!(108), 110 => $expand!(110), 112 => $expand!(112), 114 => $expand!(114), 116 => $expand!(116), 118 => $expand!(118), 120 => $expand!(120), 122 => $expand!(122), 124 => $expand!(124), 126 => $expand!(126), 128 => $expand!(128), 130 => $expand!(130), 132 => $expand!(132), 134 => $expand!(134), 136 => $expand!(136), 138 => $expand!(138), 140 => $expand!(140), 142 => $expand!(142), 144 => $expand!(144), 146 => $expand!(146), 148 => $expand!(148), 150 => $expand!(150), 152 => $expand!(152), 154 => $expand!(154), 156 => $expand!(156), 158 => $expand!(158), 160 => $expand!(160), 162 => $expand!(162), 164 => $expand!(164), 166 => $expand!(166), 168 => $expand!(168), 170 => $expand!(170), 172 => $expand!(172), 174 => $expand!(174), 176 => $expand!(176), 178 => $expand!(178), 180 => $expand!(180), 182 => $expand!(182), 184 => $expand!(184), 186 => $expand!(186), 188 => $expand!(188), 190 => $expand!(190), 192 => $expand!(192), 194 => $expand!(194), 196 => $expand!(196), 198 => $expand!(198), 200 => $expand!(200), 202 => $expand!(202), 204 => $expand!(204), 206 => $expand!(206), 208 => $expand!(208), 210 => $expand!(210), 212 => $expand!(212), 214 => $expand!(214), 216 => $expand!(216), 218 => $expand!(218), 220 => $expand!(220), 222 => $expand!(222), 224 => $expand!(224), 226 => $expand!(226), 228 => $expand!(228), 230 => $expand!(230), 232 => $expand!(232), 234 => $expand!(234), 236 => $expand!(236), 238 => $expand!(238), 240 => $expand!(240), 242 => $expand!(242), 244 => $expand!(244), 246 => $expand!(246), 248 => $expand!(248), 250 => $expand!(250), 252 => $expand!(252), 254 => $expand!(254), 256 => $expand!(256), 258 => $expand!(258), 260 => $expand!(260), 262 => $expand!(262), 264 => $expand!(264), 266 => $expand!(266), 268 => $expand!(268), 270 => $expand!(270), 272 => $expand!(272), 274 => $expand!(274), 276 => $expand!(276), 278 => $expand!(278), 280 => $expand!(280), 282 => $expand!(282), 284 => $expand!(284), 286 => $expand!(286), 288 => $expand!(288), 290 => $expand!(290), 292 => $expand!(292), 294 => $expand!(294), 296 => $expand!(296), 298 => $expand!(298), 300 => $expand!(300), 302 => $expand!(302), 304 => $expand!(304), 306 => $expand!(306), 308 => $expand!(308), 310 => $expand!(310), 312 => $expand!(312), 314 => $expand!(314), 316 => $expand!(316), 318 => $expand!(318), 320 => $expand!(320), 
322 => $expand!(322), 324 => $expand!(324), 326 => $expand!(326), 328 => $expand!(328), 330 => $expand!(330), 332 => $expand!(332), 334 => $expand!(334), 336 => $expand!(336), 338 => $expand!(338), 340 => $expand!(340), 342 => $expand!(342), 344 => $expand!(344), 346 => $expand!(346), 348 => $expand!(348), 350 => $expand!(350), 352 => $expand!(352), 354 => $expand!(354), 356 => $expand!(356), 358 => $expand!(358), 360 => $expand!(360), 362 => $expand!(362), 364 => $expand!(364), 366 => $expand!(366), 368 => $expand!(368), 370 => $expand!(370), 372 => $expand!(372), 374 => $expand!(374), 376 => $expand!(376), 378 => $expand!(378), 380 => $expand!(380), 382 => $expand!(382), 384 => $expand!(384), 386 => $expand!(386), 388 => $expand!(388), 390 => $expand!(390), 392 => $expand!(392), 394 => $expand!(394), 396 => $expand!(396), 398 => $expand!(398), 400 => $expand!(400), 402 => $expand!(402), 404 => $expand!(404), 406 => $expand!(406), 408 => $expand!(408), 410 => $expand!(410), 412 => $expand!(412), 414 => $expand!(414), 416 => $expand!(416), 418 => $expand!(418), 420 => $expand!(420), 422 => $expand!(422), 424 => $expand!(424), 426 => $expand!(426), 428 => $expand!(428), 430 => $expand!(430), 432 => $expand!(432), 434 => $expand!(434), 436 => $expand!(436), 438 => $expand!(438), 440 => $expand!(440), 442 => $expand!(442), 444 => $expand!(444), 446 => $expand!(446), 448 => $expand!(448), 450 => $expand!(450), 452 => $expand!(452), 454 => $expand!(454), 456 => $expand!(456), 458 => $expand!(458), 460 => $expand!(460), 462 => $expand!(462), 464 => $expand!(464), 466 => $expand!(466), 468 => $expand!(468), 470 => $expand!(470), 472 => $expand!(472), 474 => $expand!(474), 476 => $expand!(476), 478 => $expand!(478), 480 => $expand!(480), 482 => $expand!(482), 484 => $expand!(484), 486 => $expand!(486), 488 => $expand!(488), 490 => $expand!(490), 492 => $expand!(492), 494 => $expand!(494), 496 => $expand!(496), 498 => $expand!(498), 500 => $expand!(500), 502 => $expand!(502), 504 => $expand!(504), 506 => $expand!(506), 508 => $expand!(508), 510 => $expand!(510), 512 => $expand!(512), 514 => $expand!(514), 516 => $expand!(516), 518 => $expand!(518), 520 => $expand!(520), 522 => $expand!(522), 524 => $expand!(524), 526 => $expand!(526), 528 => $expand!(528), 530 => $expand!(530), 532 => $expand!(532), 534 => $expand!(534), 536 => $expand!(536), 538 => $expand!(538), 540 => $expand!(540), 542 => $expand!(542), 544 => $expand!(544), 546 => $expand!(546), 548 => $expand!(548), 550 => $expand!(550), 552 => $expand!(552), 554 => $expand!(554), 556 => $expand!(556), 558 => $expand!(558), 560 => $expand!(560), 562 => $expand!(562), 564 => $expand!(564), 566 => $expand!(566), 568 => $expand!(568), 570 => $expand!(570), 572 => $expand!(572), 574 => $expand!(574), 576 => $expand!(576), 578 => $expand!(578), 580 => $expand!(580), 582 => $expand!(582), 584 => $expand!(584), 586 => $expand!(586), 588 => $expand!(588), 590 => $expand!(590), 592 => $expand!(592), 594 => $expand!(594), 596 => $expand!(596), 598 => $expand!(598), 600 => $expand!(600), 602 => $expand!(602), 604 => $expand!(604), 606 => $expand!(606), 608 => $expand!(608), 610 => $expand!(610), 612 => $expand!(612), 614 => $expand!(614), 616 => $expand!(616), 618 => $expand!(618), 620 => $expand!(620), 622 => $expand!(622), 624 => $expand!(624), 626 => $expand!(626), 628 => $expand!(628), 630 => $expand!(630), 632 => $expand!(632), 634 => $expand!(634), 636 => $expand!(636), 638 => $expand!(638), 640 => $expand!(640), 642 => $expand!(642), 644 => 
$expand!(644), 646 => $expand!(646), 648 => $expand!(648), 650 => $expand!(650), 652 => $expand!(652), 654 => $expand!(654), 656 => $expand!(656), 658 => $expand!(658), 660 => $expand!(660), 662 => $expand!(662), 664 => $expand!(664), 666 => $expand!(666), 668 => $expand!(668), 670 => $expand!(670), 672 => $expand!(672), 674 => $expand!(674), 676 => $expand!(676), 678 => $expand!(678), 680 => $expand!(680), 682 => $expand!(682), 684 => $expand!(684), 686 => $expand!(686), 688 => $expand!(688), 690 => $expand!(690), 692 => $expand!(692), 694 => $expand!(694), 696 => $expand!(696), 698 => $expand!(698), 700 => $expand!(700), 702 => $expand!(702), 704 => $expand!(704), 706 => $expand!(706), 708 => $expand!(708), 710 => $expand!(710), 712 => $expand!(712), 714 => $expand!(714), 716 => $expand!(716), 718 => $expand!(718), 720 => $expand!(720), 722 => $expand!(722), 724 => $expand!(724), 726 => $expand!(726), 728 => $expand!(728), 730 => $expand!(730), 732 => $expand!(732), 734 => $expand!(734), 736 => $expand!(736), 738 => $expand!(738), 740 => $expand!(740), 742 => $expand!(742), 744 => $expand!(744), 746 => $expand!(746), 748 => $expand!(748), 750 => $expand!(750), 752 => $expand!(752), 754 => $expand!(754), 756 => $expand!(756), 758 => $expand!(758), 760 => $expand!(760), 762 => $expand!(762), 764 => $expand!(764), 766 => $expand!(766), 768 => $expand!(768), 770 => $expand!(770), 772 => $expand!(772), 774 => $expand!(774), 776 => $expand!(776), 778 => $expand!(778), 780 => $expand!(780), 782 => $expand!(782), 784 => $expand!(784), 786 => $expand!(786), 788 => $expand!(788), 790 => $expand!(790), 792 => $expand!(792), 794 => $expand!(794), 796 => $expand!(796), 798 => $expand!(798), 800 => $expand!(800), 802 => $expand!(802), 804 => $expand!(804), 806 => $expand!(806), 808 => $expand!(808), 810 => $expand!(810), 812 => $expand!(812), 814 => $expand!(814), 816 => $expand!(816), 818 => $expand!(818), 820 => $expand!(820), 822 => $expand!(822), 824 => $expand!(824), 826 => $expand!(826), 828 => $expand!(828), 830 => $expand!(830), 832 => $expand!(832), 834 => $expand!(834), 836 => $expand!(836), 838 => $expand!(838), 840 => $expand!(840), 842 => $expand!(842), 844 => $expand!(844), 846 => $expand!(846), 848 => $expand!(848), 850 => $expand!(850), 852 => $expand!(852), 854 => $expand!(854), 856 => $expand!(856), 858 => $expand!(858), 860 => $expand!(860), 862 => $expand!(862), 864 => $expand!(864), 866 => $expand!(866), 868 => $expand!(868), 870 => $expand!(870), 872 => $expand!(872), 874 => $expand!(874), 876 => $expand!(876), 878 => $expand!(878), 880 => $expand!(880), 882 => $expand!(882), 884 => $expand!(884), 886 => $expand!(886), 888 => $expand!(888), 890 => $expand!(890), 892 => $expand!(892), 894 => $expand!(894), 896 => $expand!(896), 898 => $expand!(898), 900 => $expand!(900), 902 => $expand!(902), 904 => $expand!(904), 906 => $expand!(906), 908 => $expand!(908), 910 => $expand!(910), 912 => $expand!(912), 914 => $expand!(914), 916 => $expand!(916), 918 => $expand!(918), 920 => $expand!(920), 922 => $expand!(922), 924 => $expand!(924), 926 => $expand!(926), 928 => $expand!(928), 930 => $expand!(930), 932 => $expand!(932), 934 => $expand!(934), 936 => $expand!(936), 938 => $expand!(938), 940 => $expand!(940), 942 => $expand!(942), 944 => $expand!(944), 946 => $expand!(946), 948 => $expand!(948), 950 => $expand!(950), 952 => $expand!(952), 954 => $expand!(954), 956 => $expand!(956), 958 => $expand!(958), 960 => $expand!(960), 962 => $expand!(962), 964 => $expand!(964), 966 => 
$expand!(966), 968 => $expand!(968), 970 => $expand!(970), 972 => $expand!(972), 974 => $expand!(974), 976 => $expand!(976), 978 => $expand!(978), 980 => $expand!(980), 982 => $expand!(982), 984 => $expand!(984), 986 => $expand!(986), 988 => $expand!(988), 990 => $expand!(990), 992 => $expand!(992), 994 => $expand!(994), 996 => $expand!(996), 998 => $expand!(998), 1000 => $expand!(1000), 1002 => $expand!(1002), 1004 => $expand!(1004), 1006 => $expand!(1006), 1008 => $expand!(1008), 1010 => $expand!(1010), 1012 => $expand!(1012), 1014 => $expand!(1014), 1016 => $expand!(1016), 1018 => $expand!(1018), 1020 => $expand!(1020), 1024 => $expand!(-1024), 1026 => $expand!(-1022), 1028 => $expand!(-1020), 1030 => $expand!(-1018), 1032 => $expand!(-1016), 1034 => $expand!(-1014), 1036 => $expand!(-1012), 1038 => $expand!(-1010), 1040 => $expand!(-1008), 1042 => $expand!(-1006), 1044 => $expand!(-1004), 1046 => $expand!(-1002), 1048 => $expand!(-1000), 1050 => $expand!(-998), 1052 => $expand!(-996), 1054 => $expand!(-994), 1056 => $expand!(-992), 1058 => $expand!(-990), 1060 => $expand!(-988), 1062 => $expand!(-986), 1064 => $expand!(-984), 1066 => $expand!(-982), 1068 => $expand!(-980), 1070 => $expand!(-978), 1072 => $expand!(-976), 1074 => $expand!(-974), 1076 => $expand!(-972), 1078 => $expand!(-970), 1080 => $expand!(-968), 1082 => $expand!(-966), 1084 => $expand!(-964), 1086 => $expand!(-962), 1088 => $expand!(-960), 1090 => $expand!(-958), 1092 => $expand!(-956), 1094 => $expand!(-954), 1096 => $expand!(-952), 1098 => $expand!(-950), 1100 => $expand!(-948), 1102 => $expand!(-946), 1104 => $expand!(-944), 1106 => $expand!(-942), 1108 => $expand!(-940), 1110 => $expand!(-938), 1112 => $expand!(-936), 1114 => $expand!(-934), 1116 => $expand!(-932), 1118 => $expand!(-930), 1120 => $expand!(-928), 1122 => $expand!(-926), 1124 => $expand!(-924), 1126 => $expand!(-922), 1128 => $expand!(-920), 1130 => $expand!(-918), 1132 => $expand!(-916), 1134 => $expand!(-914), 1136 => $expand!(-912), 1138 => $expand!(-910), 1140 => $expand!(-908), 1142 => $expand!(-906), 1144 => $expand!(-904), 1146 => $expand!(-902), 1148 => $expand!(-900), 1150 => $expand!(-898), 1152 => $expand!(-896), 1154 => $expand!(-894), 1156 => $expand!(-892), 1158 => $expand!(-890), 1160 => $expand!(-888), 1162 => $expand!(-886), 1164 => $expand!(-884), 1166 => $expand!(-882), 1168 => $expand!(-880), 1170 => $expand!(-878), 1172 => $expand!(-876), 1174 => $expand!(-874), 1176 => $expand!(-872), 1178 => $expand!(-870), 1180 => $expand!(-868), 1182 => $expand!(-866), 1184 => $expand!(-864), 1186 => $expand!(-862), 1188 => $expand!(-860), 1190 => $expand!(-858), 1192 => $expand!(-856), 1194 => $expand!(-854), 1196 => $expand!(-852), 1198 => $expand!(-850), 1200 => $expand!(-848), 1202 => $expand!(-846), 1204 => $expand!(-844), 1206 => $expand!(-842), 1208 => $expand!(-840), 1210 => $expand!(-838), 1212 => $expand!(-836), 1214 => $expand!(-834), 1216 => $expand!(-832), 1218 => $expand!(-830), 1220 => $expand!(-828), 1222 => $expand!(-826), 1224 => $expand!(-824), 1226 => $expand!(-822), 1228 => $expand!(-820), 1230 => $expand!(-818), 1232 => $expand!(-816), 1234 => $expand!(-814), 1236 => $expand!(-812), 1238 => $expand!(-810), 1240 => $expand!(-808), 1242 => $expand!(-806), 1244 => $expand!(-804), 1246 => $expand!(-802), 1248 => $expand!(-800), 1250 => $expand!(-798), 1252 => $expand!(-796), 1254 => $expand!(-794), 1256 => $expand!(-792), 1258 => $expand!(-790), 1260 => $expand!(-788), 1262 => $expand!(-786), 1264 => $expand!(-784), 1266 
=> $expand!(-782), 1268 => $expand!(-780), 1270 => $expand!(-778), 1272 => $expand!(-776), 1274 => $expand!(-774), 1276 => $expand!(-772), 1278 => $expand!(-770), 1280 => $expand!(-768), 1282 => $expand!(-766), 1284 => $expand!(-764), 1286 => $expand!(-762), 1288 => $expand!(-760), 1290 => $expand!(-758), 1292 => $expand!(-756), 1294 => $expand!(-754), 1296 => $expand!(-752), 1298 => $expand!(-750), 1300 => $expand!(-748), 1302 => $expand!(-746), 1304 => $expand!(-744), 1306 => $expand!(-742), 1308 => $expand!(-740), 1310 => $expand!(-738), 1312 => $expand!(-736), 1314 => $expand!(-734), 1316 => $expand!(-732), 1318 => $expand!(-730), 1320 => $expand!(-728), 1322 => $expand!(-726), 1324 => $expand!(-724), 1326 => $expand!(-722), 1328 => $expand!(-720), 1330 => $expand!(-718), 1332 => $expand!(-716), 1334 => $expand!(-714), 1336 => $expand!(-712), 1338 => $expand!(-710), 1340 => $expand!(-708), 1342 => $expand!(-706), 1344 => $expand!(-704), 1346 => $expand!(-702), 1348 => $expand!(-700), 1350 => $expand!(-698), 1352 => $expand!(-696), 1354 => $expand!(-694), 1356 => $expand!(-692), 1358 => $expand!(-690), 1360 => $expand!(-688), 1362 => $expand!(-686), 1364 => $expand!(-684), 1366 => $expand!(-682), 1368 => $expand!(-680), 1370 => $expand!(-678), 1372 => $expand!(-676), 1374 => $expand!(-674), 1376 => $expand!(-672), 1378 => $expand!(-670), 1380 => $expand!(-668), 1382 => $expand!(-666), 1384 => $expand!(-664), 1386 => $expand!(-662), 1388 => $expand!(-660), 1390 => $expand!(-658), 1392 => $expand!(-656), 1394 => $expand!(-654), 1396 => $expand!(-652), 1398 => $expand!(-650), 1400 => $expand!(-648), 1402 => $expand!(-646), 1404 => $expand!(-644), 1406 => $expand!(-642), 1408 => $expand!(-640), 1410 => $expand!(-638), 1412 => $expand!(-636), 1414 => $expand!(-634), 1416 => $expand!(-632), 1418 => $expand!(-630), 1420 => $expand!(-628), 1422 => $expand!(-626), 1424 => $expand!(-624), 1426 => $expand!(-622), 1428 => $expand!(-620), 1430 => $expand!(-618), 1432 => $expand!(-616), 1434 => $expand!(-614), 1436 => $expand!(-612), 1438 => $expand!(-610), 1440 => $expand!(-608), 1442 => $expand!(-606), 1444 => $expand!(-604), 1446 => $expand!(-602), 1448 => $expand!(-600), 1450 => $expand!(-598), 1452 => $expand!(-596), 1454 => $expand!(-594), 1456 => $expand!(-592), 1458 => $expand!(-590), 1460 => $expand!(-588), 1462 => $expand!(-586), 1464 => $expand!(-584), 1466 => $expand!(-582), 1468 => $expand!(-580), 1470 => $expand!(-578), 1472 => $expand!(-576), 1474 => $expand!(-574), 1476 => $expand!(-572), 1478 => $expand!(-570), 1480 => $expand!(-568), 1482 => $expand!(-566), 1484 => $expand!(-564), 1486 => $expand!(-562), 1488 => $expand!(-560), 1490 => $expand!(-558), 1492 => $expand!(-556), 1494 => $expand!(-554), 1496 => $expand!(-552), 1498 => $expand!(-550), 1500 => $expand!(-548), 1502 => $expand!(-546), 1504 => $expand!(-544), 1506 => $expand!(-542), 1508 => $expand!(-540), 1510 => $expand!(-538), 1512 => $expand!(-536), 1514 => $expand!(-534), 1516 => $expand!(-532), 1518 => $expand!(-530), 1520 => $expand!(-528), 1522 => $expand!(-526), 1524 => $expand!(-524), 1526 => $expand!(-522), 1528 => $expand!(-520), 1530 => $expand!(-518), 1532 => $expand!(-516), 1534 => $expand!(-514), 1536 => $expand!(-512), 1538 => $expand!(-510), 1540 => $expand!(-508), 1542 => $expand!(-506), 1544 => $expand!(-504), 1546 => $expand!(-502), 1548 => $expand!(-500), 1550 => $expand!(-498), 1552 => $expand!(-496), 1554 => $expand!(-494), 1556 => $expand!(-492), 1558 => $expand!(-490), 1560 => $expand!(-488), 1562 => 
$expand!(-486), 1564 => $expand!(-484), 1566 => $expand!(-482), 1568 => $expand!(-480), 1570 => $expand!(-478), 1572 => $expand!(-476), 1574 => $expand!(-474), 1576 => $expand!(-472), 1578 => $expand!(-470), 1580 => $expand!(-468), 1582 => $expand!(-466), 1584 => $expand!(-464), 1586 => $expand!(-462), 1588 => $expand!(-460), 1590 => $expand!(-458), 1592 => $expand!(-456), 1594 => $expand!(-454), 1596 => $expand!(-452), 1598 => $expand!(-450), 1600 => $expand!(-448), 1602 => $expand!(-446), 1604 => $expand!(-444), 1606 => $expand!(-442), 1608 => $expand!(-440), 1610 => $expand!(-438), 1612 => $expand!(-436), 1614 => $expand!(-434), 1616 => $expand!(-432), 1618 => $expand!(-430), 1620 => $expand!(-428), 1622 => $expand!(-426), 1624 => $expand!(-424), 1626 => $expand!(-422), 1628 => $expand!(-420), 1630 => $expand!(-418), 1632 => $expand!(-416), 1634 => $expand!(-414), 1636 => $expand!(-412), 1638 => $expand!(-410), 1640 => $expand!(-408), 1642 => $expand!(-406), 1644 => $expand!(-404), 1646 => $expand!(-402), 1648 => $expand!(-400), 1650 => $expand!(-398), 1652 => $expand!(-396), 1654 => $expand!(-394), 1656 => $expand!(-392), 1658 => $expand!(-390), 1660 => $expand!(-388), 1662 => $expand!(-386), 1664 => $expand!(-384), 1666 => $expand!(-382), 1668 => $expand!(-380), 1670 => $expand!(-378), 1672 => $expand!(-376), 1674 => $expand!(-374), 1676 => $expand!(-372), 1678 => $expand!(-370), 1680 => $expand!(-368), 1682 => $expand!(-366), 1684 => $expand!(-364), 1686 => $expand!(-362), 1688 => $expand!(-360), 1690 => $expand!(-358), 1692 => $expand!(-356), 1694 => $expand!(-354), 1696 => $expand!(-352), 1698 => $expand!(-350), 1700 => $expand!(-348), 1702 => $expand!(-346), 1704 => $expand!(-344), 1706 => $expand!(-342), 1708 => $expand!(-340), 1710 => $expand!(-338), 1712 => $expand!(-336), 1714 => $expand!(-334), 1716 => $expand!(-332), 1718 => $expand!(-330), 1720 => $expand!(-328), 1722 => $expand!(-326), 1724 => $expand!(-324), 1726 => $expand!(-322), 1728 => $expand!(-320), 1730 => $expand!(-318), 1732 => $expand!(-316), 1734 => $expand!(-314), 1736 => $expand!(-312), 1738 => $expand!(-310), 1740 => $expand!(-308), 1742 => $expand!(-306), 1744 => $expand!(-304), 1746 => $expand!(-302), 1748 => $expand!(-300), 1750 => $expand!(-298), 1752 => $expand!(-296), 1754 => $expand!(-294), 1756 => $expand!(-292), 1758 => $expand!(-290), 1760 => $expand!(-288), 1762 => $expand!(-286), 1764 => $expand!(-284), 1766 => $expand!(-282), 1768 => $expand!(-280), 1770 => $expand!(-278), 1772 => $expand!(-276), 1774 => $expand!(-274), 1776 => $expand!(-272), 1778 => $expand!(-270), 1780 => $expand!(-268), 1782 => $expand!(-266), 1784 => $expand!(-264), 1786 => $expand!(-262), 1788 => $expand!(-260), 1790 => $expand!(-258), 1792 => $expand!(-256), 1794 => $expand!(-254), 1796 => $expand!(-252), 1798 => $expand!(-250), 1800 => $expand!(-248), 1802 => $expand!(-246), 1804 => $expand!(-244), 1806 => $expand!(-242), 1808 => $expand!(-240), 1810 => $expand!(-238), 1812 => $expand!(-236), 1814 => $expand!(-234), 1816 => $expand!(-232), 1818 => $expand!(-230), 1820 => $expand!(-228), 1822 => $expand!(-226), 1824 => $expand!(-224), 1826 => $expand!(-222), 1828 => $expand!(-220), 1830 => $expand!(-218), 1832 => $expand!(-216), 1834 => $expand!(-214), 1836 => $expand!(-212), 1838 => $expand!(-210), 1840 => $expand!(-208), 1842 => $expand!(-206), 1844 => $expand!(-204), 1846 => $expand!(-202), 1848 => $expand!(-200), 1850 => $expand!(-198), 1852 => $expand!(-196), 1854 => $expand!(-194), 1856 => $expand!(-192), 1858 => 
$expand!(-190), 1860 => $expand!(-188), 1862 => $expand!(-186), 1864 => $expand!(-184), 1866 => $expand!(-182), 1868 => $expand!(-180), 1870 => $expand!(-178), 1872 => $expand!(-176), 1874 => $expand!(-174), 1876 => $expand!(-172), 1878 => $expand!(-170), 1880 => $expand!(-168), 1882 => $expand!(-166), 1884 => $expand!(-164), 1886 => $expand!(-162), 1888 => $expand!(-160), 1890 => $expand!(-158), 1892 => $expand!(-156), 1894 => $expand!(-154), 1896 => $expand!(-152), 1898 => $expand!(-150), 1900 => $expand!(-148), 1902 => $expand!(-146), 1904 => $expand!(-144), 1906 => $expand!(-142), 1908 => $expand!(-140), 1910 => $expand!(-138), 1912 => $expand!(-136), 1914 => $expand!(-134), 1916 => $expand!(-132), 1918 => $expand!(-130), 1920 => $expand!(-128), 1922 => $expand!(-126), 1924 => $expand!(-124), 1926 => $expand!(-122), 1928 => $expand!(-120), 1930 => $expand!(-118), 1932 => $expand!(-116), 1934 => $expand!(-114), 1936 => $expand!(-112), 1938 => $expand!(-110), 1940 => $expand!(-108), 1942 => $expand!(-106), 1944 => $expand!(-104), 1946 => $expand!(-102), 1948 => $expand!(-100), 1950 => $expand!(-98), 1952 => $expand!(-96), 1954 => $expand!(-94), 1956 => $expand!(-92), 1958 => $expand!(-90), 1960 => $expand!(-88), 1962 => $expand!(-86), 1964 => $expand!(-84), 1966 => $expand!(-82), 1968 => $expand!(-80), 1970 => $expand!(-78), 1972 => $expand!(-76), 1974 => $expand!(-74), 1976 => $expand!(-72), 1978 => $expand!(-70), 1980 => $expand!(-68), 1982 => $expand!(-66), 1984 => $expand!(-64), 1986 => $expand!(-62), 1988 => $expand!(-60), 1990 => $expand!(-58), 1992 => $expand!(-56), 1994 => $expand!(-54), 1996 => $expand!(-52), 1998 => $expand!(-50), 2000 => $expand!(-48), 2002 => $expand!(-46), 2004 => $expand!(-44), 2006 => $expand!(-42), 2008 => $expand!(-40), 2010 => $expand!(-38), 2012 => $expand!(-36), 2014 => $expand!(-34), 2016 => $expand!(-32), 2018 => $expand!(-30), 2020 => $expand!(-28), 2022 => $expand!(-26), 2024 => $expand!(-24), 2026 => $expand!(-22), 2028 => $expand!(-20), 2030 => $expand!(-18), 2032 => $expand!(-16), 2034 => $expand!(-14), 2036 => $expand!(-12), 2038 => $expand!(-10), 2040 => $expand!(-8), 2042 => $expand!(-6), 2044 => $expand!(-4), 2046 => $expand!(-2), _ => $expand!(1022), } }; } //immediate value: -512:511 macro_rules! 
constify_imm_s10 { ($imm_s10:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm_s10) & 0b11_1111_1111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), 3 => $expand!(3), 4 => $expand!(4), 5 => $expand!(5), 6 => $expand!(6), 7 => $expand!(7), 8 => $expand!(8), 9 => $expand!(9), 10 => $expand!(10), 11 => $expand!(11), 12 => $expand!(12), 13 => $expand!(13), 14 => $expand!(14), 15 => $expand!(15), 16 => $expand!(16), 17 => $expand!(17), 18 => $expand!(18), 19 => $expand!(19), 20 => $expand!(20), 21 => $expand!(21), 22 => $expand!(22), 23 => $expand!(23), 24 => $expand!(24), 25 => $expand!(25), 26 => $expand!(26), 27 => $expand!(27), 28 => $expand!(28), 29 => $expand!(29), 30 => $expand!(30), 31 => $expand!(31), 32 => $expand!(32), 33 => $expand!(33), 34 => $expand!(34), 35 => $expand!(35), 36 => $expand!(36), 37 => $expand!(37), 38 => $expand!(38), 39 => $expand!(39), 40 => $expand!(40), 41 => $expand!(41), 42 => $expand!(42), 43 => $expand!(43), 44 => $expand!(44), 45 => $expand!(45), 46 => $expand!(46), 47 => $expand!(47), 48 => $expand!(48), 49 => $expand!(49), 50 => $expand!(50), 51 => $expand!(51), 52 => $expand!(52), 53 => $expand!(53), 54 => $expand!(54), 55 => $expand!(55), 56 => $expand!(56), 57 => $expand!(57), 58 => $expand!(58), 59 => $expand!(59), 60 => $expand!(60), 61 => $expand!(61), 62 => $expand!(62), 63 => $expand!(63), 64 => $expand!(64), 65 => $expand!(65), 66 => $expand!(66), 67 => $expand!(67), 68 => $expand!(68), 69 => $expand!(69), 70 => $expand!(70), 71 => $expand!(71), 72 => $expand!(72), 73 => $expand!(73), 74 => $expand!(74), 75 => $expand!(75), 76 => $expand!(76), 77 => $expand!(77), 78 => $expand!(78), 79 => $expand!(79), 80 => $expand!(80), 81 => $expand!(81), 82 => $expand!(82), 83 => $expand!(83), 84 => $expand!(84), 85 => $expand!(85), 86 => $expand!(86), 87 => $expand!(87), 88 => $expand!(88), 89 => $expand!(89), 90 => $expand!(90), 91 => $expand!(91), 92 => $expand!(92), 93 => $expand!(93), 94 => $expand!(94), 95 => $expand!(95), 96 => $expand!(96), 97 => $expand!(97), 98 => $expand!(98), 99 => $expand!(99), 100 => $expand!(100), 101 => $expand!(101), 102 => $expand!(102), 103 => $expand!(103), 104 => $expand!(104), 105 => $expand!(105), 106 => $expand!(106), 107 => $expand!(107), 108 => $expand!(108), 109 => $expand!(109), 110 => $expand!(110), 111 => $expand!(111), 112 => $expand!(112), 113 => $expand!(113), 114 => $expand!(114), 115 => $expand!(115), 116 => $expand!(116), 117 => $expand!(117), 118 => $expand!(118), 119 => $expand!(119), 120 => $expand!(120), 121 => $expand!(121), 122 => $expand!(122), 123 => $expand!(123), 124 => $expand!(124), 125 => $expand!(125), 126 => $expand!(126), 127 => $expand!(127), 128 => $expand!(128), 129 => $expand!(129), 130 => $expand!(130), 131 => $expand!(131), 132 => $expand!(132), 133 => $expand!(133), 134 => $expand!(134), 135 => $expand!(135), 136 => $expand!(136), 137 => $expand!(137), 138 => $expand!(138), 139 => $expand!(139), 140 => $expand!(140), 141 => $expand!(141), 142 => $expand!(142), 143 => $expand!(143), 144 => $expand!(144), 145 => $expand!(145), 146 => $expand!(146), 147 => $expand!(147), 148 => $expand!(148), 149 => $expand!(149), 150 => $expand!(150), 151 => $expand!(151), 152 => $expand!(152), 153 => $expand!(153), 154 => $expand!(154), 155 => $expand!(155), 156 => $expand!(156), 157 => $expand!(157), 158 => $expand!(158), 159 => $expand!(159), 160 => $expand!(160), 161 => $expand!(161), 162 => $expand!(162), 163 => $expand!(163), 164 => $expand!(164), 165 => $expand!(165), 
166 => $expand!(166), 167 => $expand!(167), 168 => $expand!(168), 169 => $expand!(169), 170 => $expand!(170), 171 => $expand!(171), 172 => $expand!(172), 173 => $expand!(173), 174 => $expand!(174), 175 => $expand!(175), 176 => $expand!(176), 177 => $expand!(177), 178 => $expand!(178), 179 => $expand!(179), 180 => $expand!(180), 181 => $expand!(181), 182 => $expand!(182), 183 => $expand!(183), 184 => $expand!(184), 185 => $expand!(185), 186 => $expand!(186), 187 => $expand!(187), 188 => $expand!(188), 189 => $expand!(189), 190 => $expand!(190), 191 => $expand!(191), 192 => $expand!(192), 193 => $expand!(193), 194 => $expand!(194), 195 => $expand!(195), 196 => $expand!(196), 197 => $expand!(197), 198 => $expand!(198), 199 => $expand!(199), 200 => $expand!(200), 201 => $expand!(201), 202 => $expand!(202), 203 => $expand!(203), 204 => $expand!(204), 205 => $expand!(205), 206 => $expand!(206), 207 => $expand!(207), 208 => $expand!(208), 209 => $expand!(209), 210 => $expand!(210), 211 => $expand!(211), 212 => $expand!(212), 213 => $expand!(213), 214 => $expand!(214), 215 => $expand!(215), 216 => $expand!(216), 217 => $expand!(217), 218 => $expand!(218), 219 => $expand!(219), 220 => $expand!(220), 221 => $expand!(221), 222 => $expand!(222), 223 => $expand!(223), 224 => $expand!(224), 225 => $expand!(225), 226 => $expand!(226), 227 => $expand!(227), 228 => $expand!(228), 229 => $expand!(229), 230 => $expand!(230), 231 => $expand!(231), 232 => $expand!(232), 233 => $expand!(233), 234 => $expand!(234), 235 => $expand!(235), 236 => $expand!(236), 237 => $expand!(237), 238 => $expand!(238), 239 => $expand!(239), 240 => $expand!(240), 241 => $expand!(241), 242 => $expand!(242), 243 => $expand!(243), 244 => $expand!(244), 245 => $expand!(245), 246 => $expand!(246), 247 => $expand!(247), 248 => $expand!(248), 249 => $expand!(249), 250 => $expand!(250), 251 => $expand!(251), 252 => $expand!(252), 253 => $expand!(253), 254 => $expand!(254), 255 => $expand!(255), 256 => $expand!(256), 257 => $expand!(257), 258 => $expand!(258), 259 => $expand!(259), 260 => $expand!(260), 261 => $expand!(261), 262 => $expand!(262), 263 => $expand!(263), 264 => $expand!(264), 265 => $expand!(265), 266 => $expand!(266), 267 => $expand!(267), 268 => $expand!(268), 269 => $expand!(269), 270 => $expand!(270), 271 => $expand!(271), 272 => $expand!(272), 273 => $expand!(273), 274 => $expand!(274), 275 => $expand!(275), 276 => $expand!(276), 277 => $expand!(277), 278 => $expand!(278), 279 => $expand!(279), 280 => $expand!(280), 281 => $expand!(281), 282 => $expand!(282), 283 => $expand!(283), 284 => $expand!(284), 285 => $expand!(285), 286 => $expand!(286), 287 => $expand!(287), 288 => $expand!(288), 289 => $expand!(289), 290 => $expand!(290), 291 => $expand!(291), 292 => $expand!(292), 293 => $expand!(293), 294 => $expand!(294), 295 => $expand!(295), 296 => $expand!(296), 297 => $expand!(297), 298 => $expand!(298), 299 => $expand!(299), 300 => $expand!(300), 301 => $expand!(301), 302 => $expand!(302), 303 => $expand!(303), 304 => $expand!(304), 305 => $expand!(305), 306 => $expand!(306), 307 => $expand!(307), 308 => $expand!(308), 309 => $expand!(309), 310 => $expand!(310), 311 => $expand!(311), 312 => $expand!(312), 313 => $expand!(313), 314 => $expand!(314), 315 => $expand!(315), 316 => $expand!(316), 317 => $expand!(317), 318 => $expand!(318), 319 => $expand!(319), 320 => $expand!(320), 321 => $expand!(321), 322 => $expand!(322), 323 => $expand!(323), 324 => $expand!(324), 325 => $expand!(325), 326 => $expand!(326), 327 => 
$expand!(327), 328 => $expand!(328), 329 => $expand!(329), 330 => $expand!(330), 331 => $expand!(331), 332 => $expand!(332), 333 => $expand!(333), 334 => $expand!(334), 335 => $expand!(335), 336 => $expand!(336), 337 => $expand!(337), 338 => $expand!(338), 339 => $expand!(339), 340 => $expand!(340), 341 => $expand!(341), 342 => $expand!(342), 343 => $expand!(343), 344 => $expand!(344), 345 => $expand!(345), 346 => $expand!(346), 347 => $expand!(347), 348 => $expand!(348), 349 => $expand!(349), 350 => $expand!(350), 351 => $expand!(351), 352 => $expand!(352), 353 => $expand!(353), 354 => $expand!(354), 355 => $expand!(355), 356 => $expand!(356), 357 => $expand!(357), 358 => $expand!(358), 359 => $expand!(359), 360 => $expand!(360), 361 => $expand!(361), 362 => $expand!(362), 363 => $expand!(363), 364 => $expand!(364), 365 => $expand!(365), 366 => $expand!(366), 367 => $expand!(367), 368 => $expand!(368), 369 => $expand!(369), 370 => $expand!(370), 371 => $expand!(371), 372 => $expand!(372), 373 => $expand!(373), 374 => $expand!(374), 375 => $expand!(375), 376 => $expand!(376), 377 => $expand!(377), 378 => $expand!(378), 379 => $expand!(379), 380 => $expand!(380), 381 => $expand!(381), 382 => $expand!(382), 383 => $expand!(383), 384 => $expand!(384), 385 => $expand!(385), 386 => $expand!(386), 387 => $expand!(387), 388 => $expand!(388), 389 => $expand!(389), 390 => $expand!(390), 391 => $expand!(391), 392 => $expand!(392), 393 => $expand!(393), 394 => $expand!(394), 395 => $expand!(395), 396 => $expand!(396), 397 => $expand!(397), 398 => $expand!(398), 399 => $expand!(399), 400 => $expand!(400), 401 => $expand!(401), 402 => $expand!(402), 403 => $expand!(403), 404 => $expand!(404), 405 => $expand!(405), 406 => $expand!(406), 407 => $expand!(407), 408 => $expand!(408), 409 => $expand!(409), 410 => $expand!(410), 411 => $expand!(411), 412 => $expand!(412), 413 => $expand!(413), 414 => $expand!(414), 415 => $expand!(415), 416 => $expand!(416), 417 => $expand!(417), 418 => $expand!(418), 419 => $expand!(419), 420 => $expand!(420), 421 => $expand!(421), 422 => $expand!(422), 423 => $expand!(423), 424 => $expand!(424), 425 => $expand!(425), 426 => $expand!(426), 427 => $expand!(427), 428 => $expand!(428), 429 => $expand!(429), 430 => $expand!(430), 431 => $expand!(431), 432 => $expand!(432), 433 => $expand!(433), 434 => $expand!(434), 435 => $expand!(435), 436 => $expand!(436), 437 => $expand!(437), 438 => $expand!(438), 439 => $expand!(439), 440 => $expand!(440), 441 => $expand!(441), 442 => $expand!(442), 443 => $expand!(443), 444 => $expand!(444), 445 => $expand!(445), 446 => $expand!(446), 447 => $expand!(447), 448 => $expand!(448), 449 => $expand!(449), 450 => $expand!(450), 451 => $expand!(451), 452 => $expand!(452), 453 => $expand!(453), 454 => $expand!(454), 455 => $expand!(455), 456 => $expand!(456), 457 => $expand!(457), 458 => $expand!(458), 459 => $expand!(459), 460 => $expand!(460), 461 => $expand!(461), 462 => $expand!(462), 463 => $expand!(463), 464 => $expand!(464), 465 => $expand!(465), 466 => $expand!(466), 467 => $expand!(467), 468 => $expand!(468), 469 => $expand!(469), 470 => $expand!(470), 471 => $expand!(471), 472 => $expand!(472), 473 => $expand!(473), 474 => $expand!(474), 475 => $expand!(475), 476 => $expand!(476), 477 => $expand!(477), 478 => $expand!(478), 479 => $expand!(479), 480 => $expand!(480), 481 => $expand!(481), 482 => $expand!(482), 483 => $expand!(483), 484 => $expand!(484), 485 => $expand!(485), 486 => $expand!(486), 487 => $expand!(487), 488 => 
$expand!(488), 489 => $expand!(489), 490 => $expand!(490), 491 => $expand!(491), 492 => $expand!(492), 493 => $expand!(493), 494 => $expand!(494), 495 => $expand!(495), 496 => $expand!(496), 497 => $expand!(497), 498 => $expand!(498), 499 => $expand!(499), 500 => $expand!(500), 501 => $expand!(501), 502 => $expand!(502), 503 => $expand!(503), 504 => $expand!(504), 505 => $expand!(505), 506 => $expand!(506), 507 => $expand!(507), 508 => $expand!(508), 509 => $expand!(509), 510 => $expand!(510), 512 => $expand!(-512), 513 => $expand!(-511), 514 => $expand!(-510), 515 => $expand!(-509), 516 => $expand!(-508), 517 => $expand!(-507), 518 => $expand!(-506), 519 => $expand!(-505), 520 => $expand!(-504), 521 => $expand!(-503), 522 => $expand!(-502), 523 => $expand!(-501), 524 => $expand!(-500), 525 => $expand!(-499), 526 => $expand!(-498), 527 => $expand!(-497), 528 => $expand!(-496), 529 => $expand!(-495), 530 => $expand!(-494), 531 => $expand!(-493), 532 => $expand!(-492), 533 => $expand!(-491), 534 => $expand!(-490), 535 => $expand!(-489), 536 => $expand!(-488), 537 => $expand!(-487), 538 => $expand!(-486), 539 => $expand!(-485), 540 => $expand!(-484), 541 => $expand!(-483), 542 => $expand!(-482), 543 => $expand!(-481), 544 => $expand!(-480), 545 => $expand!(-479), 546 => $expand!(-478), 547 => $expand!(-477), 548 => $expand!(-476), 549 => $expand!(-475), 550 => $expand!(-474), 551 => $expand!(-473), 552 => $expand!(-472), 553 => $expand!(-471), 554 => $expand!(-470), 555 => $expand!(-469), 556 => $expand!(-468), 557 => $expand!(-467), 558 => $expand!(-466), 559 => $expand!(-465), 560 => $expand!(-464), 561 => $expand!(-463), 562 => $expand!(-462), 563 => $expand!(-461), 564 => $expand!(-460), 565 => $expand!(-459), 566 => $expand!(-458), 567 => $expand!(-457), 568 => $expand!(-456), 569 => $expand!(-455), 570 => $expand!(-454), 571 => $expand!(-453), 572 => $expand!(-452), 573 => $expand!(-451), 574 => $expand!(-450), 575 => $expand!(-449), 576 => $expand!(-448), 577 => $expand!(-447), 578 => $expand!(-446), 579 => $expand!(-445), 580 => $expand!(-444), 581 => $expand!(-443), 582 => $expand!(-442), 583 => $expand!(-441), 584 => $expand!(-440), 585 => $expand!(-439), 586 => $expand!(-438), 587 => $expand!(-437), 588 => $expand!(-436), 589 => $expand!(-435), 590 => $expand!(-434), 591 => $expand!(-433), 592 => $expand!(-432), 593 => $expand!(-431), 594 => $expand!(-430), 595 => $expand!(-429), 596 => $expand!(-428), 597 => $expand!(-427), 598 => $expand!(-426), 599 => $expand!(-425), 600 => $expand!(-424), 601 => $expand!(-423), 602 => $expand!(-422), 603 => $expand!(-421), 604 => $expand!(-420), 605 => $expand!(-419), 606 => $expand!(-418), 607 => $expand!(-417), 608 => $expand!(-416), 609 => $expand!(-415), 610 => $expand!(-414), 611 => $expand!(-413), 612 => $expand!(-412), 613 => $expand!(-411), 614 => $expand!(-410), 615 => $expand!(-409), 616 => $expand!(-408), 617 => $expand!(-407), 618 => $expand!(-406), 619 => $expand!(-405), 620 => $expand!(-404), 621 => $expand!(-403), 622 => $expand!(-402), 623 => $expand!(-401), 624 => $expand!(-400), 625 => $expand!(-399), 626 => $expand!(-398), 627 => $expand!(-397), 628 => $expand!(-396), 629 => $expand!(-395), 630 => $expand!(-394), 631 => $expand!(-393), 632 => $expand!(-392), 633 => $expand!(-391), 634 => $expand!(-390), 635 => $expand!(-389), 636 => $expand!(-388), 637 => $expand!(-387), 638 => $expand!(-386), 639 => $expand!(-385), 640 => $expand!(-384), 641 => $expand!(-383), 642 => $expand!(-382), 643 => $expand!(-381), 644 => 
$expand!(-380), 645 => $expand!(-379), 646 => $expand!(-378), 647 => $expand!(-377), 648 => $expand!(-376), 649 => $expand!(-375), 650 => $expand!(-374), 651 => $expand!(-373), 652 => $expand!(-372), 653 => $expand!(-371), 654 => $expand!(-370), 655 => $expand!(-369), 656 => $expand!(-368), 657 => $expand!(-367), 658 => $expand!(-366), 659 => $expand!(-365), 660 => $expand!(-364), 661 => $expand!(-363), 662 => $expand!(-362), 663 => $expand!(-361), 664 => $expand!(-360), 665 => $expand!(-359), 666 => $expand!(-358), 667 => $expand!(-357), 668 => $expand!(-356), 669 => $expand!(-355), 670 => $expand!(-354), 671 => $expand!(-353), 672 => $expand!(-352), 673 => $expand!(-351), 674 => $expand!(-350), 675 => $expand!(-349), 676 => $expand!(-348), 677 => $expand!(-347), 678 => $expand!(-346), 679 => $expand!(-345), 680 => $expand!(-344), 681 => $expand!(-343), 682 => $expand!(-342), 683 => $expand!(-341), 684 => $expand!(-340), 685 => $expand!(-339), 686 => $expand!(-338), 687 => $expand!(-337), 688 => $expand!(-336), 689 => $expand!(-335), 690 => $expand!(-334), 691 => $expand!(-333), 692 => $expand!(-332), 693 => $expand!(-331), 694 => $expand!(-330), 695 => $expand!(-329), 696 => $expand!(-328), 697 => $expand!(-327), 698 => $expand!(-326), 699 => $expand!(-325), 700 => $expand!(-324), 701 => $expand!(-323), 702 => $expand!(-322), 703 => $expand!(-321), 704 => $expand!(-320), 705 => $expand!(-319), 706 => $expand!(-318), 707 => $expand!(-317), 708 => $expand!(-316), 709 => $expand!(-315), 710 => $expand!(-314), 711 => $expand!(-313), 712 => $expand!(-312), 713 => $expand!(-311), 714 => $expand!(-310), 715 => $expand!(-309), 716 => $expand!(-308), 717 => $expand!(-307), 718 => $expand!(-306), 719 => $expand!(-305), 720 => $expand!(-304), 721 => $expand!(-303), 722 => $expand!(-302), 723 => $expand!(-301), 724 => $expand!(-300), 725 => $expand!(-299), 726 => $expand!(-298), 727 => $expand!(-297), 728 => $expand!(-296), 729 => $expand!(-295), 730 => $expand!(-294), 731 => $expand!(-293), 732 => $expand!(-292), 733 => $expand!(-291), 734 => $expand!(-290), 735 => $expand!(-289), 736 => $expand!(-288), 737 => $expand!(-287), 738 => $expand!(-286), 739 => $expand!(-285), 740 => $expand!(-284), 741 => $expand!(-283), 742 => $expand!(-282), 743 => $expand!(-281), 744 => $expand!(-280), 745 => $expand!(-279), 746 => $expand!(-278), 747 => $expand!(-277), 748 => $expand!(-276), 749 => $expand!(-275), 750 => $expand!(-274), 751 => $expand!(-273), 752 => $expand!(-272), 753 => $expand!(-271), 754 => $expand!(-270), 755 => $expand!(-269), 756 => $expand!(-268), 757 => $expand!(-267), 758 => $expand!(-266), 759 => $expand!(-265), 760 => $expand!(-264), 761 => $expand!(-263), 762 => $expand!(-262), 763 => $expand!(-261), 764 => $expand!(-260), 765 => $expand!(-259), 766 => $expand!(-258), 767 => $expand!(-257), 768 => $expand!(-256), 769 => $expand!(-255), 770 => $expand!(-254), 771 => $expand!(-253), 772 => $expand!(-252), 773 => $expand!(-251), 774 => $expand!(-250), 775 => $expand!(-249), 776 => $expand!(-248), 777 => $expand!(-247), 778 => $expand!(-246), 779 => $expand!(-245), 780 => $expand!(-244), 781 => $expand!(-243), 782 => $expand!(-242), 783 => $expand!(-241), 784 => $expand!(-240), 785 => $expand!(-239), 786 => $expand!(-238), 787 => $expand!(-237), 788 => $expand!(-236), 789 => $expand!(-235), 790 => $expand!(-234), 791 => $expand!(-233), 792 => $expand!(-232), 793 => $expand!(-231), 794 => $expand!(-230), 795 => $expand!(-229), 796 => $expand!(-228), 797 => $expand!(-227), 798 => 
$expand!(-226), 799 => $expand!(-225), 800 => $expand!(-224), 801 => $expand!(-223), 802 => $expand!(-222), 803 => $expand!(-221), 804 => $expand!(-220), 805 => $expand!(-219), 806 => $expand!(-218), 807 => $expand!(-217), 808 => $expand!(-216), 809 => $expand!(-215), 810 => $expand!(-214), 811 => $expand!(-213), 812 => $expand!(-212), 813 => $expand!(-211), 814 => $expand!(-210), 815 => $expand!(-209), 816 => $expand!(-208), 817 => $expand!(-207), 818 => $expand!(-206), 819 => $expand!(-205), 820 => $expand!(-204), 821 => $expand!(-203), 822 => $expand!(-202), 823 => $expand!(-201), 824 => $expand!(-200), 825 => $expand!(-199), 826 => $expand!(-198), 827 => $expand!(-197), 828 => $expand!(-196), 829 => $expand!(-195), 830 => $expand!(-194), 831 => $expand!(-193), 832 => $expand!(-192), 833 => $expand!(-191), 834 => $expand!(-190), 835 => $expand!(-189), 836 => $expand!(-188), 837 => $expand!(-187), 838 => $expand!(-186), 839 => $expand!(-185), 840 => $expand!(-184), 841 => $expand!(-183), 842 => $expand!(-182), 843 => $expand!(-181), 844 => $expand!(-180), 845 => $expand!(-179), 846 => $expand!(-178), 847 => $expand!(-177), 848 => $expand!(-176), 849 => $expand!(-175), 850 => $expand!(-174), 851 => $expand!(-173), 852 => $expand!(-172), 853 => $expand!(-171), 854 => $expand!(-170), 855 => $expand!(-169), 856 => $expand!(-168), 857 => $expand!(-167), 858 => $expand!(-166), 859 => $expand!(-165), 860 => $expand!(-164), 861 => $expand!(-163), 862 => $expand!(-162), 863 => $expand!(-161), 864 => $expand!(-160), 865 => $expand!(-159), 866 => $expand!(-158), 867 => $expand!(-157), 868 => $expand!(-156), 869 => $expand!(-155), 870 => $expand!(-154), 871 => $expand!(-153), 872 => $expand!(-152), 873 => $expand!(-151), 874 => $expand!(-150), 875 => $expand!(-149), 876 => $expand!(-148), 877 => $expand!(-147), 878 => $expand!(-146), 879 => $expand!(-145), 880 => $expand!(-144), 881 => $expand!(-143), 882 => $expand!(-142), 883 => $expand!(-141), 884 => $expand!(-140), 885 => $expand!(-139), 886 => $expand!(-138), 887 => $expand!(-137), 888 => $expand!(-136), 889 => $expand!(-135), 890 => $expand!(-134), 891 => $expand!(-133), 892 => $expand!(-132), 893 => $expand!(-131), 894 => $expand!(-130), 895 => $expand!(-129), 896 => $expand!(-128), 897 => $expand!(-127), 898 => $expand!(-126), 899 => $expand!(-125), 900 => $expand!(-124), 901 => $expand!(-123), 902 => $expand!(-122), 903 => $expand!(-121), 904 => $expand!(-120), 905 => $expand!(-119), 906 => $expand!(-118), 907 => $expand!(-117), 908 => $expand!(-116), 909 => $expand!(-115), 910 => $expand!(-114), 911 => $expand!(-113), 912 => $expand!(-112), 913 => $expand!(-111), 914 => $expand!(-110), 915 => $expand!(-109), 916 => $expand!(-108), 917 => $expand!(-107), 918 => $expand!(-106), 919 => $expand!(-105), 920 => $expand!(-104), 921 => $expand!(-103), 922 => $expand!(-102), 923 => $expand!(-101), 924 => $expand!(-100), 925 => $expand!(-99), 926 => $expand!(-98), 927 => $expand!(-97), 928 => $expand!(-96), 929 => $expand!(-95), 930 => $expand!(-94), 931 => $expand!(-93), 932 => $expand!(-92), 933 => $expand!(-91), 934 => $expand!(-90), 935 => $expand!(-89), 936 => $expand!(-88), 937 => $expand!(-87), 938 => $expand!(-86), 939 => $expand!(-85), 940 => $expand!(-84), 941 => $expand!(-83), 942 => $expand!(-82), 943 => $expand!(-81), 944 => $expand!(-80), 945 => $expand!(-79), 946 => $expand!(-78), 947 => $expand!(-77), 948 => $expand!(-76), 949 => $expand!(-75), 950 => $expand!(-74), 951 => $expand!(-73), 952 => $expand!(-72), 953 => $expand!(-71), 
954 => $expand!(-70), 955 => $expand!(-69), 956 => $expand!(-68), 957 => $expand!(-67), 958 => $expand!(-66), 959 => $expand!(-65), 960 => $expand!(-64), 961 => $expand!(-63), 962 => $expand!(-62), 963 => $expand!(-61), 964 => $expand!(-60), 965 => $expand!(-59), 966 => $expand!(-58), 967 => $expand!(-57), 968 => $expand!(-56), 969 => $expand!(-55), 970 => $expand!(-54), 971 => $expand!(-53), 972 => $expand!(-52), 973 => $expand!(-51), 974 => $expand!(-50), 975 => $expand!(-49), 976 => $expand!(-48), 977 => $expand!(-47), 978 => $expand!(-46), 979 => $expand!(-45), 980 => $expand!(-44), 981 => $expand!(-43), 982 => $expand!(-42), 983 => $expand!(-41), 984 => $expand!(-40), 985 => $expand!(-39), 986 => $expand!(-38), 987 => $expand!(-37), 988 => $expand!(-36), 989 => $expand!(-35), 990 => $expand!(-34), 991 => $expand!(-33), 992 => $expand!(-32), 993 => $expand!(-31), 994 => $expand!(-30), 995 => $expand!(-29), 996 => $expand!(-28), 997 => $expand!(-27), 998 => $expand!(-26), 999 => $expand!(-25), 1000 => $expand!(-24), 1001 => $expand!(-23), 1002 => $expand!(-22), 1003 => $expand!(-21), 1004 => $expand!(-20), 1005 => $expand!(-19), 1006 => $expand!(-18), 1007 => $expand!(-17), 1008 => $expand!(-16), 1009 => $expand!(-15), 1010 => $expand!(-14), 1011 => $expand!(-13), 1012 => $expand!(-12), 1013 => $expand!(-11), 1014 => $expand!(-10), 1015 => $expand!(-9), 1016 => $expand!(-8), 1017 => $expand!(-7), 1018 => $expand!(-6), 1019 => $expand!(-5), 1020 => $expand!(-4), 1021 => $expand!(-3), 1022 => $expand!(-2), 1023 => $expand!(-1), _ => $expand!(511), } }; } //immediate value: 0:63 macro_rules! constify_imm6 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm8) & 0b11_1111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), 3 => $expand!(3), 4 => $expand!(4), 5 => $expand!(5), 6 => $expand!(6), 7 => $expand!(7), 8 => $expand!(8), 9 => $expand!(9), 10 => $expand!(10), 11 => $expand!(11), 12 => $expand!(12), 13 => $expand!(13), 14 => $expand!(14), 15 => $expand!(15), 16 => $expand!(16), 17 => $expand!(17), 18 => $expand!(18), 19 => $expand!(19), 20 => $expand!(20), 21 => $expand!(21), 22 => $expand!(22), 23 => $expand!(23), 24 => $expand!(24), 25 => $expand!(25), 26 => $expand!(26), 27 => $expand!(27), 28 => $expand!(28), 29 => $expand!(29), 30 => $expand!(30), 31 => $expand!(31), 32 => $expand!(32), 33 => $expand!(33), 34 => $expand!(34), 35 => $expand!(35), 36 => $expand!(36), 37 => $expand!(37), 38 => $expand!(38), 39 => $expand!(39), 40 => $expand!(40), 41 => $expand!(41), 42 => $expand!(42), 43 => $expand!(43), 44 => $expand!(44), 45 => $expand!(45), 46 => $expand!(46), 47 => $expand!(47), 48 => $expand!(48), 49 => $expand!(49), 50 => $expand!(50), 51 => $expand!(51), 52 => $expand!(52), 53 => $expand!(53), 54 => $expand!(54), 55 => $expand!(55), 56 => $expand!(56), 57 => $expand!(57), 58 => $expand!(58), 59 => $expand!(59), 60 => $expand!(60), 61 => $expand!(61), 62 => $expand!(62), _ => $expand!(63), } }; } //immediate value: 0:31 macro_rules! 
constify_imm5 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm8) & 0b1_1111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), 3 => $expand!(3), 4 => $expand!(4), 5 => $expand!(5), 6 => $expand!(6), 7 => $expand!(7), 8 => $expand!(8), 9 => $expand!(9), 10 => $expand!(10), 11 => $expand!(11), 12 => $expand!(12), 13 => $expand!(13), 14 => $expand!(14), 15 => $expand!(15), 16 => $expand!(16), 17 => $expand!(17), 18 => $expand!(18), 19 => $expand!(19), 20 => $expand!(20), 21 => $expand!(21), 22 => $expand!(22), 23 => $expand!(23), 24 => $expand!(24), 25 => $expand!(25), 26 => $expand!(26), 27 => $expand!(27), 28 => $expand!(28), 29 => $expand!(29), 30 => $expand!(30), _ => $expand!(31), } }; } //immediate value: -16:15 macro_rules! constify_imm_s5 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm8) & 0b1_1111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), 3 => $expand!(3), 4 => $expand!(4), 5 => $expand!(5), 6 => $expand!(6), 7 => $expand!(7), 8 => $expand!(8), 9 => $expand!(9), 10 => $expand!(10), 11 => $expand!(11), 12 => $expand!(12), 13 => $expand!(13), 14 => $expand!(14), 16 => $expand!(-16), 17 => $expand!(-15), 18 => $expand!(-14), 19 => $expand!(-13), 20 => $expand!(-12), 21 => $expand!(-11), 22 => $expand!(-10), 23 => $expand!(-9), 24 => $expand!(-8), 25 => $expand!(-7), 26 => $expand!(-6), 27 => $expand!(-5), 28 => $expand!(-4), 29 => $expand!(-3), 30 => $expand!(-2), 31 => $expand!(-1), _ => $expand!(15), } }; } //immediate value: 0:15 macro_rules! constify_imm4 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm8) & 0b1111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), 3 => $expand!(3), 4 => $expand!(4), 5 => $expand!(5), 6 => $expand!(6), 7 => $expand!(7), 8 => $expand!(8), 9 => $expand!(9), 10 => $expand!(10), 11 => $expand!(11), 12 => $expand!(12), 13 => $expand!(13), 14 => $expand!(14), _ => $expand!(15), } }; } //immediate value: 0:7 macro_rules! constify_imm3 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm8) & 0b111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), 3 => $expand!(3), 4 => $expand!(4), 5 => $expand!(5), 6 => $expand!(6), _ => $expand!(7), } }; } //immediate value: 0:3 macro_rules! constify_imm2 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm8) & 0b11 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), _ => $expand!(3), } }; } //immediate value: 0:1 macro_rules! constify_imm1 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm8) & 0b1 { 0 => $expand!(0), _ => $expand!(1), } }; } core_arch-0.1.5/src/mod.rs010064400007650000024000000123741344322316300136050ustar0000000000000000//! `core_arch` #[macro_use] mod macros; #[cfg(any(target_arch = "arm", target_arch = "aarch64", dox))] mod acle; mod simd; #[cfg_attr( not(core_arch_docs), doc(include = "../stdsimd/crates/core_arch/src/core_arch_docs.md") )] #[cfg_attr(core_arch_docs, doc(include = "core_arch_docs.md"))] #[stable(feature = "simd_arch", since = "1.27.0")] pub mod arch { /// Platform-specific intrinsics for the `x86` platform. /// /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "x86", dox))] #[doc(cfg(target_arch = "x86"))] #[stable(feature = "simd_x86", since = "1.27.0")] pub mod x86 { #[stable(feature = "simd_x86", since = "1.27.0")] pub use crate::core_arch::x86::*; } /// Platform-specific intrinsics for the `x86_64` platform. 
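// A minimal sketch of how the constify_imm* macros defined above are meant to
// be consumed: an intrinsic wrapper takes a runtime integer, and the macro
// expands to a match whose arms invoke a callback macro with a literal, so the
// inner call always sees a compile-time immediate. Every `demo_*`/`raw_*` name
// below is hypothetical and only illustrates the pattern; it is not crate API.
macro_rules! demo_constify_imm2 {
    ($imm2:expr, $expand:ident) => {
        match ($imm2) & 0b11 {
            0 => $expand!(0),
            1 => $expand!(1),
            2 => $expand!(2),
            _ => $expand!(3),
        }
    };
}

// Stand-in for an intrinsic whose second argument must be a literal.
fn raw_shift(x: u32, imm: u32) -> u32 {
    x >> imm
}

pub fn demo_shift(x: u32, imm2: u32) -> u32 {
    // The callback closes over `x` textually: each arm becomes `raw_shift(x, <literal>)`.
    macro_rules! call {
        ($imm:expr) => {
            raw_shift(x, $imm)
        };
    }
    demo_constify_imm2!(imm2, call)
}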
/// /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "x86_64", dox))] #[doc(cfg(target_arch = "x86_64"))] #[stable(feature = "simd_x86", since = "1.27.0")] pub mod x86_64 { #[stable(feature = "simd_x86", since = "1.27.0")] pub use crate::core_arch::x86::*; #[stable(feature = "simd_x86", since = "1.27.0")] pub use crate::core_arch::x86_64::*; } /// Platform-specific intrinsics for the `arm` platform. /// /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "arm", dox))] #[doc(cfg(target_arch = "arm"))] #[unstable(feature = "stdsimd", issue = "27731")] pub mod arm { pub use crate::core_arch::arm::*; } /// Platform-specific intrinsics for the `aarch64` platform. /// /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "aarch64", dox))] #[doc(cfg(target_arch = "aarch64"))] #[unstable(feature = "stdsimd", issue = "27731")] pub mod aarch64 { pub use crate::core_arch::aarch64::*; pub use crate::core_arch::arm::*; } /// Platform-specific intrinsics for the `wasm32` platform. /// /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "wasm32", dox))] #[doc(cfg(target_arch = "wasm32"))] #[stable(feature = "simd_wasm32", since = "1.33.0")] pub mod wasm32 { #[stable(feature = "simd_wasm32", since = "1.33.0")] pub use crate::core_arch::wasm32::*; } /// Platform-specific intrinsics for the `mips` platform. /// /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "mips", dox))] #[doc(cfg(target_arch = "mips"))] #[unstable(feature = "stdsimd", issue = "27731")] pub mod mips { pub use crate::core_arch::mips::*; } /// Platform-specific intrinsics for the `mips64` platform. /// /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "mips64", dox))] #[doc(cfg(target_arch = "mips64"))] #[unstable(feature = "stdsimd", issue = "27731")] pub mod mips64 { pub use crate::core_arch::mips::*; } /// Platform-specific intrinsics for the `PowerPC` platform. /// /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "powerpc", dox))] #[doc(cfg(target_arch = "powerpc"))] #[unstable(feature = "stdsimd", issue = "27731")] pub mod powerpc { pub use crate::core_arch::powerpc::*; } /// Platform-specific intrinsics for the `PowerPC64` platform. /// /// See the [module documentation](../index.html) for more details. #[cfg(any(target_arch = "powerpc64", dox))] #[doc(cfg(target_arch = "powerpc64"))] #[unstable(feature = "stdsimd", issue = "27731")] pub mod powerpc64 { pub use crate::core_arch::powerpc64::*; } /// Platform-specific intrinsics for the `NVPTX` platform. /// /// See the [module documentation](../index.html) for more details. 
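// Sketch of the consumer's view of the cfg-gated modules above: each `pub mod`
// is only compiled for its own `target_arch` (or for `dox` builds), and
// downstream code reaches the re-exported intrinsics through
// `core::arch::<arch>` behind the same kind of cfg. The helper name
// `add_lanes_sse2` is an assumption for illustration only.
#[cfg(target_arch = "x86_64")]
unsafe fn add_lanes_sse2(
    a: core::arch::x86_64::__m128i,
    b: core::arch::x86_64::__m128i,
) -> core::arch::x86_64::__m128i {
    // SSE2 is baseline on x86_64, so no extra target_feature gate is needed here.
    core::arch::x86_64::_mm_add_epi32(a, b)
}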
#[cfg(any(target_arch = "nvptx", target_arch = "nvptx64", dox))] #[doc(cfg(any(target_arch = "nvptx", target_arch = "nvptx64")))] #[unstable(feature = "stdsimd", issue = "27731")] pub mod nvptx { pub use crate::core_arch::nvptx::*; } } mod simd_llvm; #[cfg(any(target_arch = "x86", target_arch = "x86_64", dox))] #[doc(cfg(any(target_arch = "x86", target_arch = "x86_64")))] mod x86; #[cfg(any(target_arch = "x86_64", dox))] #[doc(cfg(target_arch = "x86_64"))] mod x86_64; #[cfg(any(target_arch = "aarch64", dox))] #[doc(cfg(target_arch = "aarch64"))] mod aarch64; #[cfg(any(target_arch = "arm", target_arch = "aarch64", dox))] #[doc(cfg(any(target_arch = "arm", target_arch = "aarch64")))] mod arm; #[cfg(any(target_arch = "wasm32", dox))] #[doc(cfg(target_arch = "wasm32"))] mod wasm32; #[cfg(any(target_arch = "mips", target_arch = "mips64", dox))] #[doc(cfg(any(target_arch = "mips", target_arch = "mips64")))] mod mips; #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64", dox))] #[doc(cfg(any(target_arch = "powerpc", target_arch = "powerpc64")))] mod powerpc; #[cfg(any(target_arch = "powerpc64", dox))] #[doc(cfg(target_arch = "powerpc64"))] mod powerpc64; #[cfg(any(target_arch = "nvptx", target_arch = "nvptx64", dox))] #[doc(cfg(any(target_arch = "nvptx", target_arch = "nvptx64")))] mod nvptx; core_arch-0.1.5/src/nvptx/mod.rs010064400007650000024000000153121344322316300147570ustar0000000000000000//! NVPTX intrinsics (experimental) //! //! These intrinsics form the foundation of the CUDA //! programming model. //! //! The reference is the [CUDA C Programming Guide][cuda_c]. Relevant is also //! the [LLVM NVPTX Backend documentation][llvm_docs]. //! //! [cuda_c]: //! http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html //! [llvm_docs]: //! https://llvm.org/docs/NVPTXUsage.html use crate::ffi::c_void; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.nvvm.barrier0"] fn syncthreads() -> (); #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"] fn block_dim_x() -> i32; #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"] fn block_dim_y() -> i32; #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"] fn block_dim_z() -> i32; #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"] fn block_idx_x() -> i32; #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"] fn block_idx_y() -> i32; #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"] fn block_idx_z() -> i32; #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"] fn grid_dim_x() -> i32; #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"] fn grid_dim_y() -> i32; #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"] fn grid_dim_z() -> i32; #[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"] fn thread_idx_x() -> i32; #[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"] fn thread_idx_y() -> i32; #[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"] fn thread_idx_z() -> i32; } /// Synchronizes all threads in the block. #[inline] pub unsafe fn _syncthreads() -> () { syncthreads() } /// x-th thread-block dimension. #[inline] pub unsafe fn _block_dim_x() -> i32 { block_dim_x() } /// y-th thread-block dimension. #[inline] pub unsafe fn _block_dim_y() -> i32 { block_dim_y() } /// z-th thread-block dimension. #[inline] pub unsafe fn _block_dim_z() -> i32 { block_dim_z() } /// x-th thread-block index. #[inline] pub unsafe fn _block_idx_x() -> i32 { block_idx_x() } /// y-th thread-block index. #[inline] pub unsafe fn _block_idx_y() -> i32 { block_idx_y() } /// z-th thread-block index. #[inline] pub unsafe fn _block_idx_z() -> i32 { block_idx_z() } /// x-th block-grid dimension. 
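// Sketch: the usual CUDA "global index" expression, assembled from the
// block/thread intrinsics of this module. Only meaningful when compiled for an
// NVPTX target; the helper name is an assumption for illustration only.
unsafe fn demo_global_thread_idx_x() -> i32 {
    // blockIdx.x * blockDim.x + threadIdx.x
    _block_idx_x() * _block_dim_x() + _thread_idx_x()
}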
#[inline] pub unsafe fn _grid_dim_x() -> i32 { grid_dim_x() } /// y-th block-grid dimension. #[inline] pub unsafe fn _grid_dim_y() -> i32 { grid_dim_y() } /// z-th block-grid dimension. #[inline] pub unsafe fn _grid_dim_z() -> i32 { grid_dim_z() } /// x-th thread index. #[inline] pub unsafe fn _thread_idx_x() -> i32 { thread_idx_x() } /// y-th thread index. #[inline] pub unsafe fn _thread_idx_y() -> i32 { thread_idx_y() } /// z-th thread index. #[inline] pub unsafe fn _thread_idx_z() -> i32 { thread_idx_z() } /// Generates the trap instruction `TRAP` #[inline] pub unsafe fn trap() -> ! { crate::intrinsics::abort() } // Basic CUDA syscall declarations. extern "C" { /// Print formatted output from a kernel to a host-side output stream. /// /// Syscall arguments: /// * `status`: The status value that is returned by `vprintf`. /// * `format`: A pointer to the format specifier input (uses common `printf` format). /// * `valist`: A pointer to the valist input. /// /// ``` /// #[repr(C)] /// struct PrintArgs(f32, f32, f32, i32); /// /// vprintf( /// "int(%f + %f) = int(%f) = %d\n".as_ptr(), /// transmute(&PrintArgs(a, b, a + b, (a + b) as i32)), /// ); /// ``` /// /// Sources: /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#formatted-output), /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls). pub fn vprintf(format: *const u8, valist: *const c_void) -> i32; /// Allocate memory dynamically from a fixed-size heap in global memory. /// /// The CUDA in-kernel `malloc()` function allocates at least `size` bytes /// from the device heap and returns a pointer to the allocated memory /// or `NULL` if insufficient memory exists to fulfill the request. /// /// The returned pointer is guaranteed to be aligned to a 16-byte boundary. /// /// The memory allocated by a given CUDA thread via `malloc()` remains allocated /// for the lifetime of the CUDA context, or until it is explicitly released /// by a call to `free()`. It can be used by any other CUDA threads /// even from subsequent kernel launches. /// /// Sources: /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations), /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls). // FIXME(denzp): assign `malloc` and `nothrow` attributes. pub fn malloc(size: usize) -> *mut c_void; /// Free previously dynamically allocated memory. /// /// The CUDA in-kernel `free()` function deallocates the memory pointed to by `ptr`, /// which must have been returned by a previous call to `malloc()`. If `ptr` is NULL, /// the call to `free()` is ignored. /// /// Any CUDA thread may free memory allocated by another thread, but care should be taken /// to ensure that the same pointer is not freed more than once. Repeated calls to `free()` /// with the same `ptr` has undefined behavior. /// /// Sources: /// [Programming Guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#dynamic-global-memory-allocation-and-operations), /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls). // FIXME(denzp): assign `nothrow` attribute. pub fn free(ptr: *mut c_void); // Internal declaration of the syscall. Exported variant has // the `char_size` parameter set to `1` (single char size in bytes). 
fn __assertfail( message: *const u8, file: *const u8, line: u32, function: *const u8, char_size: usize, ); } /// Syscall to be used whenever the *assert expression produces a `false` value*. /// /// Syscall arguments: /// * `message`: The pointer to the string that should be output. /// * `file`: The pointer to the file name string associated with the assert. /// * `line`: The line number associated with the assert. /// * `function`: The pointer to the function name string associated with the assert. /// /// Source: /// [PTX Interoperability](https://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls). #[inline] pub unsafe fn __assert_fail(message: *const u8, file: *const u8, line: u32, function: *const u8) { __assertfail(message, file, line, function, 1) } core_arch-0.1.5/src/powerpc/altivec.rs010064400007650000024000001406501343447103600161370ustar0000000000000000//! PowerPC AltiVec intrinsics. //! //! AltiVec is a brandname trademarked by Freescale (previously Motorola) for //! the standard `Category:Vector` part of the Power ISA v.2.03 specification. //! This Category is also known as VMX (used by IBM), and "Velocity Engine" (a //! brand name previously used by Apple). //! //! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA //! NVlink)] and [POWER ISA v3.0B (for POWER9)]. //! //! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u //! [POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv #![allow(non_camel_case_types)] use crate::{ core_arch::{simd::*, simd_llvm::*}, mem::transmute, }; #[cfg(test)] use stdsimd_test::assert_instr; types! { /// PowerPC-specific 128-bit wide vector of sixteen packed `i8` pub struct vector_signed_char(i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8); /// PowerPC-specific 128-bit wide vector of sixteen packed `u8` pub struct vector_unsigned_char(u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8); /// PowerPC-specific 128-bit wide vector mask of sixteen packed elements pub struct vector_bool_char(i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8); /// PowerPC-specific 128-bit wide vector of eight packed `i16` pub struct vector_signed_short(i16, i16, i16, i16, i16, i16, i16, i16); /// PowerPC-specific 128-bit wide vector of eight packed `u16` pub struct vector_unsigned_short(u16, u16, u16, u16, u16, u16, u16, u16); /// PowerPC-specific 128-bit wide vector mask of eight packed elements pub struct vector_bool_short(i16, i16, i16, i16, i16, i16, i16, i16); // pub struct vector_pixel(???); /// PowerPC-specific 128-bit wide vector of four packed `i32` pub struct vector_signed_int(i32, i32, i32, i32); /// PowerPC-specific 128-bit wide vector of four packed `u32` pub struct vector_unsigned_int(u32, u32, u32, u32); /// PowerPC-specific 128-bit wide vector mask of four packed elements pub struct vector_bool_int(i32, i32, i32, i32); /// PowerPC-specific 128-bit wide vector of four packed `f32` pub struct vector_float(f32, f32, f32, f32); } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.ppc.altivec.vperm"] fn vperm( a: vector_signed_int, b: vector_signed_int, c: vector_unsigned_char, ) -> vector_signed_int; #[link_name = "llvm.ppc.altivec.vmhaddshs"] fn vmhaddshs( a: vector_signed_short, b: vector_signed_short, c: vector_signed_short, ) -> vector_signed_short; #[link_name = "llvm.ppc.altivec.vmhraddshs"] fn vmhraddshs( a: vector_signed_short, b: vector_signed_short, c: 
vector_signed_short, ) -> vector_signed_short; #[link_name = "llvm.ppc.altivec.vmsumuhs"] fn vmsumuhs( a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int, ) -> vector_unsigned_int; #[link_name = "llvm.ppc.altivec.vmsumshs"] fn vmsumshs( a: vector_signed_short, b: vector_signed_short, c: vector_signed_int, ) -> vector_signed_int; #[link_name = "llvm.ppc.altivec.vmsumubm"] fn vmsumubm( a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_int, ) -> vector_unsigned_int; #[link_name = "llvm.ppc.altivec.vmsummbm"] fn vmsummbm( a: vector_signed_char, b: vector_unsigned_char, c: vector_signed_int, ) -> vector_signed_int; #[link_name = "llvm.ppc.altivec.vmsumuhm"] fn vmsumuhm( a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int, ) -> vector_unsigned_int; #[link_name = "llvm.ppc.altivec.vmsumshm"] fn vmsumshm( a: vector_signed_short, b: vector_signed_short, c: vector_signed_int, ) -> vector_signed_int; #[link_name = "llvm.ppc.altivec.vmaddfp"] fn vmaddfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float; #[link_name = "llvm.ppc.altivec.vnmsubfp"] fn vnmsubfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float; #[link_name = "llvm.ppc.altivec.vsum2sws"] fn vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; #[link_name = "llvm.ppc.altivec.vsum4ubs"] fn vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int; #[link_name = "llvm.ppc.altivec.vsum4sbs"] fn vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int; #[link_name = "llvm.ppc.altivec.vsum4shs"] fn vsum4shs(a: vector_signed_short, b: vector_signed_int) -> vector_signed_int; #[link_name = "llvm.ppc.altivec.vmuleub"] fn vmuleub(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; #[link_name = "llvm.ppc.altivec.vmulesb"] fn vmulesb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short; #[link_name = "llvm.ppc.altivec.vmuleuh"] fn vmuleuh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; #[link_name = "llvm.ppc.altivec.vmulesh"] fn vmulesh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int; #[link_name = "llvm.ppc.altivec.vmuloub"] fn vmuloub(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; #[link_name = "llvm.ppc.altivec.vmulosb"] fn vmulosb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short; #[link_name = "llvm.ppc.altivec.vmulouh"] fn vmulouh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; #[link_name = "llvm.ppc.altivec.vmulosh"] fn vmulosh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int; } mod sealed { use super::*; #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmuleub))] unsafe fn vec_vmuleub( a: vector_unsigned_char, b: vector_unsigned_char, ) -> vector_unsigned_short { vmuleub(a, b) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmulesb))] unsafe fn vec_vmulesb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short { vmulesb(a, b) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmuleuh))] unsafe fn vec_vmuleuh( a: vector_unsigned_short, b: vector_unsigned_short, ) -> vector_unsigned_int { vmuleuh(a, b) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmulesh))] unsafe fn vec_vmulesh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int { vmulesh(a, b) } 
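// The remainder of this module follows one pattern: a private ("sealed") trait
// with an impl per vector type, plus a single generic public wrapper bounded
// on it, so one name such as `vec_mule`, `vec_msum` or `vec_add` can dispatch
// on its argument types. Below is a minimal standalone sketch of that shape,
// mirroring how `vec_add` later in this file is bounded on `sealed::VectorAdd`;
// all `demo_*` names are hypothetical, not crate API.
pub trait DemoVectorAdd<Other> {
    type Result;
    unsafe fn demo_vec_add(self, other: Other) -> Self::Result;
}

impl DemoVectorAdd<u32> for u32 {
    type Result = u32;
    unsafe fn demo_vec_add(self, other: u32) -> u32 {
        self.wrapping_add(other)
    }
}

impl DemoVectorAdd<i32> for i32 {
    type Result = i32;
    unsafe fn demo_vec_add(self, other: i32) -> i32 {
        self.wrapping_add(other)
    }
}

// Single public entry point: `demo_vec_add(a, b)` resolves to whichever impl
// matches the argument types.
pub unsafe fn demo_vec_add<T, U>(a: T, b: U) -> <T as DemoVectorAdd<U>>::Result
where
    T: DemoVectorAdd<U>,
{
    a.demo_vec_add(b)
}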
pub trait VectorMule { unsafe fn vec_mule(self, b: Self) -> Result; } impl VectorMule for vector_unsigned_char { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_mule(self, b: Self) -> vector_unsigned_short { vmuleub(self, b) } } impl VectorMule for vector_signed_char { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_mule(self, b: Self) -> vector_signed_short { vmulesb(self, b) } } impl VectorMule for vector_unsigned_short { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_mule(self, b: Self) -> vector_unsigned_int { vmuleuh(self, b) } } impl VectorMule for vector_signed_short { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_mule(self, b: Self) -> vector_signed_int { vmulesh(self, b) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmuloub))] unsafe fn vec_vmuloub( a: vector_unsigned_char, b: vector_unsigned_char, ) -> vector_unsigned_short { vmuloub(a, b) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmulosb))] unsafe fn vec_vmulosb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short { vmulosb(a, b) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmulouh))] unsafe fn vec_vmulouh( a: vector_unsigned_short, b: vector_unsigned_short, ) -> vector_unsigned_int { vmulouh(a, b) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmulosh))] unsafe fn vec_vmulosh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int { vmulosh(a, b) } pub trait VectorMulo { unsafe fn vec_mulo(self, b: Self) -> Result; } impl VectorMulo for vector_unsigned_char { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_mulo(self, b: Self) -> vector_unsigned_short { vmuloub(self, b) } } impl VectorMulo for vector_signed_char { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_mulo(self, b: Self) -> vector_signed_short { vmulosb(self, b) } } impl VectorMulo for vector_unsigned_short { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_mulo(self, b: Self) -> vector_unsigned_int { vmulouh(self, b) } } impl VectorMulo for vector_signed_short { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_mulo(self, b: Self) -> vector_signed_int { vmulosh(self, b) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vsum4ubs))] unsafe fn vec_vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int { vsum4ubs(a, b) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vsum4sbs))] unsafe fn vec_vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int { vsum4sbs(a, b) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vsum4shs))] unsafe fn vec_vsum4shs(a: vector_signed_short, b: vector_signed_int) -> vector_signed_int { vsum4shs(a, b) } pub trait VectorSum4s { unsafe fn vec_sum4s(self, b: Other) -> Other; } impl VectorSum4s for vector_unsigned_char { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_sum4s(self, b: vector_unsigned_int) -> vector_unsigned_int { vsum4ubs(self, b) } } impl VectorSum4s for vector_signed_char { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int { vsum4sbs(self, b) } } impl VectorSum4s for vector_signed_short { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int { vsum4shs(self, b) } } #[inline] 
#[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vsum2sws))] unsafe fn vec_vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { vsum2sws(a, b) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vnmsubfp))] unsafe fn vec_vnmsubfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float { vnmsubfp(a, b, c) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmaddfp))] unsafe fn vec_vmaddfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float { vmaddfp(a, b, c) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmsumubm))] unsafe fn vec_vmsumubm( a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_int, ) -> vector_unsigned_int { vmsumubm(a, b, c) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmsummbm))] unsafe fn vec_vmsummbm( a: vector_signed_char, b: vector_unsigned_char, c: vector_signed_int, ) -> vector_signed_int { vmsummbm(a, b, c) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmsumuhm))] unsafe fn vec_vmsumuhm( a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int, ) -> vector_unsigned_int { vmsumuhm(a, b, c) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmsumshm))] unsafe fn vec_vmsumshm( a: vector_signed_short, b: vector_signed_short, c: vector_signed_int, ) -> vector_signed_int { vmsumshm(a, b, c) } pub trait VectorMsum { unsafe fn vec_msum(self, b: B, c: Other) -> Other; } impl VectorMsum for vector_unsigned_char { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_msum( self, b: vector_unsigned_char, c: vector_unsigned_int, ) -> vector_unsigned_int { vmsumubm(self, b, c) } } impl VectorMsum for vector_signed_char { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_msum( self, b: vector_unsigned_char, c: vector_signed_int, ) -> vector_signed_int { vmsummbm(self, b, c) } } impl VectorMsum for vector_unsigned_short { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_msum( self, b: vector_unsigned_short, c: vector_unsigned_int, ) -> vector_unsigned_int { vmsumuhm(self, b, c) } } impl VectorMsum for vector_signed_short { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_msum( self, b: vector_signed_short, c: vector_signed_int, ) -> vector_signed_int { vmsumshm(self, b, c) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmsumuhs))] unsafe fn vec_vmsumuhs( a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int, ) -> vector_unsigned_int { vmsumuhs(a, b, c) } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmsumshs))] unsafe fn vec_vmsumshs( a: vector_signed_short, b: vector_signed_short, c: vector_signed_int, ) -> vector_signed_int { vmsumshs(a, b, c) } pub trait VectorMsums { unsafe fn vec_msums(self, b: Self, c: Other) -> Other; } impl VectorMsums for vector_unsigned_short { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_msums(self, b: Self, c: vector_unsigned_int) -> vector_unsigned_int { vmsumuhs(self, b, c) } } impl VectorMsums for vector_signed_short { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_msums(self, b: Self, c: vector_signed_int) -> vector_signed_int { vmsumshs(self, b, c) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vperm))] unsafe fn vec_vperm( a: vector_signed_int, b: vector_signed_int, 
c: vector_unsigned_char, ) -> vector_signed_int { vperm(a, b, c) } pub trait VectorPerm { unsafe fn vec_vperm(self, b: Self, c: vector_unsigned_char) -> Self; } macro_rules! vector_perm { {$impl: ident} => { impl VectorPerm for $impl { #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_vperm(self, b: Self, c: vector_unsigned_char) -> Self { transmute(vec_vperm(transmute(self), transmute(b), c)) } } } } vector_perm! { vector_signed_char } vector_perm! { vector_unsigned_char } vector_perm! { vector_bool_char } vector_perm! { vector_signed_short } vector_perm! { vector_unsigned_short } vector_perm! { vector_bool_short } vector_perm! { vector_signed_int } vector_perm! { vector_unsigned_int } vector_perm! { vector_bool_int } vector_perm! { vector_float } pub trait VectorAdd { type Result; unsafe fn vec_add(self, other: Other) -> Self::Result; } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vaddubm))] pub unsafe fn vec_add_bc_sc(a: vector_bool_char, b: vector_signed_char) -> vector_signed_char { simd_add(transmute(a), b) } impl VectorAdd for vector_bool_char { type Result = vector_signed_char; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_signed_char) -> Self::Result { vec_add_bc_sc(self, other) } } impl VectorAdd for vector_signed_char { type Result = vector_signed_char; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_bool_char) -> Self::Result { other.vec_add(self) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vaddubm))] pub unsafe fn vec_add_sc_sc( a: vector_signed_char, b: vector_signed_char, ) -> vector_signed_char { simd_add(a, b) } impl VectorAdd for vector_signed_char { type Result = vector_signed_char; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_signed_char) -> Self::Result { vec_add_sc_sc(self, other) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vaddubm))] pub unsafe fn vec_add_bc_uc( a: vector_bool_char, b: vector_unsigned_char, ) -> vector_unsigned_char { simd_add(transmute(a), b) } impl VectorAdd for vector_bool_char { type Result = vector_unsigned_char; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_unsigned_char) -> Self::Result { vec_add_bc_uc(self, other) } } impl VectorAdd for vector_unsigned_char { type Result = vector_unsigned_char; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_bool_char) -> Self::Result { other.vec_add(self) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vaddubm))] pub unsafe fn vec_add_uc_uc( a: vector_unsigned_char, b: vector_unsigned_char, ) -> vector_unsigned_char { simd_add(a, b) } impl VectorAdd for vector_unsigned_char { type Result = vector_unsigned_char; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_unsigned_char) -> Self::Result { vec_add_uc_uc(self, other) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vadduhm))] pub unsafe fn vec_add_bs_ss( a: vector_bool_short, b: vector_signed_short, ) -> vector_signed_short { let a: i16x8 = transmute(a); let a: vector_signed_short = simd_cast(a); simd_add(a, b) } impl VectorAdd for vector_bool_short { type Result = vector_signed_short; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_signed_short) -> Self::Result { vec_add_bs_ss(self, other) } } impl 
VectorAdd for vector_signed_short { type Result = vector_signed_short; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_bool_short) -> Self::Result { other.vec_add(self) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vadduhm))] pub unsafe fn vec_add_ss_ss( a: vector_signed_short, b: vector_signed_short, ) -> vector_signed_short { simd_add(a, b) } impl VectorAdd for vector_signed_short { type Result = vector_signed_short; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_signed_short) -> Self::Result { vec_add_ss_ss(self, other) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vadduhm))] pub unsafe fn vec_add_bs_us( a: vector_bool_short, b: vector_unsigned_short, ) -> vector_unsigned_short { let a: i16x8 = transmute(a); let a: vector_unsigned_short = simd_cast(a); simd_add(a, b) } impl VectorAdd for vector_bool_short { type Result = vector_unsigned_short; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_unsigned_short) -> Self::Result { vec_add_bs_us(self, other) } } impl VectorAdd for vector_unsigned_short { type Result = vector_unsigned_short; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_bool_short) -> Self::Result { other.vec_add(self) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vadduhm))] pub unsafe fn vec_add_us_us( a: vector_unsigned_short, b: vector_unsigned_short, ) -> vector_unsigned_short { simd_add(a, b) } impl VectorAdd for vector_unsigned_short { type Result = vector_unsigned_short; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_unsigned_short) -> Self::Result { vec_add_us_us(self, other) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vadduwm))] pub unsafe fn vec_add_bi_si(a: vector_bool_int, b: vector_signed_int) -> vector_signed_int { let a: i32x4 = transmute(a); let a: vector_signed_int = simd_cast(a); simd_add(a, b) } impl VectorAdd for vector_bool_int { type Result = vector_signed_int; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_signed_int) -> Self::Result { vec_add_bi_si(self, other) } } impl VectorAdd for vector_signed_int { type Result = vector_signed_int; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_bool_int) -> Self::Result { other.vec_add(self) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vadduwm))] pub unsafe fn vec_add_si_si(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { simd_add(a, b) } impl VectorAdd for vector_signed_int { type Result = vector_signed_int; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_signed_int) -> Self::Result { vec_add_si_si(self, other) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vadduwm))] pub unsafe fn vec_add_bi_ui(a: vector_bool_int, b: vector_unsigned_int) -> vector_unsigned_int { let a: i32x4 = transmute(a); let a: vector_unsigned_int = simd_cast(a); simd_add(a, b) } impl VectorAdd for vector_bool_int { type Result = vector_unsigned_int; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_unsigned_int) -> Self::Result { vec_add_bi_ui(self, other) } } impl VectorAdd for vector_unsigned_int { type Result = vector_unsigned_int; #[inline] #[target_feature(enable = 
"altivec")] unsafe fn vec_add(self, other: vector_bool_int) -> Self::Result { other.vec_add(self) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vadduwm))] pub unsafe fn vec_add_ui_ui( a: vector_unsigned_int, b: vector_unsigned_int, ) -> vector_unsigned_int { simd_add(a, b) } impl VectorAdd for vector_unsigned_int { type Result = vector_unsigned_int; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_unsigned_int) -> Self::Result { vec_add_ui_ui(self, other) } } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(xvaddsp))] pub unsafe fn vec_add_float_float(a: vector_float, b: vector_float) -> vector_float { simd_add(a, b) } impl VectorAdd for vector_float { type Result = vector_float; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_add(self, other: vector_float) -> Self::Result { vec_add_float_float(self, other) } } pub trait VectorMladd { type Result; unsafe fn vec_mladd(self, b: Other, c: Other) -> Self::Result; } #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmladduhm))] unsafe fn mladd(a: i16x8, b: i16x8, c: i16x8) -> i16x8 { simd_add(simd_mul(a, b), c) } macro_rules! vector_mladd { ($a: ident, $bc: ident, $d: ident) => { impl VectorMladd<$bc> for $a { type Result = $d; #[inline] #[target_feature(enable = "altivec")] unsafe fn vec_mladd(self, b: $bc, c: $bc) -> Self::Result { let a: i16x8 = transmute(self); let b: i16x8 = transmute(b); let c: i16x8 = transmute(c); transmute(mladd(a, b, c)) } } }; } vector_mladd! { vector_unsigned_short, vector_unsigned_short, vector_unsigned_short } vector_mladd! { vector_unsigned_short, vector_signed_short, vector_signed_short } vector_mladd! { vector_signed_short, vector_unsigned_short, vector_signed_short } vector_mladd! { vector_signed_short, vector_signed_short, vector_signed_short } } /// Vector add. #[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_add(a: T, b: U) -> >::Result where T: sealed::VectorAdd, { a.vec_add(b) } /// Endian-biased intrinsics #[cfg(target_endian = "little")] mod endian { use super::*; /// Vector permute. 
#[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_perm(a: T, b: T, c: vector_unsigned_char) -> T where T: sealed::VectorPerm, { // vperm has big-endian bias // // Xor the mask and flip the arguments let d = transmute(u8x16::new( 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, )); let c = simd_xor(c, d); b.vec_vperm(a, c) } /// Vector Sum Across Partial (1/2) Saturated #[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { // vsum2sws has big-endian bias // // swap the even b elements with the odd ones let flip = transmute(u8x16::new( 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, )); let b = vec_perm(b, b, flip); let c = vsum2sws(a, b); vec_perm(c, c, flip) } // Even and Odd are swapped in little-endian /// Vector Multiply Even #[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_mule(a: T, b: T) -> U where T: sealed::VectorMulo, { a.vec_mulo(b) } /// Vector Multiply Odd #[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_mulo(a: T, b: T) -> U where T: sealed::VectorMule, { a.vec_mule(b) } } /// Vector Multiply Add Saturated #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmhaddshs))] pub unsafe fn vec_madds( a: vector_signed_short, b: vector_signed_short, c: vector_signed_short, ) -> vector_signed_short { vmhaddshs(a, b, c) } /// Vector Multiply Low and Add Unsigned Half Word #[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_mladd(a: T, b: U, c: U) -> >::Result where T: sealed::VectorMladd, { a.vec_mladd(b, c) } /// Vector Multiply Round and Add Saturated #[inline] #[target_feature(enable = "altivec")] #[cfg_attr(test, assert_instr(vmhraddshs))] pub unsafe fn vec_mradds( a: vector_signed_short, b: vector_signed_short, c: vector_signed_short, ) -> vector_signed_short { vmhraddshs(a, b, c) } /// Vector Multiply Sum #[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_msum(a: T, b: B, c: U) -> U where T: sealed::VectorMsum, { a.vec_msum(b, c) } /// Vector Multiply Sum Saturated #[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_msums(a: T, b: T, c: U) -> U where T: sealed::VectorMsums, { a.vec_msums(b, c) } /// Vector Multiply Add #[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_madd(a: vector_float, b: vector_float, c: vector_float) -> vector_float { vmaddfp(a, b, c) } /// Vector Negative Multiply Subtract #[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_nmsub(a: vector_float, b: vector_float, c: vector_float) -> vector_float { vnmsubfp(a, b, c) } /// Vector Sum Across Partial (1/4) Saturated #[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_sum4s(a: T, b: U) -> U where T: sealed::VectorSum4s, { a.vec_sum4s(b) } #[cfg(target_endian = "big")] mod endian { use super::*; /// Vector permute. 
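///
/// A scalar model of the underlying `vperm` selection, added here as a hedged
/// sketch (it is not the actual implementation): on big-endian targets the
/// selector is handed to the hardware unchanged, and each of its bytes indexes
/// the 32-byte concatenation of `a` and `b`.
///
/// ```ignore
/// // `a`, `b`, `c` and `r` viewed as plain byte arrays, in vector element order.
/// for i in 0..16 {
///     let j = (c[i] & 0x1f) as usize;
///     r[i] = if j < 16 { a[j] } else { b[j - 16] };
/// }
/// ```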
#[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_perm(a: T, b: T, c: vector_unsigned_char) -> T where T: sealed::VectorPerm, { a.vec_vperm(b, c) } /// Vector Sum Across Partial (1/2) Saturated #[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int { vsum2sws(a, b) } /// Vector Multiply Even #[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_mule(a: T, b: T) -> U where T: sealed::VectorMule, { a.vec_mule(b) } /// Vector Multiply Odd #[inline] #[target_feature(enable = "altivec")] pub unsafe fn vec_mulo(a: T, b: T) -> U where T: sealed::VectorMulo, { a.vec_mulo(b) } } pub use self::endian::*; #[cfg(test)] mod tests { #[cfg(target_arch = "powerpc")] use crate::core_arch::arch::powerpc::*; #[cfg(target_arch = "powerpc64")] use crate::core_arch::arch::powerpc64::*; use std::mem::transmute; use crate::core_arch::simd::*; use stdsimd_test::simd_test; macro_rules! test_vec_perm { {$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => { #[simd_test(enable = "altivec")] unsafe fn $name() { let a: $longtype = transmute($shorttype::new($($a),+)); let b: $longtype = transmute($shorttype::new($($b),+)); let c: vector_unsigned_char = transmute(u8x16::new($($c),+)); let d = $shorttype::new($($d),+); let r: $shorttype = transmute(vec_perm(a, b, c)); assert_eq!(d, r); } } } test_vec_perm! {test_vec_perm_u8x16, u8x16, vector_unsigned_char, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115], [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], [0, 1, 100, 101, 2, 3, 102, 103, 4, 5, 104, 105, 6, 7, 106, 107]} test_vec_perm! {test_vec_perm_i8x16, i8x16, vector_signed_char, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115], [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], [0, 1, 100, 101, 2, 3, 102, 103, 4, 5, 104, 105, 6, 7, 106, 107]} test_vec_perm! {test_vec_perm_m8x16, m8x16, vector_bool_char, [false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false], [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true], [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], [false, false, true, true, false, false, true, true, false, false, true, true, false, false, true, true]} test_vec_perm! {test_vec_perm_u16x8, u16x8, vector_unsigned_short, [0, 1, 2, 3, 4, 5, 6, 7], [10, 11, 12, 13, 14, 15, 16, 17], [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], [0, 10, 1, 11, 2, 12, 3, 13]} test_vec_perm! {test_vec_perm_i16x8, i16x8, vector_signed_short, [0, 1, 2, 3, 4, 5, 6, 7], [10, 11, 12, 13, 14, 15, 16, 17], [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], [0, 10, 1, 11, 2, 12, 3, 13]} test_vec_perm! {test_vec_perm_m16x8, m16x8, vector_bool_short, [false, false, false, false, false, false, false, false], [true, true, true, true, true, true, true, true], [0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17], [false, true, false, true, false, true, false, true]} test_vec_perm! 
{test_vec_perm_u32x4, u32x4, vector_unsigned_int, [0, 1, 2, 3], [10, 11, 12, 13], [0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17], [0, 10, 1, 11]} test_vec_perm! {test_vec_perm_i32x4, i32x4, vector_signed_int, [0, 1, 2, 3], [10, 11, 12, 13], [0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17], [0, 10, 1, 11]} test_vec_perm! {test_vec_perm_m32x4, m32x4, vector_bool_int, [false, false, false, false], [true, true, true, true], [0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17], [false, true, false, true]} test_vec_perm! {test_vec_perm_f32x4, f32x4, vector_float, [0.0, 1.0, 2.0, 3.0], [1.0, 1.1, 1.2, 1.3], [0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13, 0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17], [0.0, 1.0, 1.0, 1.1]} #[simd_test(enable = "altivec")] unsafe fn test_vec_madds() { let a: vector_signed_short = transmute(i16x8::new( 0 * 256, 1 * 256, 2 * 256, 3 * 256, 4 * 256, 5 * 256, 6 * 256, 7 * 256, )); let b: vector_signed_short = transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); let c: vector_signed_short = transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, 7)); let d = i16x8::new(0, 3, 6, 9, 12, 15, 18, 21); assert_eq!(d, transmute(vec_madds(a, b, c))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_madd_float() { let a: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); let b: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); let c: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); let d = f32x4::new( 0.1 * 0.1 + 0.1, 0.2 * 0.2 + 0.2, 0.3 * 0.3 + 0.3, 0.4 * 0.4 + 0.4, ); assert_eq!(d, transmute(vec_madd(a, b, c))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_nmsub_float() { let a: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); let b: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); let c: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4)); let d = f32x4::new( -(0.1 * 0.1 - 0.1), -(0.2 * 0.2 - 0.2), -(0.3 * 0.3 - 0.3), -(0.4 * 0.4 - 0.4), ); assert_eq!(d, transmute(vec_nmsub(a, b, c))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_mradds() { let a: vector_signed_short = transmute(i16x8::new( 0 * 256, 1 * 256, 2 * 256, 3 * 256, 4 * 256, 5 * 256, 6 * 256, 7 * 256, )); let b: vector_signed_short = transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); let c: vector_signed_short = transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, i16::max_value() - 1)); let d = i16x8::new(0, 3, 6, 9, 12, 15, 18, i16::max_value()); assert_eq!(d, transmute(vec_mradds(a, b, c))); } macro_rules! test_vec_mladd { {$name:ident, $sa:ident, $la:ident, $sbc:ident, $lbc:ident, $sd:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => { #[simd_test(enable = "altivec")] unsafe fn $name() { let a: $la = transmute($sa::new($($a),+)); let b: $lbc = transmute($sbc::new($($b),+)); let c = transmute($sbc::new($($c),+)); let d = $sd::new($($d),+); assert_eq!(d, transmute(vec_mladd(a, b, c))); } } } test_vec_mladd! { test_vec_mladd_u16x8_u16x8, u16x8, vector_unsigned_short, u16x8, vector_unsigned_short, u16x8, [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56] } test_vec_mladd! { test_vec_mladd_u16x8_i16x8, u16x8, vector_unsigned_short, i16x8, vector_unsigned_short, i16x8, [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56] } test_vec_mladd! 
{ test_vec_mladd_i16x8_u16x8, i16x8, vector_signed_short, u16x8, vector_unsigned_short, i16x8, [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56] } test_vec_mladd! { test_vec_mladd_i16x8_i16x8, i16x8, vector_signed_short, i16x8, vector_unsigned_short, i16x8, [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56] } #[simd_test(enable = "altivec")] unsafe fn test_vec_msum_unsigned_char() { let a: vector_unsigned_char = transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); let b: vector_unsigned_char = transmute(u8x16::new( 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, )); let c: vector_unsigned_int = transmute(u32x4::new(0, 1, 2, 3)); let d = u32x4::new( (0 + 1 + 2 + 3) * 255 + 0, (4 + 5 + 6 + 7) * 255 + 1, (0 + 1 + 2 + 3) * 255 + 2, (4 + 5 + 6 + 7) * 255 + 3, ); assert_eq!(d, transmute(vec_msum(a, b, c))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_msum_signed_char() { let a: vector_signed_char = transmute(i8x16::new( 0, -1, 2, -3, 1, -1, 1, -1, 0, 1, 2, 3, 4, -5, -6, -7, )); let b: vector_unsigned_char = transmute(i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)); let c: vector_signed_int = transmute(u32x4::new(0, 1, 2, 3)); let d = i32x4::new( (0 - 1 + 2 - 3) + 0, (0) + 1, (0 + 1 + 2 + 3) + 2, (4 - 5 - 6 - 7) + 3, ); assert_eq!(d, transmute(vec_msum(a, b, c))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_msum_unsigned_short() { let a: vector_unsigned_short = transmute(u16x8::new( 0 * 256, 1 * 256, 2 * 256, 3 * 256, 4 * 256, 5 * 256, 6 * 256, 7 * 256, )); let b: vector_unsigned_short = transmute(u16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); let c: vector_unsigned_int = transmute(u32x4::new(0, 1, 2, 3)); let d = u32x4::new( (0 + 1) * 256 * 256 + 0, (2 + 3) * 256 * 256 + 1, (4 + 5) * 256 * 256 + 2, (6 + 7) * 256 * 256 + 3, ); assert_eq!(d, transmute(vec_msum(a, b, c))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_msum_signed_short() { let a: vector_signed_short = transmute(i16x8::new( 0 * 256, -1 * 256, 2 * 256, -3 * 256, 4 * 256, -5 * 256, 6 * 256, -7 * 256, )); let b: vector_signed_short = transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); let c: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3)); let d = i32x4::new( (0 - 1) * 256 * 256 + 0, (2 - 3) * 256 * 256 + 1, (4 - 5) * 256 * 256 + 2, (6 - 7) * 256 * 256 + 3, ); assert_eq!(d, transmute(vec_msum(a, b, c))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_msums_unsigned() { let a: vector_unsigned_short = transmute(u16x8::new( 0 * 256, 1 * 256, 2 * 256, 3 * 256, 4 * 256, 5 * 256, 6 * 256, 7 * 256, )); let b: vector_unsigned_short = transmute(u16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); let c: vector_unsigned_int = transmute(u32x4::new(0, 1, 2, 3)); let d = u32x4::new( (0 + 1) * 256 * 256 + 0, (2 + 3) * 256 * 256 + 1, (4 + 5) * 256 * 256 + 2, (6 + 7) * 256 * 256 + 3, ); assert_eq!(d, transmute(vec_msums(a, b, c))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_msums_signed() { let a: vector_signed_short = transmute(i16x8::new( 0 * 256, -1 * 256, 2 * 256, -3 * 256, 4 * 256, -5 * 256, 6 * 256, -7 * 256, )); let b: vector_signed_short = transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256)); let c: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3)); let d = i32x4::new( (0 - 1) * 256 * 256 + 0, (2 - 3) * 256 * 256 + 1, (4 - 5) * 256 * 256 + 2, (6 - 7) * 256 * 256 + 3, ); assert_eq!(d, 
transmute(vec_msums(a, b, c))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_sum2s() { let a: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3)); let b: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3)); let d = i32x4::new(0, 0 + 1 + 1, 0, 2 + 3 + 3); assert_eq!(d, transmute(vec_sum2s(a, b))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_sum4s_unsigned_char() { let a: vector_unsigned_char = transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); let b: vector_unsigned_int = transmute(u32x4::new(0, 1, 2, 3)); let d = u32x4::new( 0 + 1 + 2 + 3 + 0, 4 + 5 + 6 + 7 + 1, 0 + 1 + 2 + 3 + 2, 4 + 5 + 6 + 7 + 3, ); assert_eq!(d, transmute(vec_sum4s(a, b))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_sum4s_signed_char() { let a: vector_signed_char = transmute(i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); let b: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3)); let d = i32x4::new( 0 + 1 + 2 + 3 + 0, 4 + 5 + 6 + 7 + 1, 0 + 1 + 2 + 3 + 2, 4 + 5 + 6 + 7 + 3, ); assert_eq!(d, transmute(vec_sum4s(a, b))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_sum4s_signed_short() { let a: vector_signed_short = transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, 7)); let b: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3)); let d = i32x4::new(0 + 1 + 0, 2 + 3 + 1, 4 + 5 + 2, 6 + 7 + 3); assert_eq!(d, transmute(vec_sum4s(a, b))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_mule_unsigned_char() { let a: vector_unsigned_char = transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); let d = u16x8::new(0 * 0, 2 * 2, 4 * 4, 6 * 6, 0 * 0, 2 * 2, 4 * 4, 6 * 6); assert_eq!(d, transmute(vec_mule(a, a))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_mule_signed_char() { let a: vector_signed_char = transmute(i8x16::new( 0, 1, -2, 3, -4, 5, -6, 7, 0, 1, 2, 3, 4, 5, 6, 7, )); let d = i16x8::new(0 * 0, 2 * 2, 4 * 4, 6 * 6, 0 * 0, 2 * 2, 4 * 4, 6 * 6); assert_eq!(d, transmute(vec_mule(a, a))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_mule_unsigned_short() { let a: vector_unsigned_short = transmute(u16x8::new(0, 1, 2, 3, 4, 5, 6, 7)); let d = u32x4::new(0 * 0, 2 * 2, 4 * 4, 6 * 6); assert_eq!(d, transmute(vec_mule(a, a))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_mule_signed_short() { let a: vector_signed_short = transmute(i16x8::new(0, 1, -2, 3, -4, 5, -6, 7)); let d = i32x4::new(0 * 0, 2 * 2, 4 * 4, 6 * 6); assert_eq!(d, transmute(vec_mule(a, a))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_mulo_unsigned_char() { let a: vector_unsigned_char = transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7)); let d = u16x8::new(1 * 1, 3 * 3, 5 * 5, 7 * 7, 1 * 1, 3 * 3, 5 * 5, 7 * 7); assert_eq!(d, transmute(vec_mulo(a, a))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_mulo_signed_char() { let a: vector_signed_char = transmute(i8x16::new( 0, 1, -2, 3, -4, 5, -6, 7, 0, 1, 2, 3, 4, 5, 6, 7, )); let d = i16x8::new(1 * 1, 3 * 3, 5 * 5, 7 * 7, 1 * 1, 3 * 3, 5 * 5, 7 * 7); assert_eq!(d, transmute(vec_mulo(a, a))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_mulo_unsigned_short() { let a: vector_unsigned_short = transmute(u16x8::new(0, 1, 2, 3, 4, 5, 6, 7)); let d = u32x4::new(1 * 1, 3 * 3, 5 * 5, 7 * 7); assert_eq!(d, transmute(vec_mulo(a, a))); } #[simd_test(enable = "altivec")] unsafe fn test_vec_mulo_signed_short() { let a: vector_signed_short = transmute(i16x8::new(0, 1, -2, 3, -4, 5, -6, 7)); let d = i32x4::new(1 * 1, 3 * 3, 5 * 5, 7 * 7); assert_eq!(d, 
transmute(vec_mulo(a, a))); } #[simd_test(enable = "altivec")] unsafe fn vec_add_i32x4_i32x4() { let x = i32x4::new(1, 2, 3, 4); let y = i32x4::new(4, 3, 2, 1); let x: vector_signed_int = transmute(x); let y: vector_signed_int = transmute(y); let z = vec_add(x, y); assert_eq!(i32x4::splat(5), transmute(z)); } } core_arch-0.1.5/src/powerpc/mod.rs010064400007650000024000000005531343447103600152640ustar0000000000000000//! PowerPC intrinsics #[cfg(target_feature = "altivec")] mod altivec; #[cfg(target_feature = "altivec")] pub use self::altivec::*; mod vsx; pub use self::vsx::*; #[cfg(test)] use stdsimd_test::assert_instr; /// Generates the trap instruction `TRAP` #[cfg_attr(test, assert_instr(trap))] #[inline] pub unsafe fn trap() -> ! { crate::intrinsics::abort() } core_arch-0.1.5/src/powerpc/vsx.rs010064400007650000024000000104121343447103600153200ustar0000000000000000//! PowerPC Vector Scalar eXtensions (VSX) intrinsics. //! //! The references are: [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA //! NVlink)] and [POWER ISA v3.0B (for POWER9)]. //! //! [POWER ISA v2.07B (for POWER8 & POWER8 with NVIDIA NVlink)]: https://ibm.box.com/s/jd5w15gz301s5b5dt375mshpq9c3lh4u //! [POWER ISA v3.0B (for POWER9)]: https://ibm.box.com/s/1hzcwkwf8rbju5h9iyf44wm94amnlcrv #![allow(non_camel_case_types)] use crate::core_arch::simd_llvm::*; #[cfg(test)] use stdsimd_test::assert_instr; use crate::mem; types! { // pub struct vector_Float16 = f16x8; /// PowerPC-specific 128-bit wide vector of two packed `i64` pub struct vector_signed_long(i64, i64); /// PowerPC-specific 128-bit wide vector of two packed `u64` pub struct vector_unsigned_long(u64, u64); /// PowerPC-specific 128-bit wide vector mask of two elements pub struct vector_bool_long(i64, i64); /// PowerPC-specific 128-bit wide vector of two packed `f64` pub struct vector_double(f64, f64); // pub struct vector_signed_long_long = vector_signed_long; // pub struct vector_unsigned_long_long = vector_unsigned_long; // pub struct vector_bool_long_long = vector_bool_long; // pub struct vector_signed___int128 = i128x1; // pub struct vector_unsigned___int128 = i128x1; } mod sealed { use super::*; use crate::core_arch::simd::*; pub trait VectorPermDI { unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self; } // xxpermdi has an big-endian bias and extended mnemonics #[inline] #[target_feature(enable = "vsx")] #[cfg_attr(all(test, target_endian = "little"), assert_instr(xxmrgld, dm = 0x0))] #[cfg_attr(all(test, target_endian = "big"), assert_instr(xxspltd, dm = 0x0))] unsafe fn xxpermdi(a: i64x2, b: i64x2, dm: u8) -> i64x2 { match dm & 0b11 { 0 => simd_shuffle2(a, b, [0b00, 0b10]), 1 => simd_shuffle2(a, b, [0b01, 0b10]), 2 => simd_shuffle2(a, b, [0b00, 0b11]), _ => simd_shuffle2(a, b, [0b01, 0b11]), } } macro_rules! vec_xxpermdi { {$impl: ident} => { impl VectorPermDI for $impl { #[inline] #[target_feature(enable = "vsx")] unsafe fn vec_xxpermdi(self, b: Self, dm: u8) -> Self { mem::transmute(xxpermdi(mem::transmute(self), mem::transmute(b), dm)) } } } } vec_xxpermdi! { vector_unsigned_long } vec_xxpermdi! { vector_signed_long } vec_xxpermdi! { vector_bool_long } vec_xxpermdi! { vector_double } } /// Vector permute. 
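///
/// # Examples
///
/// A hedged sketch, mirroring the `test_vec_xxpermdi_*` tests at the end of
/// this file; it is not run as a doc-test and assumes a VSX-enabled target
/// with `u64x2` and `mem::transmute` in scope. The two low bits of `dm` select
/// one doubleword from each operand:
/// `0 -> [a0, b0]`, `1 -> [a1, b0]`, `2 -> [a0, b1]`, `3 -> [a1, b1]`.
///
/// ```ignore
/// unsafe {
///     let a: vector_unsigned_long = mem::transmute(u64x2::new(0, 1));
///     let b: vector_unsigned_long = mem::transmute(u64x2::new(2, 3));
///     let r: u64x2 = mem::transmute(vec_xxpermdi(a, b, 1));
///     // r = u64x2::new(1, 2)
/// }
/// ```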
#[inline] #[target_feature(enable = "vsx")] #[rustc_args_required_const(2)] pub unsafe fn vec_xxpermdi(a: T, b: T, dm: u8) -> T where T: sealed::VectorPermDI, { a.vec_xxpermdi(b, dm) } #[cfg(test)] mod tests { #[cfg(target_arch = "powerpc")] use crate::core_arch::arch::powerpc::*; #[cfg(target_arch = "powerpc64")] use crate::core_arch::arch::powerpc64::*; use crate::core_arch::simd::*; use stdsimd_test::simd_test; macro_rules! test_vec_xxpermdi { {$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => { #[simd_test(enable = "vsx")] unsafe fn $name() { let a: $longtype = ::mem::transmute($shorttype::new($($a),+, $($b),+)); let b = ::mem::transmute($shorttype::new($($c),+, $($d),+)); assert_eq!($shorttype::new($($a),+, $($c),+), ::mem::transmute(vec_xxpermdi(a, b, 0))); assert_eq!($shorttype::new($($b),+, $($c),+), ::mem::transmute(vec_xxpermdi(a, b, 1))); assert_eq!($shorttype::new($($a),+, $($d),+), ::mem::transmute(vec_xxpermdi(a, b, 2))); assert_eq!($shorttype::new($($b),+, $($d),+), ::mem::transmute(vec_xxpermdi(a, b, 3))); } } } test_vec_xxpermdi! {test_vec_xxpermdi_u64x2, u64x2, vector_unsigned_long, [0], [1], [2], [3]} test_vec_xxpermdi! {test_vec_xxpermdi_i64x2, i64x2, vector_signed_long, [0], [-1], [2], [-3]} test_vec_xxpermdi! {test_vec_xxpermdi_m64x2, m64x2, vector_bool_long, [false], [true], [false], [true]} test_vec_xxpermdi! {test_vec_xxpermdi_f64x2, f64x2, vector_double, [0.0], [1.0], [2.0], [3.0]} } core_arch-0.1.5/src/powerpc64/mod.rs010064400007650000024000000004441343447103600154350ustar0000000000000000//! PowerPC 64 //! //! The reference is the [64-Bit ELF V2 ABI Specification - Power //! Architecture]. //! //! [64-Bit ELF V2 ABI Specification - Power Architecture]: http://openpowerfoundation.org/wp-content/uploads/resources/leabi/leabi-20170510.pdf pub use crate::core_arch::powerpc::*; core_arch-0.1.5/src/simd.rs010064400007650000024000000141541345561510300137620ustar0000000000000000//! Internal `#[repr(simd)]` types #![rustfmt::skip] #![allow(non_camel_case_types)] macro_rules! simd_ty { ($id:ident [$ety:ident]: $($elem_ty:ident),* | $($elem_name:ident),*) => { #[repr(simd)] #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct $id($(pub $elem_ty),*); #[allow(clippy::use_self)] impl $id { #[inline] pub(crate) const fn new($($elem_name: $elem_ty),*) -> Self { $id($($elem_name),*) } #[inline] pub(crate) const fn splat(value: $ety) -> Self { $id($({ #[allow(non_camel_case_types, dead_code)] struct $elem_name; value }),*) } #[inline] pub(crate) fn extract(self, index: usize) -> $ety { unsafe { crate::core_arch::simd_llvm::simd_extract(self, index as u32) } } } } } macro_rules! 
simd_m_ty { ($id:ident [$ety:ident]: $($elem_ty:ident),* | $($elem_name:ident),*) => { #[repr(simd)] #[derive(Copy, Clone, Debug, PartialEq)] pub(crate) struct $id($(pub $elem_ty),*); #[allow(clippy::use_self)] impl $id { #[inline] const fn bool_to_internal(x: bool) -> $ety { [0 as $ety, !(0 as $ety)][x as usize] } #[inline] pub(crate) const fn new($($elem_name: bool),*) -> Self { $id($(Self::bool_to_internal($elem_name)),*) } #[inline] pub(crate) const fn splat(value: bool) -> Self { $id($({ #[allow(non_camel_case_types, dead_code)] struct $elem_name; Self::bool_to_internal(value) }),*) } #[inline] pub(crate) fn extract(self, index: usize) -> bool { let r: $ety = unsafe { crate::core_arch::simd_llvm::simd_extract(self, index as u32) }; r != 0 } } } } // 16-bit wide types: simd_ty!(u8x2[u8]: u8, u8 | x0, x1); simd_ty!(i8x2[i8]: i8, i8 | x0, x1); // 32-bit wide types: simd_ty!(u8x4[u8]: u8, u8, u8, u8 | x0, x1, x2, x3); simd_ty!(u16x2[u16]: u16, u16 | x0, x1); simd_ty!(i8x4[i8]: i8, i8, i8, i8 | x0, x1, x2, x3); simd_ty!(i16x2[i16]: i16, i16 | x0, x1); // 64-bit wide types: simd_ty!(u8x8[u8]: u8, u8, u8, u8, u8, u8, u8, u8 | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(u16x4[u16]: u16, u16, u16, u16 | x0, x1, x2, x3); simd_ty!(u32x2[u32]: u32, u32 | x0, x1); simd_ty!(u64x1[u64]: u64 | x1); simd_ty!(i8x8[i8]: i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(i16x4[i16]: i16, i16, i16, i16 | x0, x1, x2, x3); simd_ty!(i32x2[i32]: i32, i32 | x0, x1); simd_ty!(i64x1[i64]: i64 | x1); simd_ty!(f32x2[f32]: f32, f32 | x0, x1); // 128-bit wide types: simd_ty!(u8x16[u8]: u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_ty!(u16x8[u16]: u16, u16, u16, u16, u16, u16, u16, u16 | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(u32x4[u32]: u32, u32, u32, u32 | x0, x1, x2, x3); simd_ty!(u64x2[u64]: u64, u64 | x0, x1); simd_ty!(i8x16[i8]: i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_ty!(i16x8[i16]: i16, i16, i16, i16, i16, i16, i16, i16 | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(i32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3); simd_ty!(i64x2[i64]: i64, i64 | x0, x1); simd_ty!(f32x4[f32]: f32, f32, f32, f32 | x0, x1, x2, x3); simd_ty!(f64x2[f64]: f64, f64 | x0, x1); simd_m_ty!(m8x16[i8]: i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_m_ty!(m16x8[i16]: i16, i16, i16, i16, i16, i16, i16, i16 | x0, x1, x2, x3, x4, x5, x6, x7); simd_m_ty!(m32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3); simd_m_ty!(m64x2[i64]: i64, i64 | x0, x1); // 256-bit wide types: simd_ty!(u8x32[u8]: u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 ); simd_ty!(u16x16[u16]: u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_ty!(u32x8[u32]: u32, u32, u32, u32, u32, u32, u32, u32 | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(u64x4[u64]: u64, u64, u64, u64 | x0, x1, x2, x3); simd_ty!(i8x32[i8]: i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | 
x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 ); simd_ty!(i16x16[i16]: i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ); simd_ty!(i32x8[i32]: i32, i32, i32, i32, i32, i32, i32, i32 | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3); // 512-bit wide types: simd_ty!(i32x16[i32]: i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15); simd_ty!(i64x8[i64]: i64, i64, i64, i64, i64, i64, i64, i64 | x0, x1, x2, x3, x4, x5, x6, x7); core_arch-0.1.5/src/simd_llvm.rs010064400007650000024000000044611345562034300150160ustar0000000000000000//! LLVM's simd platform intrinsics //! //! TODO: should use `link_llvm_intrinsic` instead: issue #112 extern "platform-intrinsic" { pub fn simd_eq(x: T, y: T) -> U; pub fn simd_ne(x: T, y: T) -> U; pub fn simd_lt(x: T, y: T) -> U; pub fn simd_le(x: T, y: T) -> U; pub fn simd_gt(x: T, y: T) -> U; pub fn simd_ge(x: T, y: T) -> U; pub fn simd_shuffle2(x: T, y: T, idx: [u32; 2]) -> U; pub fn simd_shuffle4(x: T, y: T, idx: [u32; 4]) -> U; pub fn simd_shuffle8(x: T, y: T, idx: [u32; 8]) -> U; pub fn simd_shuffle16(x: T, y: T, idx: [u32; 16]) -> U; pub fn simd_shuffle32(x: T, y: T, idx: [u32; 32]) -> U; pub fn simd_shuffle64(x: T, y: T, idx: [u32; 64]) -> U; pub fn simd_shuffle128(x: T, y: T, idx: [u32; 128]) -> U; pub fn simd_insert(x: T, idx: u32, val: U) -> T; pub fn simd_extract(x: T, idx: u32) -> U; pub fn simd_cast(x: T) -> U; pub fn simd_add(x: T, y: T) -> T; pub fn simd_sub(x: T, y: T) -> T; pub fn simd_mul(x: T, y: T) -> T; pub fn simd_div(x: T, y: T) -> T; pub fn simd_rem(x: T, y: T) -> T; pub fn simd_shl(x: T, y: T) -> T; pub fn simd_shr(x: T, y: T) -> T; pub fn simd_and(x: T, y: T) -> T; pub fn simd_or(x: T, y: T) -> T; pub fn simd_xor(x: T, y: T) -> T; pub fn simd_reduce_add_unordered(x: T) -> U; pub fn simd_reduce_mul_unordered(x: T) -> U; pub fn simd_reduce_add_ordered(x: T, acc: U) -> U; pub fn simd_reduce_mul_ordered(x: T, acc: U) -> U; pub fn simd_reduce_min(x: T) -> U; pub fn simd_reduce_max(x: T) -> U; pub fn simd_reduce_min_nanless(x: T) -> U; pub fn simd_reduce_max_nanless(x: T) -> U; pub fn simd_reduce_and(x: T) -> U; pub fn simd_reduce_or(x: T) -> U; pub fn simd_reduce_xor(x: T) -> U; pub fn simd_reduce_all(x: T) -> bool; pub fn simd_reduce_any(x: T) -> bool; pub fn simd_select(m: M, a: T, b: T) -> T; pub fn simd_select_bitmask(m: M, a: T, b: T) -> T; pub fn simd_fmin(a: T, b: T) -> T; pub fn simd_fmax(a: T, b: T) -> T; pub fn simd_fsqrt(a: T) -> T; pub fn simd_fma(a: T, b: T, c: T) -> T; } core_arch-0.1.5/src/v64.rs010064400007650000024000000043341343447103600134460ustar0000000000000000//! 64-bit wide vector types use crate::prelude::v1::*; use crate::core_arch::simd_llvm::*; define_ty_doc! { f32x2, f32, f32 | /// A 64-bit vector with 2 `f32` lanes. } define_impl! { f32x2, f32, 2, i32x2, x0, x1 } define_ty_doc! { u32x2, u32, u32 | /// A 64-bit vector with 2 `u32` lanes. } define_impl! { u32x2, u32, 2, i32x2, x0, x1 } define_ty! { i32x2, i32, i32 } define_impl! { i32x2, i32, 2, i32x2, x0, x1 } define_ty! { u16x4, u16, u16, u16, u16 } define_impl! { u16x4, u16, 4, i16x4, x0, x1, x2, x3 } define_ty! { i16x4, i16, i16, i16, i16 } define_impl! { i16x4, i16, 4, i16x4, x0, x1, x2, x3 } define_ty! 
{ u8x8, u8, u8, u8, u8, u8, u8, u8, u8 } define_impl! { u8x8, u8, 8, i8x8, x0, x1, x2, x3, x4, x5, x6, x7 } define_ty! { i8x8, i8, i8, i8, i8, i8, i8, i8, i8 } define_impl! { i8x8, i8, 8, i8x8, x0, x1, x2, x3, x4, x5, x6, x7 } define_from!(u32x2, i32x2, u16x4, i16x4, u8x8, i8x8); define_from!(i32x2, u32x2, u16x4, i16x4, u8x8, i8x8); define_from!(u16x4, u32x2, i32x2, i16x4, u8x8, i8x8); define_from!(i16x4, u32x2, i32x2, u16x4, u8x8, i8x8); define_from!(u8x8, u32x2, i32x2, u16x4, i16x4, i8x8); define_from!(i8x8, u32x2, i32x2, u16x4, i16x4, u8x8); define_common_ops!(f32x2, u32x2, i32x2, u16x4, i16x4, u8x8, i8x8); define_float_ops!(f32x2); define_integer_ops!( (u32x2, u32), (i32x2, i32), (u16x4, u16), (i16x4, i16), (u8x8, u8), (i8x8, i8) ); define_signed_integer_ops!(i32x2, i16x4, i8x8); define_casts!( (f32x2, f64x2, as_f64x2), (f32x2, u32x2, as_u32x2), (f32x2, i32x2, as_i32x2), (u32x2, f32x2, as_f32x2), (u32x2, i32x2, as_i32x2), (i32x2, f32x2, as_f32x2), (i32x2, u32x2, as_u32x2), (u16x4, i16x4, as_i16x4), (i16x4, u16x4, as_u16x4), (u8x8, i8x8, as_i8x8), (i8x8, u8x8, as_u8x8), (i8x8, i16x8, as_i16x8), (u8x8, i16x8, as_i16x8), (i16x4, i32x4, as_i32x4), (i32x2, i64x2, as_i64x2), (u8x8, u16x8, as_u16x8), (u16x4, u32x4, as_u32x4), (u16x4, i32x4, as_i32x4), (u32x2, u64x2, as_u64x2), (u32x2, i64x2, as_i64x2) ); #[cfg(test)] mod tests { use super::*; #[test] fn operators() { test_ops_si!(i8x8, i16x4, i32x2); test_ops_ui!(u8x8, u16x4, u32x2); test_ops_f!(f32x2); } } core_arch-0.1.5/src/wasm32/atomic.rs010064400007650000024000000115001342163752400154120ustar0000000000000000//! Intrinsics associated with WebAssembly's upcoming threads proposal. //! //! These intrinsics are all unstable because they're not actually stable in //! WebAssembly itself yet. The signatures may change as [the //! specification][spec] is updated. //! //! [spec]: https://github.com/WebAssembly/threads #![cfg(any(target_feature = "atomics", dox))] #[cfg(test)] use stdsimd_test::assert_instr; #[cfg(test)] use wasm_bindgen_test::wasm_bindgen_test; extern "C" { #[link_name = "llvm.wasm.atomic.wait.i32"] fn llvm_atomic_wait_i32(ptr: *mut i32, exp: i32, timeout: i64) -> i32; #[link_name = "llvm.wasm.atomic.wait.i64"] fn llvm_atomic_wait_i64(ptr: *mut i64, exp: i64, timeout: i64) -> i32; #[link_name = "llvm.wasm.atomic.notify"] fn llvm_atomic_notify(ptr: *mut i32, cnt: i32) -> i32; } /// Corresponding intrinsic to wasm's [`i32.atomic.wait` instruction][instr] /// /// This function, when called, will block the current thread if the memory /// pointed to by `ptr` is equal to `expression` (performing this action /// atomically). /// /// The argument `timeout_ns` is a maxinum number of nanoseconds the calling /// thread will be blocked for, if it blocks. If the timeout is negative then /// the calling thread will be blocked forever. /// /// The calling thread can only be woken up with a call to the `wake` intrinsic /// once it has been blocked. Changing the memory behind `ptr` will not wake /// the thread once it's blocked. /// /// # Return value /// /// * 0 - indicates that the thread blocked and then was woken up /// * 1 - the loaded value from `ptr` didn't match `expression`, the thread /// didn't block /// * 2 - the thread blocked, but the timeout expired. /// /// # Availability /// /// This intrinsic is only available **when the standard library itself is /// compiled with the `atomics` target feature**. 
This version of the standard /// library is not obtainable via `rustup`, but rather will require the /// standard library to be compiled from source. /// /// [instr]: https://github.com/WebAssembly/threads/blob/master/proposals/threads/Overview.md#wait #[inline] #[cfg_attr(test, assert_instr("i32.atomic.wait"))] pub unsafe fn i32_atomic_wait(ptr: *mut i32, expression: i32, timeout_ns: i64) -> i32 { llvm_atomic_wait_i32(ptr, expression, timeout_ns) } /// Corresponding intrinsic to wasm's [`i64.atomic.wait` instruction][instr] /// /// This function, when called, will block the current thread if the memory /// pointed to by `ptr` is equal to `expression` (performing this action /// atomically). /// /// The argument `timeout_ns` is a maxinum number of nanoseconds the calling /// thread will be blocked for, if it blocks. If the timeout is negative then /// the calling thread will be blocked forever. /// /// The calling thread can only be woken up with a call to the `wake` intrinsic /// once it has been blocked. Changing the memory behind `ptr` will not wake /// the thread once it's blocked. /// /// # Return value /// /// * 0 - indicates that the thread blocked and then was woken up /// * 1 - the loaded value from `ptr` didn't match `expression`, the thread /// didn't block /// * 2 - the thread blocked, but the timeout expired. /// /// # Availability /// /// This intrinsic is only available **when the standard library itself is /// compiled with the `atomics` target feature**. This version of the standard /// library is not obtainable via `rustup`, but rather will require the /// standard library to be compiled from source. /// /// [instr]: https://github.com/WebAssembly/threads/blob/master/proposals/threads/Overview.md#wait #[inline] #[cfg_attr(test, assert_instr("i64.atomic.wait"))] pub unsafe fn i64_atomic_wait(ptr: *mut i64, expression: i64, timeout_ns: i64) -> i32 { llvm_atomic_wait_i64(ptr, expression, timeout_ns) } /// Corresponding intrinsic to wasm's [`atomic.notify` instruction][instr] /// /// This function will notify a number of threads blocked on the address /// indicated by `ptr`. Threads previously blocked with the `i32_atomic_wait` /// and `i64_atomic_wait` functions above will be woken up. /// /// The `waiters` argument indicates how many waiters should be woken up (a /// maximum). If the value is zero no waiters are woken up. /// /// # Return value /// /// Returns the number of waiters which were actually notified. /// /// # Availability /// /// This intrinsic is only available **when the standard library itself is /// compiled with the `atomics` target feature**. This version of the standard /// library is not obtainable via `rustup`, but rather will require the /// standard library to be compiled from source. 
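///
/// # Examples
///
/// A hedged sketch of the wait/notify pairing, not run as a doc-test: it
/// requires a standard library built with the `atomics` target feature and a
/// shared linear memory, and `futex_addr` below is a hypothetical address
/// that both threads agree on.
///
/// ```ignore
/// // Hypothetical address of an i32 in shared linear memory.
/// let futex_addr = 0x1000 as *mut i32;
///
/// // Waiting thread: block while the value at `futex_addr` is still 0,
/// // with no timeout (negative means "wait forever").
/// let status = unsafe { i32_atomic_wait(futex_addr, 0, -1) };
///
/// // Notifying thread: wake at most one waiter parked on that address.
/// let woken = unsafe { atomic_notify(futex_addr, 1) };
/// ```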
/// /// [instr]: https://github.com/WebAssembly/threads/blob/master/proposals/threads/Overview.md#wake #[inline] #[cfg_attr(test, assert_instr("atomic.wake"))] pub unsafe fn atomic_notify(ptr: *mut i32, waiters: u32) -> u32 { llvm_atomic_notify(ptr, waiters as i32) as u32 } core_arch-0.1.5/src/wasm32/memory.rs010064400007650000024000000050721343447103600154530ustar0000000000000000#[cfg(test)] use stdsimd_test::assert_instr; #[cfg(test)] use wasm_bindgen_test::wasm_bindgen_test; extern "C" { #[link_name = "llvm.wasm.memory.grow.i32"] fn llvm_memory_grow(mem: i32, pages: i32) -> i32; #[link_name = "llvm.wasm.memory.size.i32"] fn llvm_memory_size(mem: i32) -> i32; } /// Corresponding intrinsic to wasm's [`memory.size` instruction][instr] /// /// This function, when called, will return the current memory size in units of /// pages. The current WebAssembly page size is 65536 bytes (64 KB). /// /// The argument `mem` is the numerical index of which memory to return the /// size of. Note that currently the WebAssembly specification only supports one /// memory, so it is required that zero is passed in. The argument is present to /// be forward-compatible with future WebAssembly revisions. If a nonzero /// argument is passed to this function it will currently unconditionally abort. /// /// [instr]: http://webassembly.github.io/spec/core/exec/instructions.html#exec-memory-size #[inline] #[cfg_attr(test, assert_instr("memory.size", mem = 0))] #[rustc_args_required_const(0)] #[stable(feature = "simd_wasm32", since = "1.33.0")] pub fn memory_size(mem: u32) -> usize { unsafe { if mem != 0 { crate::intrinsics::abort(); } llvm_memory_size(0) as usize } } /// Corresponding intrinsic to wasm's [`memory.grow` instruction][instr] /// /// This function, when called, will attempt to grow the default linear memory /// by the specified `delta` of pages. The current WebAssembly page size is /// 65536 bytes (64 KB). If memory is successfully grown then the previous size /// of memory, in pages, is returned. If memory cannot be grown then /// `usize::max_value()` is returned. /// /// The argument `mem` is the numerical index of which memory to return the /// size of. Note that currently the WebAssembly specification only supports one /// memory, so it is required that zero is passed in. The argument is present to /// be forward-compatible with future WebAssembly revisions. If a nonzero /// argument is passed to this function it will currently unconditionally abort. /// /// [instr]: http://webassembly.github.io/spec/core/exec/instructions.html#exec-memory-grow #[inline] #[cfg_attr(test, assert_instr("memory.grow", mem = 0))] #[rustc_args_required_const(0)] #[stable(feature = "simd_wasm32", since = "1.33.0")] pub fn memory_grow(mem: u32, delta: usize) -> usize { unsafe { if mem != 0 { crate::intrinsics::abort(); } llvm_memory_grow(0, delta as i32) as isize as usize } } core_arch-0.1.5/src/wasm32/mod.rs010064400007650000024000000011201343447103600147100ustar0000000000000000//! WASM32 intrinsics #[cfg(test)] use stdsimd_test::assert_instr; #[cfg(test)] use wasm_bindgen_test::wasm_bindgen_test; #[cfg(any(target_feature = "atomics", dox))] mod atomic; #[cfg(any(target_feature = "atomics", dox))] pub use self::atomic::*; #[cfg(any(target_feature = "simd128", dox))] mod simd128; #[cfg(any(target_feature = "simd128", dox))] pub use self::simd128::*; mod memory; pub use self::memory::*; /// Generates the trap instruction `UNREACHABLE` #[cfg_attr(test, assert_instr(unreachable))] #[inline] pub unsafe fn unreachable() -> ! 
{ crate::intrinsics::abort() } core_arch-0.1.5/src/wasm32/simd128.rs010064400007650000024000002357161343447103600153440ustar0000000000000000//! This module implements the [WebAssembly `SIMD128` ISA]. //! //! [WebAssembly `SIMD128` ISA]: //! https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md #![allow(non_camel_case_types)] use crate::{ core_arch::{simd::*, simd_llvm::*}, marker::Sized, mem::transmute, ptr, }; #[cfg(test)] use stdsimd_test::assert_instr; #[cfg(test)] use wasm_bindgen_test::wasm_bindgen_test; types! { /// WASM-specific 128-bit wide SIMD vector type. // N.B., internals here are arbitrary. pub struct v128(i32, i32, i32, i32); } #[allow(non_camel_case_types)] #[unstable(feature = "stdimd_internal", issue = "0")] pub(crate) trait v128Ext: Sized { fn as_v128(self) -> v128; #[inline] fn as_u8x16(self) -> u8x16 { unsafe { transmute(self.as_v128()) } } #[inline] fn as_u16x8(self) -> u16x8 { unsafe { transmute(self.as_v128()) } } #[inline] fn as_u32x4(self) -> u32x4 { unsafe { transmute(self.as_v128()) } } #[inline] fn as_u64x2(self) -> u64x2 { unsafe { transmute(self.as_v128()) } } #[inline] fn as_i8x16(self) -> i8x16 { unsafe { transmute(self.as_v128()) } } #[inline] fn as_i16x8(self) -> i16x8 { unsafe { transmute(self.as_v128()) } } #[inline] fn as_i32x4(self) -> i32x4 { unsafe { transmute(self.as_v128()) } } #[inline] fn as_i64x2(self) -> i64x2 { unsafe { transmute(self.as_v128()) } } #[inline] fn as_f32x4(self) -> f32x4 { unsafe { transmute(self.as_v128()) } } #[inline] fn as_f64x2(self) -> f64x2 { unsafe { transmute(self.as_v128()) } } } impl v128Ext for v128 { #[inline] fn as_v128(self) -> Self { self } } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.wasm.anytrue.v16i8"] fn llvm_i8x16_any_true(x: i8x16) -> i32; #[link_name = "llvm.wasm.alltrue.v16i8"] fn llvm_i8x16_all_true(x: i8x16) -> i32; #[link_name = "llvm.sadd.sat.v16i8"] fn llvm_i8x16_add_saturate_s(a: i8x16, b: i8x16) -> i8x16; #[link_name = "llvm.uadd.sat.v16i8"] fn llvm_i8x16_add_saturate_u(a: i8x16, b: i8x16) -> i8x16; #[link_name = "llvm.wasm.sub.saturate.signed.v16i8"] fn llvm_i8x16_sub_saturate_s(a: i8x16, b: i8x16) -> i8x16; #[link_name = "llvm.wasm.sub.saturate.unsigned.v16i8"] fn llvm_i8x16_sub_saturate_u(a: i8x16, b: i8x16) -> i8x16; #[link_name = "llvm.wasm.anytrue.v8i16"] fn llvm_i16x8_any_true(x: i16x8) -> i32; #[link_name = "llvm.wasm.alltrue.v8i16"] fn llvm_i16x8_all_true(x: i16x8) -> i32; #[link_name = "llvm.sadd.sat.v8i16"] fn llvm_i16x8_add_saturate_s(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.uadd.sat.v8i16"] fn llvm_i16x8_add_saturate_u(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.wasm.sub.saturate.signed.v8i16"] fn llvm_i16x8_sub_saturate_s(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.wasm.sub.saturate.unsigned.v8i16"] fn llvm_i16x8_sub_saturate_u(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.wasm.anytrue.v4i32"] fn llvm_i32x4_any_true(x: i32x4) -> i32; #[link_name = "llvm.wasm.alltrue.v4i32"] fn llvm_i32x4_all_true(x: i32x4) -> i32; #[link_name = "llvm.wasm.anytrue.v2i64"] fn llvm_i64x2_any_true(x: i64x2) -> i32; #[link_name = "llvm.wasm.alltrue.v2i64"] fn llvm_i64x2_all_true(x: i64x2) -> i32; #[link_name = "llvm.fabs.v4f32"] fn llvm_f32x4_abs(x: f32x4) -> f32x4; #[link_name = "llvm.sqrt.v4f32"] fn llvm_f32x4_sqrt(x: f32x4) -> f32x4; #[link_name = "llvm.minimum.v4f32"] fn llvm_f32x4_min(x: f32x4, y: f32x4) -> f32x4; #[link_name = "llvm.maximum.v4f32"] fn llvm_f32x4_max(x: f32x4, y: f32x4) -> f32x4; #[link_name = "llvm.fabs.v2f64"] fn 
llvm_f64x2_abs(x: f64x2) -> f64x2; #[link_name = "llvm.sqrt.v2f64"] fn llvm_f64x2_sqrt(x: f64x2) -> f64x2; #[link_name = "llvm.minimum.v2f64"] fn llvm_f64x2_min(x: f64x2, y: f64x2) -> f64x2; #[link_name = "llvm.maximum.v2f64"] fn llvm_f64x2_max(x: f64x2, y: f64x2) -> f64x2; #[link_name = "llvm.wasm.bitselect.v16i8"] fn llvm_bitselect(a: i8x16, b: i8x16, c: i8x16) -> i8x16; } /// Loads a `v128` vector from the given heap address. #[inline] #[cfg_attr(test, assert_instr(v128.load))] pub unsafe fn v128_load(m: *const v128) -> v128 { ptr::read(m) } /// Stores a `v128` vector to the given heap address. #[inline] #[cfg_attr(test, assert_instr(v128.store))] pub unsafe fn v128_store(m: *mut v128, a: v128) { ptr::write(m, a) } /// Materializes a constant SIMD value from the immediate operands. /// /// The `v128.const` instruction is encoded with 16 immediate bytes /// `imm` which provide the bits of the vector directly. #[inline] #[rustc_args_required_const(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)] #[cfg_attr(test, assert_instr( v128.const, a0 = 0, a1 = 1, a2 = 2, a3 = 3, a4 = 4, a5 = 5, a6 = 6, a7 = 7, a8 = 8, a9 = 9, a10 = 10, a11 = 11, a12 = 12, a13 = 13, a14 = 14, a15 = 15, ))] pub const fn v128_const( a0: u8, a1: u8, a2: u8, a3: u8, a4: u8, a5: u8, a6: u8, a7: u8, a8: u8, a9: u8, a10: u8, a11: u8, a12: u8, a13: u8, a14: u8, a15: u8, ) -> v128 { union U { imm: [u8; 16], vec: v128, } unsafe { U { imm: [ a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, ], } .vec } } /// Creates a vector with identical lanes. /// /// Constructs a vector with `x` replicated to all 16 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn i8x16_splat(a: i8) -> v128 { unsafe { transmute(i8x16::splat(a)) } } /// Extracts a lane from a 128-bit vector interpreted as 16 packed i8 numbers. /// /// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety /// /// This function has undefined behavior if `imm` is greater than or equal to /// 16. #[inline] #[rustc_args_required_const(1)] pub unsafe fn i8x16_extract_lane(a: v128, imm: usize) -> i8 { #[cfg(test)] #[assert_instr(i8x16.extract_lane_s)] fn extract_lane_s(a: v128) -> i32 { unsafe { i8x16_extract_lane(a, 0) as i32 } } #[cfg(test)] #[assert_instr(i8x16.extract_lane_u)] fn extract_lane_u(a: v128) -> u32 { unsafe { i8x16_extract_lane(a, 0) as u32 } } simd_extract(a.as_i8x16(), imm as u32) } /// Replaces a lane from a 128-bit vector interpreted as 16 packed i8 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety /// /// This function has undefined behavior if `imm` is greater than or equal to /// 16. #[inline] #[cfg_attr(test, assert_instr(i8x16.replace_lane, imm = 0))] #[rustc_args_required_const(1)] pub unsafe fn i8x16_replace_lane(a: v128, imm: usize, val: i8) -> v128 { transmute(simd_insert(a.as_i8x16(), imm as u32, val)) } /// Creates a vector with identical lanes. /// /// Construct a vector with `x` replicated to all 8 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn i16x8_splat(a: i16) -> v128 { unsafe { transmute(i16x8::splat(a)) } } /// Extracts a lane from a 128-bit vector interpreted as 8 packed i16 numbers. /// /// Extracts a the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety /// /// This function has undefined behavior if `imm` is greater than or equal to /// 8. 
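///
/// A hedged usage sketch (not run as a doc-test; assumes a `wasm32` target
/// with the `simd128` feature enabled):
///
/// ```ignore
/// let v = i16x8_splat(7);
/// // The lane index must be a constant strictly less than 8.
/// let lane3: i16 = unsafe { i16x8_extract_lane(v, 3) }; // 7
/// ```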
#[inline] #[rustc_args_required_const(1)] pub unsafe fn i16x8_extract_lane(a: v128, imm: usize) -> i16 { #[cfg(test)] #[assert_instr(i16x8.extract_lane_s)] fn extract_lane_s(a: v128) -> i32 { unsafe { i16x8_extract_lane(a, 0) as i32 } } #[cfg(test)] #[assert_instr(i16x8.extract_lane_u)] fn extract_lane_u(a: v128) -> u32 { unsafe { i16x8_extract_lane(a, 0) as u32 } } simd_extract(a.as_i16x8(), imm as u32) } /// Replaces a lane from a 128-bit vector interpreted as 8 packed i16 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety /// /// This function has undefined behavior if `imm` is greater than or equal to /// 8. #[inline] #[cfg_attr(test, assert_instr(i16x8.replace_lane, imm = 0))] #[rustc_args_required_const(1)] pub unsafe fn i16x8_replace_lane(a: v128, imm: usize, val: i16) -> v128 { transmute(simd_insert(a.as_i16x8(), imm as u32, val)) } /// Creates a vector with identical lanes. /// /// Constructs a vector with `x` replicated to all 4 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn i32x4_splat(a: i32) -> v128 { unsafe { transmute(i32x4::splat(a)) } } /// Extracts a lane from a 128-bit vector interpreted as 4 packed i32 numbers. /// /// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety /// /// This function has undefined behavior if `imm` is greater than or equal to /// 4. #[inline] #[cfg_attr(test, assert_instr(i32x4.extract_lane_s, imm = 0))] #[rustc_args_required_const(1)] pub unsafe fn i32x4_extract_lane(a: v128, imm: usize) -> i32 { simd_extract(a.as_i32x4(), imm as u32) } /// Replaces a lane from a 128-bit vector interpreted as 4 packed i32 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety /// /// This function has undefined behavior if `imm` is greater than or equal to /// 4. #[inline] #[cfg_attr(test, assert_instr(i32x4.replace_lane, imm = 0))] #[rustc_args_required_const(1)] pub unsafe fn i32x4_replace_lane(a: v128, imm: usize, val: i32) -> v128 { transmute(simd_insert(a.as_i32x4(), imm as u32, val)) } /// Creates a vector with identical lanes. /// /// Construct a vector with `x` replicated to all 2 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn i64x2_splat(a: i64) -> v128 { unsafe { transmute(i64x2::splat(a)) } } /// Extracts a lane from a 128-bit vector interpreted as 2 packed i64 numbers. /// /// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety /// /// This function has undefined behavior if `imm` is greater than or equal to /// 2. #[inline] #[cfg_attr(test, assert_instr(i64x2.extract_lane_s, imm = 0))] #[rustc_args_required_const(1)] pub unsafe fn i64x2_extract_lane(a: v128, imm: usize) -> i64 { simd_extract(a.as_i64x2(), imm as u32) } /// Replaces a lane from a 128-bit vector interpreted as 2 packed i64 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety /// /// This function has undefined behavior if `imm` is greater than or equal to /// 2. #[inline] #[cfg_attr(test, assert_instr(i64x2.replace_lane, imm = 0))] #[rustc_args_required_const(1)] pub unsafe fn i64x2_replace_lane(a: v128, imm: usize, val: i64) -> v128 { transmute(simd_insert(a.as_i64x2(), imm as u32, val)) } /// Creates a vector with identical lanes. 
/// /// Constructs a vector with `x` replicated to all 4 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn f32x4_splat(a: f32) -> v128 { unsafe { transmute(f32x4::splat(a)) } } /// Extracts a lane from a 128-bit vector interpreted as 4 packed f32 numbers. /// /// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety /// /// This function has undefined behavior if `imm` is greater than or equal to /// 4. #[inline] #[cfg_attr(test, assert_instr(f32x4.extract_lane_s, imm = 0))] #[rustc_args_required_const(1)] pub unsafe fn f32x4_extract_lane(a: v128, imm: usize) -> f32 { simd_extract(a.as_f32x4(), imm as u32) } /// Replaces a lane from a 128-bit vector interpreted as 4 packed f32 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety /// /// This function has undefined behavior if `imm` is greater than or equal to /// 4. #[inline] #[cfg_attr(test, assert_instr(f32x4.replace_lane, imm = 0))] #[rustc_args_required_const(1)] pub unsafe fn f32x4_replace_lane(a: v128, imm: usize, val: f32) -> v128 { transmute(simd_insert(a.as_f32x4(), imm as u32, val)) } /// Creates a vector with identical lanes. /// /// Constructs a vector with `x` replicated to all 2 lanes. #[inline] #[cfg_attr(test, assert_instr(i8x16.splat))] pub fn f64x2_splat(a: f64) -> v128 { unsafe { transmute(f64x2::splat(a)) } } /// Extracts lane from a 128-bit vector interpreted as 2 packed f64 numbers. /// /// Extracts the scalar value of lane specified in the immediate mode operand /// `imm` from `a`. /// /// # Unsafety /// /// This function has undefined behavior if `imm` is greater than or equal to /// 2. #[inline] #[cfg_attr(test, assert_instr(f64x2.extract_lane_s, imm = 0))] #[rustc_args_required_const(1)] pub unsafe fn f64x2_extract_lane(a: v128, imm: usize) -> f64 { simd_extract(a.as_f64x2(), imm as u32) } /// Replaces a lane from a 128-bit vector interpreted as 2 packed f64 numbers. /// /// Replaces the scalar value of lane specified in the immediate mode operand /// `imm` with `a`. /// /// # Unsafety /// /// This function has undefined behavior if `imm` is greater than or equal to /// 2. #[inline] #[cfg_attr(test, assert_instr(f64x2.replace_lane, imm = 0))] #[rustc_args_required_const(1)] pub unsafe fn f64x2_replace_lane(a: v128, imm: usize, val: f64) -> v128 { transmute(simd_insert(a.as_f64x2(), imm as u32, val)) } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit /// integers. /// /// Returns a new vector where each lane is all ones if the pairwise elements /// were equal, or all zeros if the elements were not equal. #[inline] #[cfg_attr(test, assert_instr(i8x16.eq))] pub fn i8x16_eq(a: v128, b: v128) -> v128 { unsafe { transmute(simd_eq::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit /// integers. /// /// Returns a new vector where each lane is all ones if the pairwise elements /// were not equal, or all zeros if the elements were equal. #[inline] #[cfg_attr(test, assert_instr(i8x16.ne))] pub fn i8x16_ne(a: v128, b: v128) -> v128 { unsafe { transmute(simd_ne::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit /// signed integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. 
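///
/// A hedged sketch (not run as a doc-test; assumes a `wasm32` target with the
/// `simd128` feature): lanes for which the signed comparison holds are set to
/// all ones, the others to all zeros.
///
/// ```ignore
/// let a = i8x16_splat(-1);
/// let b = i8x16_splat(0);
/// let mask = i8x16_lt_s(a, b);
/// // Every lane of `mask` is all ones, i.e. -1 when read back as an i8.
/// let lane0 = unsafe { i8x16_extract_lane(mask, 0) }; // -1
/// ```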
#[inline] #[cfg_attr(test, assert_instr(i8x16.lt_s))] pub fn i8x16_lt_s(a: v128, b: v128) -> v128 { unsafe { transmute(simd_lt::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit /// unsigned integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.lt_u))] pub fn i8x16_lt_u(a: v128, b: v128) -> v128 { unsafe { transmute(simd_lt::<_, i8x16>(a.as_u8x16(), b.as_u8x16())) } } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit /// signed integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.gt_s))] pub fn i8x16_gt_s(a: v128, b: v128) -> v128 { unsafe { transmute(simd_gt::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit /// unsigned integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.gt_u))] pub fn i8x16_gt_u(a: v128, b: v128) -> v128 { unsafe { transmute(simd_gt::<_, i8x16>(a.as_u8x16(), b.as_u8x16())) } } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit /// signed integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.le_s))] pub fn i8x16_le_s(a: v128, b: v128) -> v128 { unsafe { transmute(simd_le::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit /// unsigned integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.le_u))] pub fn i8x16_le_u(a: v128, b: v128) -> v128 { unsafe { transmute(simd_le::<_, i8x16>(a.as_u8x16(), b.as_u8x16())) } } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit /// signed integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.ge_s))] pub fn i8x16_ge_s(a: v128, b: v128) -> v128 { unsafe { transmute(simd_ge::<_, i8x16>(a.as_i8x16(), b.as_i8x16())) } } /// Compares two 128-bit vectors as if they were two vectors of 16 eight-bit /// unsigned integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i8x16.ge_u))] pub fn i8x16_ge_u(a: v128, b: v128) -> v128 { unsafe { transmute(simd_ge::<_, i8x16>(a.as_u8x16(), b.as_u8x16())) } } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit /// integers. /// /// Returns a new vector where each lane is all ones if the pairwise elements /// were equal, or all zeros if the elements were not equal. 
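///
/// A hedged sketch (not run as a doc-test; assumes a `wasm32` target with the
/// `simd128` feature), using `i16x8_replace_lane` to make exactly one lane
/// differ:
///
/// ```ignore
/// let a = i16x8_splat(5);
/// let b = unsafe { i16x8_replace_lane(a, 0, 9) }; // differs from `a` in lane 0 only
/// let m = i16x8_eq(a, b);
/// let lane0 = unsafe { i16x8_extract_lane(m, 0) }; // 0 (lanes differ)
/// let lane1 = unsafe { i16x8_extract_lane(m, 1) }; // -1 (lanes equal, all bits set)
/// ```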
#[inline] #[cfg_attr(test, assert_instr(i16x8.eq))] pub fn i16x8_eq(a: v128, b: v128) -> v128 { unsafe { transmute(simd_eq::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit /// integers. /// /// Returns a new vector where each lane is all ones if the pairwise elements /// were not equal, or all zeros if the elements were equal. #[inline] #[cfg_attr(test, assert_instr(i16x8.ne))] pub fn i16x8_ne(a: v128, b: v128) -> v128 { unsafe { transmute(simd_ne::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit /// signed integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i16x8.lt_s))] pub fn i16x8_lt_s(a: v128, b: v128) -> v128 { unsafe { transmute(simd_lt::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit /// unsigned integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i16x8.lt_u))] pub fn i16x8_lt_u(a: v128, b: v128) -> v128 { unsafe { transmute(simd_lt::<_, i16x8>(a.as_u16x8(), b.as_u16x8())) } } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit /// signed integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i16x8.gt_s))] pub fn i16x8_gt_s(a: v128, b: v128) -> v128 { unsafe { transmute(simd_gt::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit /// unsigned integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i16x8.gt_u))] pub fn i16x8_gt_u(a: v128, b: v128) -> v128 { unsafe { transmute(simd_gt::<_, i16x8>(a.as_u16x8(), b.as_u16x8())) } } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit /// signed integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i16x8.le_s))] pub fn i16x8_le_s(a: v128, b: v128) -> v128 { unsafe { transmute(simd_le::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit /// unsigned integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i16x8.le_u))] pub fn i16x8_le_u(a: v128, b: v128) -> v128 { unsafe { transmute(simd_le::<_, i16x8>(a.as_u16x8(), b.as_u16x8())) } } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit /// signed integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. 
#[inline] #[cfg_attr(test, assert_instr(i16x8.ge_s))] pub fn i16x8_ge_s(a: v128, b: v128) -> v128 { unsafe { transmute(simd_ge::<_, i16x8>(a.as_i16x8(), b.as_i16x8())) } } /// Compares two 128-bit vectors as if they were two vectors of 8 sixteen-bit /// unsigned integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i16x8.ge_u))] pub fn i16x8_ge_u(a: v128, b: v128) -> v128 { unsafe { transmute(simd_ge::<_, i16x8>(a.as_u16x8(), b.as_u16x8())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// integers. /// /// Returns a new vector where each lane is all ones if the pairwise elements /// were equal, or all zeros if the elements were not equal. #[inline] #[cfg_attr(test, assert_instr(i32x4.eq))] pub fn i32x4_eq(a: v128, b: v128) -> v128 { unsafe { transmute(simd_eq::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// integers. /// /// Returns a new vector where each lane is all ones if the pairwise elements /// were not equal, or all zeros if the elements were equal. #[inline] #[cfg_attr(test, assert_instr(i32x4.ne))] pub fn i32x4_ne(a: v128, b: v128) -> v128 { unsafe { transmute(simd_ne::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// signed integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.lt_s))] pub fn i32x4_lt_s(a: v128, b: v128) -> v128 { unsafe { transmute(simd_lt::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// unsigned integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.lt_u))] pub fn i32x4_lt_u(a: v128, b: v128) -> v128 { unsafe { transmute(simd_lt::<_, i32x4>(a.as_u32x4(), b.as_u32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// signed integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.gt_s))] pub fn i32x4_gt_s(a: v128, b: v128) -> v128 { unsafe { transmute(simd_gt::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// unsigned integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.gt_u))] pub fn i32x4_gt_u(a: v128, b: v128) -> v128 { unsafe { transmute(simd_gt::<_, i32x4>(a.as_u32x4(), b.as_u32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// signed integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. 
#[inline] #[cfg_attr(test, assert_instr(i32x4.le_s))] pub fn i32x4_le_s(a: v128, b: v128) -> v128 { unsafe { transmute(simd_le::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// unsigned integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.le_u))] pub fn i32x4_le_u(a: v128, b: v128) -> v128 { unsafe { transmute(simd_le::<_, i32x4>(a.as_u32x4(), b.as_u32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// signed integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.ge_s))] pub fn i32x4_ge_s(a: v128, b: v128) -> v128 { unsafe { transmute(simd_ge::<_, i32x4>(a.as_i32x4(), b.as_i32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// unsigned integers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(i32x4.ge_u))] pub fn i32x4_ge_u(a: v128, b: v128) -> v128 { unsafe { transmute(simd_ge::<_, i32x4>(a.as_u32x4(), b.as_u32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// floating point numbers. /// /// Returns a new vector where each lane is all ones if the pairwise elements /// were equal, or all zeros if the elements were not equal. #[inline] #[cfg_attr(test, assert_instr(f32x4.eq))] pub fn f32x4_eq(a: v128, b: v128) -> v128 { unsafe { transmute(simd_eq::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// floating point numbers. /// /// Returns a new vector where each lane is all ones if the pairwise elements /// were not equal, or all zeros if the elements were equal. #[inline] #[cfg_attr(test, assert_instr(f32x4.ne))] pub fn f32x4_ne(a: v128, b: v128) -> v128 { unsafe { transmute(simd_ne::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// floating point numbers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(f32x4.lt))] pub fn f32x4_lt(a: v128, b: v128) -> v128 { unsafe { transmute(simd_lt::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// floating point numbers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(f32x4.gt))] pub fn f32x4_gt(a: v128, b: v128) -> v128 { unsafe { transmute(simd_gt::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// floating point numbers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. 
#[inline] #[cfg_attr(test, assert_instr(f32x4.le))] pub fn f32x4_le(a: v128, b: v128) -> v128 { unsafe { transmute(simd_le::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 4 thirty-two-bit /// floating point numbers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(f32x4.ge))] pub fn f32x4_ge(a: v128, b: v128) -> v128 { unsafe { transmute(simd_ge::<_, i32x4>(a.as_f32x4(), b.as_f32x4())) } } /// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit /// floating point numbers. /// /// Returns a new vector where each lane is all ones if the pairwise elements /// were equal, or all zeros if the elements were not equal. #[inline] #[cfg_attr(test, assert_instr(f64x2.eq))] pub fn f64x2_eq(a: v128, b: v128) -> v128 { unsafe { transmute(simd_eq::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } } /// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit /// floating point numbers. /// /// Returns a new vector where each lane is all ones if the pairwise elements /// were not equal, or all zeros if the elements were equal. #[inline] #[cfg_attr(test, assert_instr(f64x2.ne))] pub fn f64x2_ne(a: v128, b: v128) -> v128 { unsafe { transmute(simd_ne::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } } /// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit /// floating point numbers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(f64x2.lt))] pub fn f64x2_lt(a: v128, b: v128) -> v128 { unsafe { transmute(simd_lt::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } } /// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit /// floating point numbers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(f64x2.gt))] pub fn f64x2_gt(a: v128, b: v128) -> v128 { unsafe { transmute(simd_gt::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } } /// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit /// floating point numbers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is less than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(f64x2.le))] pub fn f64x2_le(a: v128, b: v128) -> v128 { unsafe { transmute(simd_le::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } } /// Compares two 128-bit vectors as if they were two vectors of 2 sixty-four-bit /// floating point numbers. /// /// Returns a new vector where each lane is all ones if the pairwise left /// element is greater than the pairwise right element, or all zeros otherwise. #[inline] #[cfg_attr(test, assert_instr(f64x2.ge))] pub fn f64x2_ge(a: v128, b: v128) -> v128 { unsafe { transmute(simd_ge::<_, i64x2>(a.as_f64x2(), b.as_f64x2())) } } /// Flips each bit of the 128-bit input vector. #[inline] #[cfg_attr(test, assert_instr(v128.not))] pub fn v128_not(a: v128) -> v128 { unsafe { transmute(simd_xor(a.as_i64x2(), i64x2(!0, !0))) } } /// Performs a bitwise and of the two input 128-bit vectors, returning the /// resulting vector. 
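// Sketch of the mask convention used by the comparison intrinsics above: every output lane is either all ones (-1 when read as a signed integer) or all zeros. Assumes a wasm32 target with SIMD enabled; the helper name `compare_demo` is illustrative only.
//
// unsafe fn compare_demo() {
//     let a: v128 = transmute([1_i32, 5, 3, 7]);
//     let b: v128 = transmute([2_i32, 4, 3, 8]);
//     // Lanes 0 and 3 satisfy `a < b`; lanes 1 and 2 do not.
//     let mask: [i32; 4] = transmute(i32x4_lt_s(a, b));
//     assert_eq!(mask, [-1, 0, 0, -1]);
// }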
#[inline] #[cfg_attr(test, assert_instr(v128.and))] pub fn v128_and(a: v128, b: v128) -> v128 { unsafe { transmute(simd_and(a.as_i64x2(), b.as_i64x2())) } } /// Performs a bitwise or of the two input 128-bit vectors, returning the /// resulting vector. #[inline] #[cfg_attr(test, assert_instr(v128.or))] pub fn v128_or(a: v128, b: v128) -> v128 { unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) } } /// Performs a bitwise xor of the two input 128-bit vectors, returning the /// resulting vector. #[inline] #[cfg_attr(test, assert_instr(v128.xor))] pub fn v128_xor(a: v128, b: v128) -> v128 { unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) } } /// Use the bitmask in `c` to select bits from `v1` when 1 and `v2` when 0. #[inline] #[cfg_attr(test, assert_instr(v128.bitselect))] pub fn v128_bitselect(v1: v128, v2: v128, c: v128) -> v128 { unsafe { transmute(llvm_bitselect(c.as_i8x16(), v1.as_i8x16(), v2.as_i8x16())) } } /// Negates a 128-bit vector interpreted as sixteen 8-bit signed integers. #[inline] #[cfg_attr(test, assert_instr(i8x16.neg))] pub fn i8x16_neg(a: v128) -> v128 { unsafe { transmute(simd_mul(a.as_i8x16(), i8x16::splat(-1))) } } /// Returns 1 if any lane is nonzero or 0 if all lanes are zero. #[inline] #[cfg_attr(test, assert_instr(i8x16.any_true))] pub fn i8x16_any_true(a: v128) -> i32 { unsafe { llvm_i8x16_any_true(a.as_i8x16()) } } /// Returns 1 if all lanes are nonzero or 0 if any lane is nonzero. #[inline] #[cfg_attr(test, assert_instr(i8x16.all_true))] pub fn i8x16_all_true(a: v128) -> i32 { unsafe { llvm_i8x16_all_true(a.as_i8x16()) } } /// Shifts each lane to the left by the specified number of bits. /// /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] #[cfg_attr(test, assert_instr(i8x16.shl))] pub fn i8x16_shl(a: v128, amt: u32) -> v128 { unsafe { transmute(simd_shl(a.as_i8x16(), i8x16::splat(amt as i8))) } } /// Shifts each lane to the right by the specified number of bits, sign /// extending. /// /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] #[cfg_attr(test, assert_instr(i8x16.shl))] pub fn i8x16_shr_s(a: v128, amt: u32) -> v128 { unsafe { transmute(simd_shr(a.as_i8x16(), i8x16::splat(amt as i8))) } } /// Shifts each lane to the right by the specified number of bits, shifting in /// zeros. /// /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] #[cfg_attr(test, assert_instr(i8x16.shl))] pub fn i8x16_shr_u(a: v128, amt: u32) -> v128 { unsafe { transmute(simd_shr(a.as_u8x16(), u8x16::splat(amt as u8))) } } /// Adds two 128-bit vectors as if they were two packed sixteen 8-bit integers. #[inline] #[cfg_attr(test, assert_instr(i8x16.add))] pub fn i8x16_add(a: v128, b: v128) -> v128 { unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) } } /// Adds two 128-bit vectors as if they were two packed sixteen 8-bit signed /// integers, saturating on overflow to `i8::max_value()`. #[inline] #[cfg_attr(test, assert_instr(i8x16.add_saturate_s))] pub fn i8x16_add_saturate_s(a: v128, b: v128) -> v128 { unsafe { transmute(llvm_i8x16_add_saturate_s(a.as_i8x16(), b.as_i8x16())) } } /// Adds two 128-bit vectors as if they were two packed sixteen 8-bit unsigned /// integers, saturating on overflow to `u8::max_value()`.
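// Because each lane of a comparison result is all ones or all zeros, those masks can drive `v128_bitselect` for a branchless per-lane select. Minimal sketch, assuming a wasm32 target with SIMD enabled; `lanewise_min_i32x4` is an illustrative name only.
//
// unsafe fn lanewise_min_i32x4(a: v128, b: v128) -> v128 {
//     // Where `a < b` the mask is all ones and `bitselect` takes bits from `a`;
//     // elsewhere it takes bits from `b`, which yields the per-lane minimum.
//     let mask = i32x4_lt_s(a, b);
//     v128_bitselect(a, b, mask)
// }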
#[inline] #[cfg_attr(test, assert_instr(i8x16.add_saturate_u))] pub fn i8x16_add_saturate_u(a: v128, b: v128) -> v128 { unsafe { transmute(llvm_i8x16_add_saturate_u(a.as_i8x16(), b.as_i8x16())) } } /// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit integers. #[inline] #[cfg_attr(test, assert_instr(i8x16.sub))] pub fn i8x16_sub(a: v128, b: v128) -> v128 { unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) } } /// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit /// signed integers, saturating on overflow to `i8::min_value()`. #[inline] #[cfg_attr(test, assert_instr(i8x16.sub_saturate_s))] pub fn i8x16_sub_saturate_s(a: v128, b: v128) -> v128 { unsafe { transmute(llvm_i8x16_sub_saturate_s(a.as_i8x16(), b.as_i8x16())) } } /// Subtracts two 128-bit vectors as if they were two packed sixteen 8-bit /// unsigned integers, saturating on overflow to 0. #[inline] #[cfg_attr(test, assert_instr(i8x16.sub_saturate_u))] pub fn i8x16_sub_saturate_u(a: v128, b: v128) -> v128 { unsafe { transmute(llvm_i8x16_sub_saturate_u(a.as_i8x16(), b.as_i8x16())) } } /// Multiplies two 128-bit vectors as if they were two packed sixteen 8-bit /// signed integers. #[inline] #[cfg_attr(test, assert_instr(i8x16.mul))] pub fn i8x16_mul(a: v128, b: v128) -> v128 { unsafe { transmute(simd_mul(a.as_i8x16(), b.as_i8x16())) } } /// Negates a 128-bit vector interpreted as eight 16-bit signed integers. #[inline] #[cfg_attr(test, assert_instr(i16x8.neg))] pub fn i16x8_neg(a: v128) -> v128 { unsafe { transmute(simd_mul(a.as_i16x8(), i16x8::splat(-1))) } } /// Returns 1 if any lane is nonzero or 0 if all lanes are zero. #[inline] #[cfg_attr(test, assert_instr(i16x8.any_true))] pub fn i16x8_any_true(a: v128) -> i32 { unsafe { llvm_i16x8_any_true(a.as_i16x8()) } } /// Returns 1 if all lanes are nonzero or 0 if any lane is nonzero. #[inline] #[cfg_attr(test, assert_instr(i16x8.all_true))] pub fn i16x8_all_true(a: v128) -> i32 { unsafe { llvm_i16x8_all_true(a.as_i16x8()) } } /// Shifts each lane to the left by the specified number of bits. /// /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] #[cfg_attr(test, assert_instr(i16x8.shl))] pub fn i16x8_shl(a: v128, amt: u32) -> v128 { unsafe { transmute(simd_shl(a.as_i16x8(), i16x8::splat(amt as i16))) } } /// Shifts each lane to the right by the specified number of bits, sign /// extending. /// /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] #[cfg_attr(test, assert_instr(i16x8.shl))] pub fn i16x8_shr_s(a: v128, amt: u32) -> v128 { unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(amt as i16))) } } /// Shifts each lane to the right by the specified number of bits, shifting in /// zeros. /// /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] #[cfg_attr(test, assert_instr(i16x8.shl))] pub fn i16x8_shr_u(a: v128, amt: u32) -> v128 { unsafe { transmute(simd_shr(a.as_u16x8(), u16x8::splat(amt as u16))) } } /// Adds two 128-bit vectors as if they were two packed eight 16-bit integers. #[inline] #[cfg_attr(test, assert_instr(i16x8.add))] pub fn i16x8_add(a: v128, b: v128) -> v128 { unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) } } /// Adds two 128-bit vectors as if they were two packed eight 16-bit signed /// integers, saturating on overflow to `i16::max_value()`.
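// Saturating-arithmetic sketch: unlike the wrapping `i8x16_add`, the `*_saturate_*` forms clamp at the limits of the lane type. Assumes a wasm32 target with SIMD enabled and an `i8x16_splat` constructor defined earlier in this module; the helper name `saturate_demo` is illustrative only.
//
// unsafe fn saturate_demo() {
//     let a = i8x16_splat(120);
//     let b = i8x16_splat(20);
//     // 120 + 20 wraps around to -116, but the saturating form clamps at 127.
//     let wrapped: [i8; 16] = transmute(i8x16_add(a, b));
//     let clamped: [i8; 16] = transmute(i8x16_add_saturate_s(a, b));
//     assert_eq!(wrapped[0], -116);
//     assert_eq!(clamped[0], i8::max_value());
// }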
#[inline] #[cfg_attr(test, assert_instr(i16x8.add_saturate_s))] pub fn i16x8_add_saturate_s(a: v128, b: v128) -> v128 { unsafe { transmute(llvm_i16x8_add_saturate_s(a.as_i16x8(), b.as_i16x8())) } } /// Adds two 128-bit vectors as if they were two packed eight 16-bit unsigned /// integers, saturating on overflow to `u16::max_value()`. #[inline] #[cfg_attr(test, assert_instr(i16x8.add_saturate_u))] pub fn i16x8_add_saturate_u(a: v128, b: v128) -> v128 { unsafe { transmute(llvm_i16x8_add_saturate_u(a.as_i16x8(), b.as_i16x8())) } } /// Subtracts two 128-bit vectors as if they were two packed eight 16-bit integers. #[inline] #[cfg_attr(test, assert_instr(i16x8.sub))] pub fn i16x8_sub(a: v128, b: v128) -> v128 { unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) } } /// Subtracts two 128-bit vectors as if they were two packed eight 16-bit /// signed integers, saturating on overflow to `i16::min_value()`. #[inline] #[cfg_attr(test, assert_instr(i16x8.sub_saturate_s))] pub fn i16x8_sub_saturate_s(a: v128, b: v128) -> v128 { unsafe { transmute(llvm_i16x8_sub_saturate_s(a.as_i16x8(), b.as_i16x8())) } } /// Subtracts two 128-bit vectors as if they were two packed eight 16-bit /// unsigned integers, saturating on overflow to 0. #[inline] #[cfg_attr(test, assert_instr(i16x8.sub_saturate_u))] pub fn i16x8_sub_saturate_u(a: v128, b: v128) -> v128 { unsafe { transmute(llvm_i16x8_sub_saturate_u(a.as_i16x8(), b.as_i16x8())) } } /// Multiplies two 128-bit vectors as if they were two packed eight 16-bit /// signed integers. #[inline] #[cfg_attr(test, assert_instr(i16x8.mul))] pub fn i16x8_mul(a: v128, b: v128) -> v128 { unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) } } /// Negates a 128-bit vector interpreted as four 32-bit signed integers. #[inline] #[cfg_attr(test, assert_instr(i32x4.neg))] pub fn i32x4_neg(a: v128) -> v128 { unsafe { transmute(simd_mul(a.as_i32x4(), i32x4::splat(-1))) } } /// Returns 1 if any lane is nonzero or 0 if all lanes are zero. #[inline] #[cfg_attr(test, assert_instr(i32x4.any_true))] pub fn i32x4_any_true(a: v128) -> i32 { unsafe { llvm_i32x4_any_true(a.as_i32x4()) } } /// Returns 1 if all lanes are nonzero or 0 if any lane is nonzero. #[inline] #[cfg_attr(test, assert_instr(i32x4.all_true))] pub fn i32x4_all_true(a: v128) -> i32 { unsafe { llvm_i32x4_all_true(a.as_i32x4()) } } /// Shifts each lane to the left by the specified number of bits. /// /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] #[cfg_attr(test, assert_instr(i32x4.shl))] pub fn i32x4_shl(a: v128, amt: u32) -> v128 { unsafe { transmute(simd_shl(a.as_i32x4(), i32x4::splat(amt as i32))) } } /// Shifts each lane to the right by the specified number of bits, sign /// extending. /// /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] #[cfg_attr(test, assert_instr(i32x4.shl))] pub fn i32x4_shr_s(a: v128, amt: u32) -> v128 { unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(amt as i32))) } } /// Shifts each lane to the right by the specified number of bits, shifting in /// zeros. /// /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] #[cfg_attr(test, assert_instr(i32x4.shl))] pub fn i32x4_shr_u(a: v128, amt: u32) -> v128 { unsafe { transmute(simd_shr(a.as_u32x4(), u32x4::splat(amt as u32))) } } /// Adds two 128-bit vectors as if they were two packed four 32-bit integers.
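// Shift sketch contrasting the arithmetic and logical right shifts above: `shr_s` copies the sign bit into the vacated positions, while `shr_u` shifts in zeros. Assumes a wasm32 target with SIMD enabled and an `i32x4_splat` constructor defined earlier in this module; the helper name `shift_demo` is illustrative only.
//
// unsafe fn shift_demo() {
//     let v = i32x4_splat(-8);
//     let arithmetic: [i32; 4] = transmute(i32x4_shr_s(v, 1));
//     let logical: [i32; 4] = transmute(i32x4_shr_u(v, 1));
//     assert_eq!(arithmetic[0], -4); // sign-extended
//     assert_eq!(logical[0], (-8_i32 as u32 >> 1) as i32); // zeros shifted in
// }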
#[inline] #[cfg_attr(test, assert_instr(i32x4.add))] pub fn i32x4_add(a: v128, b: v128) -> v128 { unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) } } /// Subtracts two 128-bit vectors as if they were two packed four 32-bit integers. #[inline] #[cfg_attr(test, assert_instr(i32x4.sub))] pub fn i32x4_sub(a: v128, b: v128) -> v128 { unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) } } /// Multiplies two 128-bit vectors as if they were two packed four 32-bit /// signed integers. #[inline] #[cfg_attr(test, assert_instr(i32x4.mul))] pub fn i32x4_mul(a: v128, b: v128) -> v128 { unsafe { transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) } } /// Negates a 128-bit vector interpreted as two 64-bit signed integers. #[inline] #[cfg_attr(test, assert_instr(i64x2.neg))] pub fn i64x2_neg(a: v128) -> v128 { unsafe { transmute(simd_mul(a.as_i64x2(), i64x2::splat(-1))) } } /// Returns 1 if any lane is nonzero or 0 if all lanes are zero. #[inline] #[cfg_attr(test, assert_instr(i64x2.any_true))] pub fn i64x2_any_true(a: v128) -> i32 { unsafe { llvm_i64x2_any_true(a.as_i64x2()) } } /// Returns 1 if all lanes are nonzero or 0 if any lane is nonzero. #[inline] #[cfg_attr(test, assert_instr(i64x2.all_true))] pub fn i64x2_all_true(a: v128) -> i32 { unsafe { llvm_i64x2_all_true(a.as_i64x2()) } } /// Shifts each lane to the left by the specified number of bits. /// /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] #[cfg_attr(test, assert_instr(i64x2.shl))] pub fn i64x2_shl(a: v128, amt: u32) -> v128 { unsafe { transmute(simd_shl(a.as_i64x2(), i64x2::splat(amt as i64))) } } /// Shifts each lane to the right by the specified number of bits, sign /// extending. /// /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] #[cfg_attr(test, assert_instr(i64x2.shl))] pub fn i64x2_shr_s(a: v128, amt: u32) -> v128 { unsafe { transmute(simd_shr(a.as_i64x2(), i64x2::splat(amt as i64))) } } /// Shifts each lane to the right by the specified number of bits, shifting in /// zeros. /// /// Only the low bits of the shift amount are used if the shift amount is /// greater than the lane width. #[inline] #[cfg_attr(test, assert_instr(i64x2.shl))] pub fn i64x2_shr_u(a: v128, amt: u32) -> v128 { unsafe { transmute(simd_shr(a.as_u64x2(), u64x2::splat(amt as u64))) } } /// Adds two 128-bit vectors as if they were two packed two 64-bit integers. #[inline] #[cfg_attr(test, assert_instr(i64x2.add))] pub fn i64x2_add(a: v128, b: v128) -> v128 { unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) } } /// Subtracts two 128-bit vectors as if they were two packed two 64-bit integers. #[inline] #[cfg_attr(test, assert_instr(i64x2.sub))] pub fn i64x2_sub(a: v128, b: v128) -> v128 { unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) } } /// Calculates the absolute value of each lane of a 128-bit vector interpreted /// as four 32-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.abs))] pub fn f32x4_abs(a: v128) -> v128 { unsafe { transmute(llvm_f32x4_abs(a.as_f32x4())) } } /// Negates each lane of a 128-bit vector interpreted as four 32-bit floating /// point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.abs))] pub fn f32x4_neg(a: v128) -> v128 { unsafe { f32x4_mul(a, transmute(f32x4(-1.0, -1.0, -1.0, -1.0))) } } /// Calculates the square root of each lane of a 128-bit vector interpreted as /// four 32-bit floating point numbers.
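// Reduction sketch: `any_true`/`all_true` collapse a lane mask into a single scalar, which is the usual way to branch once on the outcome of a vector comparison. Assumes a wasm32 target with SIMD enabled and an `i32x4_splat` constructor defined earlier in this module; the helper name `contains_zero_i32x4` is illustrative only.
//
// unsafe fn contains_zero_i32x4(v: v128) -> bool {
//     // A zero lane compares equal to the all-zeros vector, so the mask has at least
//     // one non-zero lane exactly when the input contains a zero lane.
//     let mask = i32x4_eq(v, i32x4_splat(0));
//     i32x4_any_true(mask) != 0
// }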
#[inline] #[cfg_attr(test, assert_instr(f32x4.sqrt))] pub fn f32x4_sqrt(a: v128) -> v128 { unsafe { transmute(llvm_f32x4_sqrt(a.as_f32x4())) } } /// Adds pairwise lanes of two 128-bit vectors interpreted as four 32-bit /// floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.add))] pub fn f32x4_add(a: v128, b: v128) -> v128 { unsafe { transmute(simd_add(a.as_f32x4(), b.as_f32x4())) } } /// Subtracts pairwise lanes of two 128-bit vectors interpreted as four 32-bit /// floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.sub))] pub fn f32x4_sub(a: v128, b: v128) -> v128 { unsafe { transmute(simd_sub(a.as_f32x4(), b.as_f32x4())) } } /// Multiplies pairwise lanes of two 128-bit vectors interpreted as four 32-bit /// floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.mul))] pub fn f32x4_mul(a: v128, b: v128) -> v128 { unsafe { transmute(simd_mul(a.as_f32x4(), b.as_f32x4())) } } /// Divides pairwise lanes of two 128-bit vectors interpreted as four 32-bit /// floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.div))] pub fn f32x4_div(a: v128, b: v128) -> v128 { unsafe { transmute(simd_div(a.as_f32x4(), b.as_f32x4())) } } /// Calculates the minimum of pairwise lanes of two 128-bit vectors interpreted /// as four 32-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.min))] pub fn f32x4_min(a: v128, b: v128) -> v128 { unsafe { transmute(llvm_f32x4_min(a.as_f32x4(), b.as_f32x4())) } } /// Calculates the maximum of pairwise lanes of two 128-bit vectors interpreted /// as four 32-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f32x4.max))] pub fn f32x4_max(a: v128, b: v128) -> v128 { unsafe { transmute(llvm_f32x4_max(a.as_f32x4(), b.as_f32x4())) } } /// Calculates the absolute value of each lane of a 128-bit vector interpreted /// as two 64-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f64x2.abs))] pub fn f64x2_abs(a: v128) -> v128 { unsafe { transmute(llvm_f64x2_abs(a.as_f64x2())) } } /// Negates each lane of a 128-bit vector interpreted as two 64-bit floating /// point numbers. #[inline] #[cfg_attr(test, assert_instr(f64x2.abs))] pub fn f64x2_neg(a: v128) -> v128 { unsafe { f64x2_mul(a, transmute(f64x2(-1.0, -1.0))) } } /// Calculates the square root of each lane of a 128-bit vector interpreted as /// two 64-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f64x2.sqrt))] pub fn f64x2_sqrt(a: v128) -> v128 { unsafe { transmute(llvm_f64x2_sqrt(a.as_f64x2())) } } /// Adds pairwise lanes of two 128-bit vectors interpreted as two 64-bit /// floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f64x2.add))] pub fn f64x2_add(a: v128, b: v128) -> v128 { unsafe { transmute(simd_add(a.as_f64x2(), b.as_f64x2())) } } /// Subtracts pairwise lanes of two 128-bit vectors interpreted as two 64-bit /// floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f64x2.sub))] pub fn f64x2_sub(a: v128, b: v128) -> v128 { unsafe { transmute(simd_sub(a.as_f64x2(), b.as_f64x2())) } } /// Multiplies pairwise lanes of two 128-bit vectors interpreted as two 64-bit /// floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f64x2.mul))] pub fn f64x2_mul(a: v128, b: v128) -> v128 { unsafe { transmute(simd_mul(a.as_f64x2(), b.as_f64x2())) } } /// Divides pairwise lanes of two 128-bit vectors interpreted as two 64-bit /// floating point numbers. 
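// Arithmetic sketch chaining the lanewise float operations above to compute four hypotenuses at once, i.e. `sqrt(x*x + y*y)` in every lane. Assumes a wasm32 target with SIMD enabled; the helper name `hypot_f32x4` is illustrative only.
//
// unsafe fn hypot_f32x4(x: v128, y: v128) -> v128 {
//     let xx = f32x4_mul(x, x);
//     let yy = f32x4_mul(y, y);
//     f32x4_sqrt(f32x4_add(xx, yy))
// }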
#[inline] #[cfg_attr(test, assert_instr(f64x2.div))] pub fn f64x2_div(a: v128, b: v128) -> v128 { unsafe { transmute(simd_div(a.as_f64x2(), b.as_f64x2())) } } /// Calculates the minimum of pairwise lanes of two 128-bit vectors interpreted /// as two 64-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f64x2.min))] pub fn f64x2_min(a: v128, b: v128) -> v128 { unsafe { transmute(llvm_f64x2_min(a.as_f64x2(), b.as_f64x2())) } } /// Calculates the maximum of pairwise lanes of two 128-bit vectors interpreted /// as two 64-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr(f64x2.max))] pub fn f64x2_max(a: v128, b: v128) -> v128 { unsafe { transmute(llvm_f64x2_max(a.as_f64x2(), b.as_f64x2())) } } /// Converts a 128-bit vector interpreted as four 32-bit floating point numbers /// into a 128-bit vector of four 32-bit signed integers. /// /// NaN is converted to 0, and out-of-bounds values become the nearest /// representable integer. #[inline] #[cfg_attr(test, assert_instr("i32x4.trunc_s/f32x4:sat"))] pub fn i32x4_trunc_s_f32x4_sat(a: v128) -> v128 { unsafe { transmute(simd_cast::<_, i32x4>(a.as_f32x4())) } } /// Converts a 128-bit vector interpreted as four 32-bit floating point numbers /// into a 128-bit vector of four 32-bit unsigned integers. /// /// NaN is converted to 0, and out-of-bounds values become the nearest /// representable integer. #[inline] #[cfg_attr(test, assert_instr("i32x4.trunc_u/f32x4:sat"))] pub fn i32x4_trunc_u_f32x4_sat(a: v128) -> v128 { unsafe { transmute(simd_cast::<_, u32x4>(a.as_f32x4())) } } /// Converts a 128-bit vector interpreted as two 64-bit floating point numbers /// into a 128-bit vector of two 64-bit signed integers. /// /// NaN is converted to 0, and out-of-bounds values become the nearest /// representable integer. #[inline] #[cfg_attr(test, assert_instr("i64x2.trunc_s/f64x2:sat"))] pub fn i64x2_trunc_s_f64x2_sat(a: v128) -> v128 { unsafe { transmute(simd_cast::<_, i64x2>(a.as_f64x2())) } } /// Converts a 128-bit vector interpreted as two 64-bit floating point numbers /// into a 128-bit vector of two 64-bit unsigned integers. /// /// NaN is converted to 0, and out-of-bounds values become the nearest /// representable integer. #[inline] #[cfg_attr(test, assert_instr("i64x2.trunc_u/f64x2:sat"))] pub fn i64x2_trunc_u_f64x2_sat(a: v128) -> v128 { unsafe { transmute(simd_cast::<_, u64x2>(a.as_f64x2())) } } /// Converts a 128-bit vector interpreted as four 32-bit signed integers into a /// 128-bit vector of four 32-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr("f32x4.convert_s/i32x4"))] pub fn f32x4_convert_s_i32x4(a: v128) -> v128 { unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) } } /// Converts a 128-bit vector interpreted as four 32-bit unsigned integers into a /// 128-bit vector of four 32-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr("f32x4.convert_u/i32x4"))] pub fn f32x4_convert_u_i32x4(a: v128) -> v128 { unsafe { transmute(simd_cast::<_, f32x4>(a.as_u32x4())) } } /// Converts a 128-bit vector interpreted as two 64-bit signed integers into a /// 128-bit vector of two 64-bit floating point numbers. #[inline] #[cfg_attr(test, assert_instr("f64x2.convert_s/i64x2"))] pub fn f64x2_convert_s_i64x2(a: v128) -> v128 { unsafe { transmute(simd_cast::<_, f64x2>(a.as_i64x2())) } } /// Converts a 128-bit vector interpreted as two 64-bit unsigned integers into a /// 128-bit vector of two 64-bit floating point numbers.
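// Conversion sketch: `f32x4_convert_s_i32x4` widens integer lanes to floats, and `i32x4_trunc_s_f32x4_sat` truncates toward zero on the way back, saturating rather than trapping on out-of-range values. Assumes a wasm32 target with SIMD enabled; the helper name `convert_demo` is illustrative only.
//
// unsafe fn convert_demo() {
//     let ints: v128 = transmute([1_i32, -2, 3, -4]);
//     let floats = f32x4_convert_s_i32x4(ints);
//     assert_eq!(f32x4_extract_lane(floats, 1), -2.0);
//     // These small values survive the round trip exactly.
//     let back: [i32; 4] = transmute(i32x4_trunc_s_f32x4_sat(floats));
//     assert_eq!(back, [1, -2, 3, -4]);
// }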
#[inline] #[cfg_attr(test, assert_instr("f64x2.convert_u/i64x2"))] pub fn f64x2_convert_u_i64x2(a: v128) -> v128 { unsafe { transmute(simd_cast::<_, f64x2>(a.as_u64x2())) } } // #[cfg(test)] // pub mod tests { // use super::*; // use std; // use std::mem; // use std::prelude::v1::*; // use wasm_bindgen_test::*; // // fn compare_bytes(a: v128, b: v128) { // let a: [u8; 16] = unsafe { transmute(a) }; // let b: [u8; 16] = unsafe { transmute(b) }; // assert_eq!(a, b); // } // // #[wasm_bindgen_test] // fn v128_const() { // const A: v128 = unsafe { // v128::const_([ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // ]) // }; // compare_bytes(A, A); // } // // macro_rules! test_splat { // ($test_id:ident: $id:ident($val:expr) => $($vals:expr),*) => { // #[wasm_bindgen_test] // fn $test_id() { // const A: v128 = unsafe { // $id::splat($val) // }; // const B: v128 = unsafe { // v128::const_([$($vals),*]) // }; // compare_bytes(A, B); // } // } // } // // test_splat!(i8x16_splat: i8x16(42) => 42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42); // test_splat!(i16x8_splat: i16x8(42) => 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0, 42, 0); // test_splat!(i32x4_splat: i32x4(42) => 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0, 42, 0, 0, 0); // test_splat!(i64x2_splat: i64x2(42) => 42, 0, 0, 0, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0); // test_splat!(f32x4_splat: f32x4(42.) => 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66, 0, 0, 40, 66); // test_splat!(f64x2_splat: f64x2(42.) => 0, 0, 0, 0, 0, 0, 69, 64, 0, 0, 0, 0, 0, 0, 69, 64); // // // tests extract and replace lanes // macro_rules! test_extract { // ($test_id:ident: $id:ident[$ety:ident] => $extract_fn:ident | [$val:expr; $count:expr] // | [$($vals:expr),*] => ($other:expr) // | $($ids:expr),*) => { // #[wasm_bindgen_test] // fn $test_id() { // unsafe { // // splat vector and check that all indices contain the same value // // splatted: // const A: v128 = unsafe { // $id::splat($val) // }; // $( // assert_eq!($id::$extract_fn(A, $ids) as $ety, $val); // )*; // // // create a vector from array and check that the indices contain // // the same values as in the array: // let arr: [$ety; $count] = [$($vals),*]; // let mut vec: v128 = transmute(arr); // $( // assert_eq!($id::$extract_fn(vec, $ids) as $ety, arr[$ids]); // )*; // // // replace lane 0 with another value // vec = $id::replace_lane(vec, 0, $other); // assert_ne!($id::$extract_fn(vec, 0) as $ety, arr[0]); // assert_eq!($id::$extract_fn(vec, 0) as $ety, $other); // } // } // } // } // // test_extract!(i8x16_extract_u: i8x16[u8] => extract_lane_u | [255; 16] // | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] => (42) // | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 // ); // test_extract!(i8x16_extract_s: i8x16[i8] => extract_lane_s | [-122; 16] // | [0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15] => (-42) // | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 // ); // // test_extract!(i16x8_extract_u: i16x8[u16] => extract_lane_u | [255; 8] // | [0, 1, 2, 3, 4, 5, 6, 7] => (42) | 0, 1, 2, 3, 4, 5, 6, 7 // ); // test_extract!(i16x8_extract_s: i16x8[i16] => extract_lane_s | [-122; 8] // | [0, -1, 2, -3, 4, -5, 6, -7] => (-42) | 0, 1, 2, 3, 4, 5, 6, 7 // ); // test_extract!(i32x4_extract: i32x4[i32] => extract_lane | [-122; 4] // | [0, -1, 2, -3] => (42) | 0, 1, 2, 3 // ); // test_extract!(i64x2_extract: i64x2[i64] => extract_lane | [-122; 2] // | [0, -1] => (42) | 0, 1 // ); // test_extract!(f32x4_extract: f32x4[f32] => extract_lane | [-122.; 4] // | [0., -1., 2., -3.] 
=> (42.) | 0, 1, 2, 3 // ); // test_extract!(f64x2_extract: f64x2[f64] => extract_lane | [-122.; 2] // | [0., -1.] => (42.) | 0, 1 // ); // // #[wasm_bindgen_test] // fn v8x16_shuffle() { // unsafe { // let a = [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; // let b = [ // 16_u8, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, // 31, // ]; // // let vec_a: v128 = transmute(a); // let vec_b: v128 = transmute(b); // // let vec_r = v8x16_shuffle!( // vec_a, // vec_b, // [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30] // ); // // let e = // [0_u8, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]; // let vec_e: v128 = transmute(e); // compare_bytes(vec_r, vec_e); // } // } // // macro_rules! floating_point { // (f32) => { // true // }; // (f64) => { // true // }; // ($id:ident) => { // false // }; // } // // trait IsNan: Sized { // fn is_nan(self) -> bool { // false // } // } // impl IsNan for i8 {} // impl IsNan for i16 {} // impl IsNan for i32 {} // impl IsNan for i64 {} // // macro_rules! test_bop { // ($id:ident[$ety:ident; $ecount:expr] | // $binary_op:ident [$op_test_id:ident] : // ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => { // test_bop!( // $id[$ety; $ecount] => $ety | $binary_op [ $op_test_id ]: // ([$($in_a),*], [$($in_b),*]) => [$($out),*] // ); // // }; // ($id:ident[$ety:ident; $ecount:expr] => $oty:ident | // $binary_op:ident [$op_test_id:ident] : // ([$($in_a:expr),*], [$($in_b:expr),*]) => [$($out:expr),*]) => { // #[wasm_bindgen_test] // fn $op_test_id() { // unsafe { // let a_input: [$ety; $ecount] = [$($in_a),*]; // let b_input: [$ety; $ecount] = [$($in_b),*]; // let output: [$oty; $ecount] = [$($out),*]; // // let a_vec_in: v128 = transmute(a_input); // let b_vec_in: v128 = transmute(b_input); // let vec_res: v128 = $id::$binary_op(a_vec_in, b_vec_in); // // let res: [$oty; $ecount] = transmute(vec_res); // // if !floating_point!($ety) { // assert_eq!(res, output); // } else { // for i in 0..$ecount { // let r = res[i]; // let o = output[i]; // assert_eq!(r.is_nan(), o.is_nan()); // if !r.is_nan() { // assert_eq!(r, o); // } // } // } // } // } // } // } // // macro_rules! test_bops { // ($id:ident[$ety:ident; $ecount:expr] | // $binary_op:ident [$op_test_id:ident]: // ([$($in_a:expr),*], $in_b:expr) => [$($out:expr),*]) => { // #[wasm_bindgen_test] // fn $op_test_id() { // unsafe { // let a_input: [$ety; $ecount] = [$($in_a),*]; // let output: [$ety; $ecount] = [$($out),*]; // // let a_vec_in: v128 = transmute(a_input); // let vec_res: v128 = $id::$binary_op(a_vec_in, $in_b); // // let res: [$ety; $ecount] = transmute(vec_res); // assert_eq!(res, output); // } // } // } // } // // macro_rules! 
test_uop { // ($id:ident[$ety:ident; $ecount:expr] | // $unary_op:ident [$op_test_id:ident]: [$($in_a:expr),*] => [$($out:expr),*]) => { // #[wasm_bindgen_test] // fn $op_test_id() { // unsafe { // let a_input: [$ety; $ecount] = [$($in_a),*]; // let output: [$ety; $ecount] = [$($out),*]; // // let a_vec_in: v128 = transmute(a_input); // let vec_res: v128 = $id::$unary_op(a_vec_in); // // let res: [$ety; $ecount] = transmute(vec_res); // assert_eq!(res, output); // } // } // } // } // // test_bop!(i8x16[i8; 16] | add[i8x16_add_test]: // ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], // [8, i8::min_value(), 10, 11, 12, 13, 14, 1, 1, 1, 1, 1, 1, 1, 1, 1]) => // [8, i8::max_value(), 12, 14, 16, 18, 20, i8::min_value(), 2, 2, 2, 2, 2, 2, 2, 2]); // test_bop!(i8x16[i8; 16] | sub[i8x16_sub_test]: // ([0, -1, 2, 3, 4, 5, 6, -1, 1, 1, 1, 1, 1, 1, 1, 1], // [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1]) => // [-8, i8::max_value(), -8, -8, -8, -8, -8, i8::min_value(), 0, 0, 0, 0, 0, 0, 0, 0]); // test_bop!(i8x16[i8; 16] | mul[i8x16_mul_test]: // ([0, -2, 2, 3, 4, 5, 6, 2, 1, 1, 1, 1, 1, 1, 1, 1], // [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1]) => // [0, 0, 20, 33, 48, 65, 84, -2, 1, 1, 1, 1, 1, 1, 1, 1]); // test_uop!(i8x16[i8; 16] | neg[i8x16_neg_test]: // [8, i8::min_value(), 10, 11, 12, 13, 14, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1] => // [-8, i8::min_value(), -10, -11, -12, -13, -14, i8::min_value() + 1, -1, -1, -1, -1, -1, -1, -1, -1]); // // test_bop!(i16x8[i16; 8] | add[i16x8_add_test]: // ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], // [8, i16::min_value(), 10, 11, 12, 13, 14, 1]) => // [8, i16::max_value(), 12, 14, 16, 18, 20, i16::min_value()]); // test_bop!(i16x8[i16; 8] | sub[i16x8_sub_test]: // ([0, -1, 2, 3, 4, 5, 6, -1], // [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()]) => // [-8, i16::max_value(), -8, -8, -8, -8, -8, i16::min_value()]); // test_bop!(i16x8[i16; 8] | mul[i16x8_mul_test]: // ([0, -2, 2, 3, 4, 5, 6, 2], // [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()]) => // [0, 0, 20, 33, 48, 65, 84, -2]); // test_uop!(i16x8[i16; 8] | neg[i16x8_neg_test]: // [8, i16::min_value(), 10, 11, 12, 13, 14, i16::max_value()] => // [-8, i16::min_value(), -10, -11, -12, -13, -14, i16::min_value() + 1]); // // test_bop!(i32x4[i32; 4] | add[i32x4_add_test]: // ([0, -1, 2, i32::max_value()], // [8, i32::min_value(), 10, 1]) => // [8, i32::max_value(), 12, i32::min_value()]); // test_bop!(i32x4[i32; 4] | sub[i32x4_sub_test]: // ([0, -1, 2, -1], // [8, i32::min_value(), 10, i32::max_value()]) => // [-8, i32::max_value(), -8, i32::min_value()]); // test_bop!(i32x4[i32; 4] | mul[i32x4_mul_test]: // ([0, -2, 2, 2], // [8, i32::min_value(), 10, i32::max_value()]) => // [0, 0, 20, -2]); // test_uop!(i32x4[i32; 4] | neg[i32x4_neg_test]: // [8, i32::min_value(), 10, i32::max_value()] => // [-8, i32::min_value(), -10, i32::min_value() + 1]); // // test_bop!(i64x2[i64; 2] | add[i64x2_add_test]: // ([-1, i64::max_value()], // [i64::min_value(), 1]) => // [i64::max_value(), i64::min_value()]); // test_bop!(i64x2[i64; 2] | sub[i64x2_sub_test]: // ([-1, -1], // [i64::min_value(), i64::max_value()]) => // [ i64::max_value(), i64::min_value()]); // // note: mul for i64x2 is not part of the spec // test_uop!(i64x2[i64; 2] | neg[i64x2_neg_test]: // [i64::min_value(), i64::max_value()] => // [i64::min_value(), i64::min_value() + 1]); // // test_bops!(i8x16[i8; 16] | shl[i8x16_shl_test]: // ([0, -1, 2, 
3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) => // [0, -2, 4, 6, 8, 10, 12, -2, 2, 2, 2, 2, 2, 2, 2, 2]); // test_bops!(i16x8[i16; 8] | shl[i16x8_shl_test]: // ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) => // [0, -2, 4, 6, 8, 10, 12, -2]); // test_bops!(i32x4[i32; 4] | shl[i32x4_shl_test]: // ([0, -1, 2, 3], 1) => [0, -2, 4, 6]); // test_bops!(i64x2[i64; 2] | shl[i64x2_shl_test]: // ([0, -1], 1) => [0, -2]); // // test_bops!(i8x16[i8; 16] | shr_s[i8x16_shr_s_test]: // ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) => // [0, -1, 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]); // test_bops!(i16x8[i16; 8] | shr_s[i16x8_shr_s_test]: // ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) => // [0, -1, 1, 1, 2, 2, 3, i16::max_value() / 2]); // test_bops!(i32x4[i32; 4] | shr_s[i32x4_shr_s_test]: // ([0, -1, 2, 3], 1) => [0, -1, 1, 1]); // test_bops!(i64x2[i64; 2] | shr_s[i64x2_shr_s_test]: // ([0, -1], 1) => [0, -1]); // // test_bops!(i8x16[i8; 16] | shr_u[i8x16_uhr_u_test]: // ([0, -1, 2, 3, 4, 5, 6, i8::max_value(), 1, 1, 1, 1, 1, 1, 1, 1], 1) => // [0, i8::max_value(), 1, 1, 2, 2, 3, 63, 0, 0, 0, 0, 0, 0, 0, 0]); // test_bops!(i16x8[i16; 8] | shr_u[i16x8_uhr_u_test]: // ([0, -1, 2, 3, 4, 5, 6, i16::max_value()], 1) => // [0, i16::max_value(), 1, 1, 2, 2, 3, i16::max_value() / 2]); // test_bops!(i32x4[i32; 4] | shr_u[i32x4_uhr_u_test]: // ([0, -1, 2, 3], 1) => [0, i32::max_value(), 1, 1]); // test_bops!(i64x2[i64; 2] | shr_u[i64x2_uhr_u_test]: // ([0, -1], 1) => [0, i64::max_value()]); // // #[wasm_bindgen_test] // fn v128_bitwise_logical_ops() { // unsafe { // let a: [u32; 4] = [u32::max_value(), 0, u32::max_value(), 0]; // let b: [u32; 4] = [u32::max_value(); 4]; // let c: [u32; 4] = [0; 4]; // // let vec_a: v128 = transmute(a); // let vec_b: v128 = transmute(b); // let vec_c: v128 = transmute(c); // // let r: v128 = v128::and(vec_a, vec_a); // compare_bytes(r, vec_a); // let r: v128 = v128::and(vec_a, vec_b); // compare_bytes(r, vec_a); // let r: v128 = v128::or(vec_a, vec_b); // compare_bytes(r, vec_b); // let r: v128 = v128::not(vec_b); // compare_bytes(r, vec_c); // let r: v128 = v128::xor(vec_a, vec_c); // compare_bytes(r, vec_a); // // let r: v128 = v128::bitselect(vec_b, vec_c, vec_b); // compare_bytes(r, vec_b); // let r: v128 = v128::bitselect(vec_b, vec_c, vec_c); // compare_bytes(r, vec_c); // let r: v128 = v128::bitselect(vec_b, vec_c, vec_a); // compare_bytes(r, vec_a); // } // } // // macro_rules! 
test_bool_red { // ($id:ident[$test_id:ident] | [$($true:expr),*] | [$($false:expr),*] | [$($alt:expr),*]) => { // #[wasm_bindgen_test] // fn $test_id() { // unsafe { // let vec_a: v128 = transmute([$($true),*]); // true // let vec_b: v128 = transmute([$($false),*]); // false // let vec_c: v128 = transmute([$($alt),*]); // alternating // // assert_eq!($id::any_true(vec_a), 1); // assert_eq!($id::any_true(vec_b), 0); // assert_eq!($id::any_true(vec_c), 1); // // assert_eq!($id::all_true(vec_a), 1); // assert_eq!($id::all_true(vec_b), 0); // assert_eq!($id::all_true(vec_c), 0); // } // } // } // } // // test_bool_red!( // i8x16[i8x16_boolean_reductions] // | [1_i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] // | [0_i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] // | [1_i8, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0] // ); // test_bool_red!( // i16x8[i16x8_boolean_reductions] // | [1_i16, 1, 1, 1, 1, 1, 1, 1] // | [0_i16, 0, 0, 0, 0, 0, 0, 0] // | [1_i16, 0, 1, 0, 1, 0, 1, 0] // ); // test_bool_red!( // i32x4[i32x4_boolean_reductions] // | [1_i32, 1, 1, 1] // | [0_i32, 0, 0, 0] // | [1_i32, 0, 1, 0] // ); // test_bool_red!( // i64x2[i64x2_boolean_reductions] | [1_i64, 1] | [0_i64, 0] | [1_i64, 0] // ); // // test_bop!(i8x16[i8; 16] | eq[i8x16_eq_test]: // ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], // [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => // [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); // test_bop!(i16x8[i16; 8] | eq[i16x8_eq_test]: // ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => // [-1, 0, -1, 0 ,-1, 0, -1, -1]); // test_bop!(i32x4[i32; 4] | eq[i32x4_eq_test]: // ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); // test_bop!(i64x2[i64; 2] | eq[i64x2_eq_test]: ([0, 1], [0, 2]) => [-1, 0]); // test_bop!(f32x4[f32; 4] => i32 | eq[f32x4_eq_test]: // ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); // test_bop!(f64x2[f64; 2] => i64 | eq[f64x2_eq_test]: ([0., 1.], [0., 2.]) => [-1, 0]); // // test_bop!(i8x16[i8; 16] | ne[i8x16_ne_test]: // ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], // [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => // [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); // test_bop!(i16x8[i16; 8] | ne[i16x8_ne_test]: // ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => // [0, -1, 0, -1 ,0, -1, 0, 0]); // test_bop!(i32x4[i32; 4] | ne[i32x4_ne_test]: // ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); // test_bop!(i64x2[i64; 2] | ne[i64x2_ne_test]: ([0, 1], [0, 2]) => [0, -1]); // test_bop!(f32x4[f32; 4] => i32 | ne[f32x4_ne_test]: // ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); // test_bop!(f64x2[f64; 2] => i64 | ne[f64x2_ne_test]: ([0., 1.], [0., 2.]) => [0, -1]); // // test_bop!(i8x16[i8; 16] | lt[i8x16_lt_test]: // ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], // [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => // [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); // test_bop!(i16x8[i16; 8] | lt[i16x8_lt_test]: // ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => // [0, -1, 0, -1 ,0, -1, 0, 0]); // test_bop!(i32x4[i32; 4] | lt[i32x4_lt_test]: // ([0, 1, 2, 3], [0, 2, 2, 4]) => [0, -1, 0, -1]); // test_bop!(i64x2[i64; 2] | lt[i64x2_lt_test]: ([0, 1], [0, 2]) => [0, -1]); // test_bop!(f32x4[f32; 4] => i32 | lt[f32x4_lt_test]: // ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [0, -1, 0, -1]); // test_bop!(f64x2[f64; 2] => i64 | lt[f64x2_lt_test]: ([0., 1.], [0., 2.]) => [0, -1]); // // test_bop!(i8x16[i8; 16] | gt[i8x16_gt_test]: // 
([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15], // [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) => // [0, -1, 0, -1 ,0, -1, 0, 0, 0, -1, 0, -1 ,0, -1, 0, 0]); // test_bop!(i16x8[i16; 8] | gt[i16x8_gt_test]: // ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) => // [0, -1, 0, -1 ,0, -1, 0, 0]); // test_bop!(i32x4[i32; 4] | gt[i32x4_gt_test]: // ([0, 2, 2, 4], [0, 1, 2, 3]) => [0, -1, 0, -1]); // test_bop!(i64x2[i64; 2] | gt[i64x2_gt_test]: ([0, 2], [0, 1]) => [0, -1]); // test_bop!(f32x4[f32; 4] => i32 | gt[f32x4_gt_test]: // ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [0, -1, 0, -1]); // test_bop!(f64x2[f64; 2] => i64 | gt[f64x2_gt_test]: ([0., 2.], [0., 1.]) => [0, -1]); // // test_bop!(i8x16[i8; 16] | ge[i8x16_ge_test]: // ([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], // [0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15]) => // [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); // test_bop!(i16x8[i16; 8] | ge[i16x8_ge_test]: // ([0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 2, 4, 4, 6, 6, 7]) => // [-1, 0, -1, 0 ,-1, 0, -1, -1]); // test_bop!(i32x4[i32; 4] | ge[i32x4_ge_test]: // ([0, 1, 2, 3], [0, 2, 2, 4]) => [-1, 0, -1, 0]); // test_bop!(i64x2[i64; 2] | ge[i64x2_ge_test]: ([0, 1], [0, 2]) => [-1, 0]); // test_bop!(f32x4[f32; 4] => i32 | ge[f32x4_ge_test]: // ([0., 1., 2., 3.], [0., 2., 2., 4.]) => [-1, 0, -1, 0]); // test_bop!(f64x2[f64; 2] => i64 | ge[f64x2_ge_test]: ([0., 1.], [0., 2.]) => [-1, 0]); // // test_bop!(i8x16[i8; 16] | le[i8x16_le_test]: // ([0, 2, 2, 4, 4, 6, 6, 7, 8, 10, 10, 12, 12, 14, 14, 15], // [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] // ) => // [-1, 0, -1, 0 ,-1, 0, -1, -1, -1, 0, -1, 0 ,-1, 0, -1, -1]); // test_bop!(i16x8[i16; 8] | le[i16x8_le_test]: // ([0, 2, 2, 4, 4, 6, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7]) => // [-1, 0, -1, 0 ,-1, 0, -1, -1]); // test_bop!(i32x4[i32; 4] | le[i32x4_le_test]: // ([0, 2, 2, 4], [0, 1, 2, 3]) => [-1, 0, -1, 0]); // test_bop!(i64x2[i64; 2] | le[i64x2_le_test]: ([0, 2], [0, 1]) => [-1, 0]); // test_bop!(f32x4[f32; 4] => i32 | le[f32x4_le_test]: // ([0., 2., 2., 4.], [0., 1., 2., 3.]) => [-1, 0, -1, -0]); // test_bop!(f64x2[f64; 2] => i64 | le[f64x2_le_test]: ([0., 2.], [0., 1.]) => [-1, 0]); // // #[wasm_bindgen_test] // fn v128_bitwise_load_store() { // unsafe { // let mut arr: [i32; 4] = [0, 1, 2, 3]; // // let vec = v128::load(arr.as_ptr() as *const v128); // let vec = i32x4::add(vec, vec); // v128::store(arr.as_mut_ptr() as *mut v128, vec); // // assert_eq!(arr, [0, 2, 4, 6]); // } // } // // test_uop!(f32x4[f32; 4] | neg[f32x4_neg_test]: [0., 1., 2., 3.] => [ 0., -1., -2., -3.]); // test_uop!(f32x4[f32; 4] | abs[f32x4_abs_test]: [0., -1., 2., -3.] 
=> [ 0., 1., 2., 3.]); // test_bop!(f32x4[f32; 4] | min[f32x4_min_test]: // ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., -3., -4., 8.]); // test_bop!(f32x4[f32; 4] | min[f32x4_min_test_nan]: // ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN]) // => [0., -3., -4., std::f32::NAN]); // test_bop!(f32x4[f32; 4] | max[f32x4_max_test]: // ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -1., 7., 10.]); // test_bop!(f32x4[f32; 4] | max[f32x4_max_test_nan]: // ([0., -1., 7., 8.], [1., -3., -4., std::f32::NAN]) // => [1., -1., 7., std::f32::NAN]); // test_bop!(f32x4[f32; 4] | add[f32x4_add_test]: // ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [1., -4., 3., 18.]); // test_bop!(f32x4[f32; 4] | sub[f32x4_sub_test]: // ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [-1., 2., 11., -2.]); // test_bop!(f32x4[f32; 4] | mul[f32x4_mul_test]: // ([0., -1., 7., 8.], [1., -3., -4., 10.]) => [0., 3., -28., 80.]); // test_bop!(f32x4[f32; 4] | div[f32x4_div_test]: // ([0., -8., 70., 8.], [1., 4., 10., 2.]) => [0., -2., 7., 4.]); // // test_uop!(f64x2[f64; 2] | neg[f64x2_neg_test]: [0., 1.] => [ 0., -1.]); // test_uop!(f64x2[f64; 2] | abs[f64x2_abs_test]: [0., -1.] => [ 0., 1.]); // test_bop!(f64x2[f64; 2] | min[f64x2_min_test]: // ([0., -1.], [1., -3.]) => [0., -3.]); // test_bop!(f64x2[f64; 2] | min[f64x2_min_test_nan]: // ([7., 8.], [-4., std::f64::NAN]) // => [ -4., std::f64::NAN]); // test_bop!(f64x2[f64; 2] | max[f64x2_max_test]: // ([0., -1.], [1., -3.]) => [1., -1.]); // test_bop!(f64x2[f64; 2] | max[f64x2_max_test_nan]: // ([7., 8.], [ -4., std::f64::NAN]) // => [7., std::f64::NAN]); // test_bop!(f64x2[f64; 2] | add[f64x2_add_test]: // ([0., -1.], [1., -3.]) => [1., -4.]); // test_bop!(f64x2[f64; 2] | sub[f64x2_sub_test]: // ([0., -1.], [1., -3.]) => [-1., 2.]); // test_bop!(f64x2[f64; 2] | mul[f64x2_mul_test]: // ([0., -1.], [1., -3.]) => [0., 3.]); // test_bop!(f64x2[f64; 2] | div[f64x2_div_test]: // ([0., -8.], [1., 4.]) => [0., -2.]); // // macro_rules! test_conv { // ($test_id:ident | $conv_id:ident | $to_ty:ident | $from:expr, $to:expr) => { // #[wasm_bindgen_test] // fn $test_id() { // unsafe { // let from: v128 = transmute($from); // let to: v128 = transmute($to); // // let r: v128 = $to_ty::$conv_id(from); // // compare_bytes(r, to); // } // } // }; // } // // test_conv!( // f32x4_convert_s_i32x4 | convert_s_i32x4 | f32x4 | [1_i32, 2, 3, 4], // [1_f32, 2., 3., 4.] // ); // test_conv!( // f32x4_convert_u_i32x4 // | convert_u_i32x4 // | f32x4 // | [u32::max_value(), 2, 3, 4], // [u32::max_value() as f32, 2., 3., 4.] // ); // test_conv!( // f64x2_convert_s_i64x2 | convert_s_i64x2 | f64x2 | [1_i64, 2], // [1_f64, 2.] // ); // test_conv!( // f64x2_convert_u_i64x2 // | convert_u_i64x2 // | f64x2 // | [u64::max_value(), 2], // [18446744073709552000.0, 2.] // ); // // // FIXME: this fails, and produces -2147483648 instead of saturating at // // i32::max_value() test_conv!(i32x4_trunc_s_f32x4_sat | trunc_s_f32x4_sat // // | i32x4 | [1_f32, 2., (i32::max_value() as f32 + 1.), 4.], // // [1_i32, 2, i32::max_value(), 4]); FIXME: add other saturating tests // } core_arch-0.1.5/src/x86/abm.rs010064400007650000024000000037451343447103600142200ustar0000000000000000//! Advanced Bit Manipulation (ABM) instructions //! //! The POPCNT and LZCNT have their own CPUID bits to indicate support. //! //! The references are: //! //! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: //! Instruction Set Reference, A-Z][intel64_ref]. //! 
- [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and //! System Instructions][amd64_ref]. //! //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions //! available. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf //! [wikipedia_bmi]: //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] use stdsimd_test::assert_instr; /// Counts the leading most significant zero bits. /// /// When the operand is zero, it returns its size in bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u32) #[inline] #[target_feature(enable = "lzcnt")] #[cfg_attr(test, assert_instr(lzcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _lzcnt_u32(x: u32) -> u32 { x.leading_zeros() } /// Counts the bits that are set. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt32) #[inline] #[target_feature(enable = "popcnt")] #[cfg_attr(test, assert_instr(popcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _popcnt32(x: i32) -> i32 { x.count_ones() as i32 } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use crate::core_arch::x86::*; #[simd_test(enable = "lzcnt")] unsafe fn test_lzcnt_u32() { assert_eq!(_lzcnt_u32(0b0101_1010), 25); } #[simd_test(enable = "popcnt")] unsafe fn test_popcnt32() { assert_eq!(_popcnt32(0b0101_1010), 4); } } core_arch-0.1.5/src/x86/adx.rs010064400007650000024000000104611345562034300142260ustar0000000000000000#[cfg(test)] use stdsimd_test::assert_instr; #[allow(improper_ctypes)] extern "unadjusted" { #[link_name = "llvm.x86.addcarry.32"] fn llvm_addcarry_u32(a: u8, b: u32, c: u32) -> (u8, u32); #[link_name = "llvm.x86.addcarryx.u32"] fn llvm_addcarryx_u32(a: u8, b: u32, c: u32, d: *mut u8) -> u8; #[link_name = "llvm.x86.subborrow.32"] fn llvm_subborrow_u32(a: u8, b: u32, c: u32) -> (u8, u32); } /// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` /// (carry flag), and store the unsigned 32-bit result in `out`, and the carry-out /// is returned (carry or overflow flag). #[inline] #[cfg_attr(test, assert_instr(adc))] #[stable(feature = "simd_x86_adx", since = "1.33.0")] pub unsafe fn _addcarry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { let (a, b) = llvm_addcarry_u32(c_in, a, b); *out = b; a } /// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` /// (carry or overflow flag), and store the unsigned 32-bit result in `out`, and /// the carry-out is returned (carry or overflow flag). #[inline] #[target_feature(enable = "adx")] #[cfg_attr(test, assert_instr(adc))] #[stable(feature = "simd_x86_adx", since = "1.33.0")] pub unsafe fn _addcarryx_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { llvm_addcarryx_u32(c_in, a, b, out as *mut _ as *mut u8) } /// Adds unsigned 32-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` /// (carry or overflow flag), and store the unsigned 32-bit result in `out`, and /// the carry-out is returned (carry or overflow flag). 
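// Carry-chain sketch: the carry-out returned by `_addcarry_u32` feeds the carry-in of the next limb, which is how wider additions are assembled from 32-bit pieces. A minimal two-limb (64-bit) addition; the helper name `add_u64_via_u32` is illustrative only, and any final carry out of the high limb is discarded, matching wrapping 64-bit addition.
//
// unsafe fn add_u64_via_u32(a: u64, b: u64) -> u64 {
//     let (mut lo, mut hi) = (0_u32, 0_u32);
//     // Low limb first; its carry-out becomes the carry-in of the high limb.
//     let carry = _addcarry_u32(0, a as u32, b as u32, &mut lo);
//     let _ = _addcarry_u32(carry, (a >> 32) as u32, (b >> 32) as u32, &mut hi);
//     ((hi as u64) << 32) | lo as u64
// }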
#[inline] #[cfg_attr(test, assert_instr(sbb))] #[stable(feature = "simd_x86_adx", since = "1.33.0")] pub unsafe fn _subborrow_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8 { let (a, b) = llvm_subborrow_u32(c_in, a, b); *out = b; a } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use crate::core_arch::x86::*; #[test] fn test_addcarry_u32() { unsafe { let a = u32::max_value(); let mut out = 0; let r = _addcarry_u32(0, a, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, 0); let r = _addcarry_u32(0, a, 0, &mut out); assert_eq!(r, 0); assert_eq!(out, a); let r = _addcarry_u32(1, a, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, 1); let r = _addcarry_u32(1, a, 0, &mut out); assert_eq!(r, 1); assert_eq!(out, 0); let r = _addcarry_u32(0, 3, 4, &mut out); assert_eq!(r, 0); assert_eq!(out, 7); let r = _addcarry_u32(1, 3, 4, &mut out); assert_eq!(r, 0); assert_eq!(out, 8); } } #[simd_test(enable = "adx")] unsafe fn test_addcarryx_u32() { let a = u32::max_value(); let mut out = 0; let r = _addcarry_u32(0, a, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, 0); let r = _addcarry_u32(0, a, 0, &mut out); assert_eq!(r, 0); assert_eq!(out, a); let r = _addcarry_u32(1, a, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, 1); let r = _addcarry_u32(1, a, 0, &mut out); assert_eq!(r, 1); assert_eq!(out, 0); let r = _addcarry_u32(0, 3, 4, &mut out); assert_eq!(r, 0); assert_eq!(out, 7); let r = _addcarry_u32(1, 3, 4, &mut out); assert_eq!(r, 0); assert_eq!(out, 8); } #[test] fn test_subborrow_u32() { unsafe { let a = u32::max_value(); let mut out = 0; let r = _subborrow_u32(0, 0, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, a); let r = _subborrow_u32(0, 0, 0, &mut out); assert_eq!(r, 0); assert_eq!(out, 0); let r = _subborrow_u32(1, 0, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, a - 1); let r = _subborrow_u32(1, 0, 0, &mut out); assert_eq!(r, 1); assert_eq!(out, a); let r = _subborrow_u32(0, 7, 3, &mut out); assert_eq!(r, 0); assert_eq!(out, 4); let r = _subborrow_u32(1, 7, 3, &mut out); assert_eq!(r, 0); assert_eq!(out, 3); } } } core_arch-0.1.5/src/x86/aes.rs010064400007650000024000000155631343447103600142320ustar0000000000000000//! AES New Instructions (AES-NI) //! //! The intrinsics here correspond to those in the `wmmintrin.h` C header. //! //! The reference is [Intel 64 and IA-32 Architectures Software Developer's //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf use crate::core_arch::x86::__m128i; #[cfg(test)] use stdsimd_test::assert_instr; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.aesni.aesdec"] fn aesdec(a: __m128i, round_key: __m128i) -> __m128i; #[link_name = "llvm.x86.aesni.aesdeclast"] fn aesdeclast(a: __m128i, round_key: __m128i) -> __m128i; #[link_name = "llvm.x86.aesni.aesenc"] fn aesenc(a: __m128i, round_key: __m128i) -> __m128i; #[link_name = "llvm.x86.aesni.aesenclast"] fn aesenclast(a: __m128i, round_key: __m128i) -> __m128i; #[link_name = "llvm.x86.aesni.aesimc"] fn aesimc(a: __m128i) -> __m128i; #[link_name = "llvm.x86.aesni.aeskeygenassist"] fn aeskeygenassist(a: __m128i, imm8: u8) -> __m128i; } /// Performs one round of an AES decryption flow on data (state) in `a`. 
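///
/// A rough sketch of a single decryption round (illustrative only; the raw bit
/// patterns are the same ones used in `test_mm_aesdec_si128` below, and the `aes`
/// target feature is assumed to be available):
///
/// ```ignore
/// let state = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
/// let round_key = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
/// // InvShiftRows/InvSubBytes/InvMixColumns, then XOR with the round key.
/// let next_state = _mm_aesdec_si128(state, round_key);
/// ```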
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdec_si128) #[inline] #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesdec))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_aesdec_si128(a: __m128i, round_key: __m128i) -> __m128i { aesdec(a, round_key) } /// Performs the last round of an AES decryption flow on data (state) in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdeclast_si128) #[inline] #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesdeclast))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_aesdeclast_si128(a: __m128i, round_key: __m128i) -> __m128i { aesdeclast(a, round_key) } /// Performs one round of an AES encryption flow on data (state) in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenc_si128) #[inline] #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesenc))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_aesenc_si128(a: __m128i, round_key: __m128i) -> __m128i { aesenc(a, round_key) } /// Performs the last round of an AES encryption flow on data (state) in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128) #[inline] #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesenclast))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_aesenclast_si128(a: __m128i, round_key: __m128i) -> __m128i { aesenclast(a, round_key) } /// Performs the `InvMixColumns` transformation on `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesimc_si128) #[inline] #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aesimc))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_aesimc_si128(a: __m128i) -> __m128i { aesimc(a) } /// Assist in expanding the AES cipher key. /// /// Assist in expanding the AES cipher key by computing steps towards /// generating a round key for encryption cipher using data from `a` and an /// 8-bit round constant `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aeskeygenassist_si128) #[inline] #[target_feature(enable = "aes")] #[cfg_attr(test, assert_instr(aeskeygenassist, imm8 = 0))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_aeskeygenassist_si128(a: __m128i, imm8: i32) -> __m128i { macro_rules! call { ($imm8:expr) => { aeskeygenassist(a, $imm8) }; } constify_imm8!(imm8, call) } #[cfg(test)] mod tests { // The constants in the tests below are just bit patterns. They should not // be interpreted as integers; signedness does not make sense for them, but // __m128i happens to be defined in terms of signed integers. #![allow(overflowing_literals)] use stdsimd_test::simd_test; use crate::core_arch::x86::*; #[simd_test(enable = "aes")] unsafe fn test_mm_aesdec_si128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx. 
let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee); let r = _mm_aesdec_si128(a, k); assert_eq_m128i(r, e); } #[simd_test(enable = "aes")] unsafe fn test_mm_aesdeclast_si128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx. let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493); let r = _mm_aesdeclast_si128(a, k); assert_eq_m128i(r, e); } #[simd_test(enable = "aes")] unsafe fn test_mm_aesenc_si128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx. let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333); let r = _mm_aesenc_si128(a, k); assert_eq_m128i(r, e); } #[simd_test(enable = "aes")] unsafe fn test_mm_aesenclast_si128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx. let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee); let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8); let r = _mm_aesenclast_si128(a, k); assert_eq_m128i(r, e); } #[simd_test(enable = "aes")] unsafe fn test_mm_aesimc_si128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc714195.aspx. let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); let e = _mm_set_epi64x(0xc66c82284ee40aa0, 0x6633441122770055); let r = _mm_aesimc_si128(a); assert_eq_m128i(r, e); } #[simd_test(enable = "aes")] unsafe fn test_mm_aeskeygenassist_si128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc714138.aspx. let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff); let e = _mm_set_epi64x(0x857c266b7c266e85, 0xeac4eea9c4eeacea); let r = _mm_aeskeygenassist_si128(a, 5); assert_eq_m128i(r, e); } } core_arch-0.1.5/src/x86/avx.rs010064400007650000024000005517171345561510300142640ustar0000000000000000//! Advanced Vector Extensions (AVX) //! //! The references are: //! //! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: //! Instruction Set Reference, A-Z][intel64_ref]. - [AMD64 Architecture //! Programmer's Manual, Volume 3: General-Purpose and System //! Instructions][amd64_ref]. //! //! [Wikipedia][wiki] provides a quick overview of the instructions available. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf //! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions use crate::{ core_arch::{simd::*, simd_llvm::*, x86::*}, intrinsics, mem::{self, transmute}, ptr, }; #[cfg(test)] use stdsimd_test::assert_instr; /// Adds packed double-precision (64-bit) floating-point elements /// in `a` and `b`. 
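///
/// A minimal sketch (illustrative, assuming AVX is available at runtime):
///
/// ```ignore
/// let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
/// let b = _mm256_setr_pd(5.0, 6.0, 7.0, 8.0);
/// // Element-wise sums: [6.0, 8.0, 10.0, 12.0]
/// let r = _mm256_add_pd(a, b);
/// ```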
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d { simd_add(a, b) } /// Adds packed single-precision (32-bit) floating-point elements in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { simd_add(a, b) } /// Computes the bitwise AND of a packed double-precision (64-bit) /// floating-point elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_pd) #[inline] #[target_feature(enable = "avx")] // FIXME: Should be 'vandpd' instuction. // See https://github.com/rust-lang-nursery/stdsimd/issues/71 #[cfg_attr(test, assert_instr(vandps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = transmute(a); let b: u64x4 = transmute(b); transmute(simd_and(a, b)) } /// Computes the bitwise AND of packed single-precision (32-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vandps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = transmute(a); let b: u32x8 = transmute(b); transmute(simd_and(a, b)) } /// Computes the bitwise OR packed double-precision (64-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_pd) #[inline] #[target_feature(enable = "avx")] // FIXME: should be `vorpd` instuction. // See . #[cfg_attr(test, assert_instr(vorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = transmute(a); let b: u64x4 = transmute(b); transmute(simd_or(a, b)) } /// Computes the bitwise OR packed single-precision (32-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = transmute(a); let b: u32x8 = transmute(b); transmute(simd_or(a, b)) } /// Shuffles double-precision (64-bit) floating-point elements within 128-bit /// lanes using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vshufpd, imm8 = 0x1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { let imm8 = (imm8 & 0xFF) as u8; macro_rules! 
shuffle4 { ($a:expr, $b:expr, $c:expr, $d:expr) => { simd_shuffle4(a, b, [$a, $b, $c, $d]); }; } macro_rules! shuffle3 { ($a:expr, $b:expr, $c:expr) => { match (imm8 >> 3) & 0x1 { 0 => shuffle4!($a, $b, $c, 6), _ => shuffle4!($a, $b, $c, 7), } }; } macro_rules! shuffle2 { ($a:expr, $b:expr) => { match (imm8 >> 2) & 0x1 { 0 => shuffle3!($a, $b, 2), _ => shuffle3!($a, $b, 3), } }; } macro_rules! shuffle1 { ($a:expr) => { match (imm8 >> 1) & 0x1 { 0 => shuffle2!($a, 4), _ => shuffle2!($a, 5), } }; } match imm8 & 0x1 { 0 => shuffle1!(0), _ => shuffle1!(1), } } /// Shuffles single-precision (32-bit) floating-point elements in `a` within /// 128-bit lanes using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vshufps, imm8 = 0x0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle4 { ( $a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr ) => { simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]); }; } macro_rules! shuffle3 { ($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => { match (imm8 >> 6) & 0x3 { 0 => shuffle4!($a, $b, $c, 8, $e, $f, $g, 12), 1 => shuffle4!($a, $b, $c, 9, $e, $f, $g, 13), 2 => shuffle4!($a, $b, $c, 10, $e, $f, $g, 14), _ => shuffle4!($a, $b, $c, 11, $e, $f, $g, 15), } }; } macro_rules! shuffle2 { ($a:expr, $b:expr, $e:expr, $f:expr) => { match (imm8 >> 4) & 0x3 { 0 => shuffle3!($a, $b, 8, $e, $f, 12), 1 => shuffle3!($a, $b, 9, $e, $f, 13), 2 => shuffle3!($a, $b, 10, $e, $f, 14), _ => shuffle3!($a, $b, 11, $e, $f, 15), } }; } macro_rules! shuffle1 { ($a:expr, $e:expr) => { match (imm8 >> 2) & 0x3 { 0 => shuffle2!($a, 0, $e, 4), 1 => shuffle2!($a, 1, $e, 5), 2 => shuffle2!($a, 2, $e, 6), _ => shuffle2!($a, 3, $e, 7), } }; } match imm8 & 0x3 { 0 => shuffle1!(0, 4), 1 => shuffle1!(1, 5), 2 => shuffle1!(2, 6), _ => shuffle1!(3, 7), } } /// Computes the bitwise NOT of packed double-precision (64-bit) floating-point /// elements in `a`, and then AND with `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_andnot_pd) #[inline] #[target_feature(enable = "avx")] // FIXME: should be `vandnpd` instruction. #[cfg_attr(test, assert_instr(vandnps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = transmute(a); let b: u64x4 = transmute(b); transmute(simd_and(simd_xor(u64x4::splat(!(0_u64)), a), b)) } /// Computes the bitwise NOT of packed single-precision (32-bit) floating-point /// elements in `a` /// and then AND with `b`. 
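///
/// A small sketch of a common use, clearing the sign bit to take absolute values
/// (illustrative only, assuming AVX is available):
///
/// ```ignore
/// let sign_mask = _mm256_set1_ps(-0.0); // only the sign bit set in each lane
/// let x = _mm256_setr_ps(-1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0, 8.0);
/// // (!sign_mask) & x clears each sign bit: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
/// let abs_x = _mm256_andnot_ps(sign_mask, x);
/// ```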
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_andnot_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vandnps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = transmute(a); let b: u32x8 = transmute(b); transmute(simd_and(simd_xor(u32x8::splat(!(0_u32)), a), b)) } /// Compares packed double-precision (64-bit) floating-point elements /// in `a` and `b`, and returns packed maximum values /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaxpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d { maxpd256(a, b) } /// Compares packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and returns packed maximum values /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaxps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_ps(a: __m256, b: __m256) -> __m256 { maxps256(a, b) } /// Compares packed double-precision (64-bit) floating-point elements /// in `a` and `b`, and returns packed minimum values /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vminpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d { minpd256(a, b) } /// Compares packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and returns packed minimum values /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vminps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 { minps256(a, b) } /// Multiplies packed double-precision (64-bit) floating-point elements /// in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmulpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d { simd_mul(a, b) } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmulps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 { simd_mul(a, b) } /// Alternatively adds and subtracts packed double-precision (64-bit) /// floating-point elements in `a` to/from packed elements in `b`. 
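///
/// A short sketch of the alternating pattern (illustrative, assuming AVX):
/// even-indexed elements are subtracted, odd-indexed elements are added.
///
/// ```ignore
/// let a = _mm256_setr_pd(10.0, 10.0, 10.0, 10.0);
/// let b = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
/// // [10.0 - 1.0, 10.0 + 2.0, 10.0 - 3.0, 10.0 + 4.0] = [9.0, 12.0, 7.0, 14.0]
/// let r = _mm256_addsub_pd(a, b);
/// ```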
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_addsub_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddsubpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d { addsubpd256(a, b) } /// Alternatively adds and subtracts packed single-precision (32-bit) /// floating-point elements in `a` to/from packed elements in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_addsub_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vaddsubps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 { addsubps256(a, b) } /// Subtracts packed double-precision (64-bit) floating-point elements in `b` /// from packed elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsubpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d { simd_sub(a, b) } /// Subtracts packed single-precision (32-bit) floating-point elements in `b` /// from packed elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsubps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 { simd_sub(a, b) } /// Computes the division of each of the 8 packed 32-bit floating-point elements /// in `a` by the corresponding packed elements in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vdivps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 { simd_div(a, b) } /// Computes the division of each of the 4 packed 64-bit floating-point elements /// in `a` by the corresponding packed elements in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_div_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vdivpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d { simd_div(a, b) } /// Rounds packed double-precision (64-bit) floating point elements in `a` /// according to the flag `b`. The value of `b` may be as follows: /// /// - `0x00`: Round to the nearest whole number. /// - `0x01`: Round down, toward negative infinity. /// - `0x02`: Round up, toward positive infinity. /// - `0x03`: Truncate the values. /// /// For a complete list of options, check [the LLVM docs][llvm_docs]. /// /// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382 /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_round_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundpd, b = 0x3))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_round_pd(a: __m256d, b: i32) -> __m256d { macro_rules! 
call { ($imm8:expr) => { roundpd256(a, $imm8) }; } constify_imm8!(b, call) } /// Rounds packed double-precision (64-bit) floating point elements in `a` /// toward positive infinity. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ceil_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_ceil_pd(a: __m256d) -> __m256d { roundpd256(a, 0x02) } /// Rounds packed double-precision (64-bit) floating point elements in `a` /// toward negative infinity. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_floor_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_floor_pd(a: __m256d) -> __m256d { roundpd256(a, 0x01) } /// Rounds packed single-precision (32-bit) floating point elements in `a` /// according to the flag `b`. The value of `b` may be as follows: /// /// - `0x00`: Round to the nearest whole number. /// - `0x01`: Round down, toward negative infinity. /// - `0x02`: Round up, toward positive infinity. /// - `0x03`: Truncate the values. /// /// For a complete list of options, check [the LLVM docs][llvm_docs]. /// /// [llvm_docs]: https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382 /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_round_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundps, b = 0x00))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_round_ps(a: __m256, b: i32) -> __m256 { macro_rules! call { ($imm8:expr) => { roundps256(a, $imm8) }; } constify_imm8!(b, call) } /// Rounds packed single-precision (32-bit) floating point elements in `a` /// toward positive infinity. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_ceil_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_ceil_ps(a: __m256) -> __m256 { roundps256(a, 0x02) } /// Rounds packed single-precision (32-bit) floating point elements in `a` /// toward negative infinity. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_floor_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vroundps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_floor_ps(a: __m256) -> __m256 { roundps256(a, 0x01) } /// Returns the square root of packed single-precision (32-bit) floating point /// elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsqrtps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sqrt_ps(a: __m256) -> __m256 { sqrtps256(a) } /// Returns the square root of packed double-precision (64-bit) floating point /// elements in `a`. 
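///
/// A minimal sketch (illustrative, assuming AVX is available):
///
/// ```ignore
/// let a = _mm256_setr_pd(1.0, 4.0, 9.0, 16.0);
/// // Element-wise square roots: [1.0, 2.0, 3.0, 4.0]
/// let r = _mm256_sqrt_pd(a);
/// ```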
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sqrt_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vsqrtpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sqrt_pd(a: __m256d) -> __m256d { sqrtpd256(a) } /// Blends packed double-precision (64-bit) floating-point elements from /// `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_pd) #[inline] #[target_feature(enable = "avx")] // Note: LLVM7 prefers single-precision blend instructions when // possible, see: https://bugs.llvm.org/show_bug.cgi?id=38194 // #[cfg_attr(test, assert_instr(vblendpd, imm8 = 9))] #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { let imm8 = (imm8 & 0xFF) as u8; macro_rules! blend4 { ($a:expr, $b:expr, $c:expr, $d:expr) => { simd_shuffle4(a, b, [$a, $b, $c, $d]); }; } macro_rules! blend3 { ($a:expr, $b:expr, $c:expr) => { match imm8 & 0x8 { 0 => blend4!($a, $b, $c, 3), _ => blend4!($a, $b, $c, 7), } }; } macro_rules! blend2 { ($a:expr, $b:expr) => { match imm8 & 0x4 { 0 => blend3!($a, $b, 2), _ => blend3!($a, $b, 6), } }; } macro_rules! blend1 { ($a:expr) => { match imm8 & 0x2 { 0 => blend2!($a, 1), _ => blend2!($a, 5), } }; } match imm8 & 0x1 { 0 => blend1!(0), _ => blend1!(4), } } /// Blends packed single-precision (32-bit) floating-point elements from /// `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { let imm8 = (imm8 & 0xFF) as u8; macro_rules! blend4 { ( $a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr ) => { simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]); }; } macro_rules! blend3 { ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => { match (imm8 >> 6) & 0b11 { 0b00 => blend4!($a, $b, $c, $d, $e, $f, 6, 7), 0b01 => blend4!($a, $b, $c, $d, $e, $f, 14, 7), 0b10 => blend4!($a, $b, $c, $d, $e, $f, 6, 15), _ => blend4!($a, $b, $c, $d, $e, $f, 14, 15), } }; } macro_rules! blend2 { ($a:expr, $b:expr, $c:expr, $d:expr) => { match (imm8 >> 4) & 0b11 { 0b00 => blend3!($a, $b, $c, $d, 4, 5), 0b01 => blend3!($a, $b, $c, $d, 12, 5), 0b10 => blend3!($a, $b, $c, $d, 4, 13), _ => blend3!($a, $b, $c, $d, 12, 13), } }; } macro_rules! blend1 { ($a:expr, $b:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => blend2!($a, $b, 2, 3), 0b01 => blend2!($a, $b, 10, 3), 0b10 => blend2!($a, $b, 2, 11), _ => blend2!($a, $b, 10, 11), } }; } match imm8 & 0b11 { 0b00 => blend1!(0, 1), 0b01 => blend1!(8, 1), 0b10 => blend1!(0, 9), _ => blend1!(8, 9), } } /// Blends packed double-precision (64-bit) floating-point elements from /// `a` and `b` using `c` as a mask. 
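///
/// A sketch of mask-driven selection (illustrative, assuming AVX): the sign bit of
/// each element of the mask `c` picks `b` (set) or `a` (clear), so a comparison
/// result can be used, for example, to clamp negative values to zero.
///
/// ```ignore
/// let x = _mm256_setr_pd(-1.5, 2.0, -3.0, 4.0);
/// let zero = _mm256_setzero_pd();
/// let is_negative = _mm256_cmp_pd(x, zero, _CMP_LT_OQ); // all-ones where x < 0
/// // Take `zero` where the mask is set, `x` elsewhere: [0.0, 2.0, 0.0, 4.0]
/// let clamped = _mm256_blendv_pd(x, zero, is_negative);
/// ```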
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vblendvpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vblendvpd(a, b, c) } /// Blends packed single-precision (32-bit) floating-point elements from /// `a` and `b` using `c` as a mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vblendvps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vblendvps(a, b, c) } /// Conditionally multiplies the packed single-precision (32-bit) floating-point /// elements in `a` and `b` using the high 4 bits in `imm8`, /// sum the four products, and conditionally return the sum /// using the low 4 bits of `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_dp_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vdpps, imm8 = 0x0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_dp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { macro_rules! call { ($imm8:expr) => { vdpps(a, b, $imm8) }; } constify_imm8!(imm8, call) } /// Horizontal addition of adjacent pairs in the two packed vectors /// of 4 64-bit floating points `a` and `b`. /// In the result, sums of elements from `a` are returned in even locations, /// while sums of elements from `b` are returned in odd locations. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhaddpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d { vhaddpd(a, b) } /// Horizontal addition of adjacent pairs in the two packed vectors /// of 8 32-bit floating points `a` and `b`. /// In the result, sums of elements from `a` are returned in locations of /// indices 0, 1, 4, 5; while sums of elements from `b` are locations /// 2, 3, 6, 7. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhaddps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 { vhaddps(a, b) } /// Horizontal subtraction of adjacent pairs in the two packed vectors /// of 4 64-bit floating points `a` and `b`. /// In the result, sums of elements from `a` are returned in even locations, /// while sums of elements from `b` are returned in odd locations. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhsubpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d { vhsubpd(a, b) } /// Horizontal subtraction of adjacent pairs in the two packed vectors /// of 8 32-bit floating points `a` and `b`. 
/// In the result, sums of elements from `a` are returned in locations of /// indices 0, 1, 4, 5; while sums of elements from `b` are locations /// 2, 3, 6, 7. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vhsubps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 { vhsubps(a, b) } /// Computes the bitwise XOR of packed double-precision (64-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_pd) #[inline] #[target_feature(enable = "avx")] // FIXME Should be 'vxorpd' instruction. #[cfg_attr(test, assert_instr(vxorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = transmute(a); let b: u64x4 = transmute(b); transmute(simd_xor(a, b)) } /// Computes the bitwise XOR of packed single-precision (32-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 { let a: u32x8 = transmute(a); let b: u32x8 = transmute(b); transmute(simd_xor(a, b)) } /// Equal (ordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_EQ_OQ: i32 = 0x00; /// Less-than (ordered, signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_LT_OS: i32 = 0x01; /// Less-than-or-equal (ordered, signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_LE_OS: i32 = 0x02; /// Unordered (non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_UNORD_Q: i32 = 0x03; /// Not-equal (unordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NEQ_UQ: i32 = 0x04; /// Not-less-than (unordered, signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NLT_US: i32 = 0x05; /// Not-less-than-or-equal (unordered, signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NLE_US: i32 = 0x06; /// Ordered (non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_ORD_Q: i32 = 0x07; /// Equal (unordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_EQ_UQ: i32 = 0x08; /// Not-greater-than-or-equal (unordered, signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NGE_US: i32 = 0x09; /// Not-greater-than (unordered, signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NGT_US: i32 = 0x0a; /// False (ordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_FALSE_OQ: i32 = 0x0b; /// Not-equal (ordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NEQ_OQ: i32 = 0x0c; /// Greater-than-or-equal (ordered, signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_GE_OS: i32 = 0x0d; /// Greater-than (ordered, signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_GT_OS: i32 = 0x0e; /// True (unordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_TRUE_UQ: i32 = 0x0f; /// Equal (ordered, 
signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_EQ_OS: i32 = 0x10; /// Less-than (ordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_LT_OQ: i32 = 0x11; /// Less-than-or-equal (ordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_LE_OQ: i32 = 0x12; /// Unordered (signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_UNORD_S: i32 = 0x13; /// Not-equal (unordered, signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NEQ_US: i32 = 0x14; /// Not-less-than (unordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NLT_UQ: i32 = 0x15; /// Not-less-than-or-equal (unordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NLE_UQ: i32 = 0x16; /// Ordered (signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_ORD_S: i32 = 0x17; /// Equal (unordered, signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_EQ_US: i32 = 0x18; /// Not-greater-than-or-equal (unordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NGE_UQ: i32 = 0x19; /// Not-greater-than (unordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NGT_UQ: i32 = 0x1a; /// False (ordered, signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_FALSE_OS: i32 = 0x1b; /// Not-equal (ordered, signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_NEQ_OS: i32 = 0x1c; /// Greater-than-or-equal (ordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_GE_OQ: i32 = 0x1d; /// Greater-than (ordered, non-signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_GT_OQ: i32 = 0x1e; /// True (unordered, signaling) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _CMP_TRUE_US: i32 = 0x1f; /// Compares packed double-precision (64-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_pd) #[inline] #[target_feature(enable = "avx,sse2")] #[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { macro_rules! call { ($imm8:expr) => { vcmppd(a, b, $imm8) }; } constify_imm6!(imm8, call) } /// Compares packed double-precision (64-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcmpeqpd, imm8 = 0))] // TODO Validate vcmppd #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmp_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { macro_rules! call { ($imm8:expr) => { vcmppd256(a, b, $imm8) }; } constify_imm6!(imm8, call) } /// Compares packed single-precision (32-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. 
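///
/// A brief sketch using one of the predicate constants defined above (illustrative,
/// assuming AVX is available):
///
/// ```ignore
/// let a = _mm_setr_ps(1.0, 5.0, 3.0, 4.0);
/// let b = _mm_setr_ps(2.0, 2.0, 3.0, 3.0);
/// // Per-element mask: all-ones where a < b (here only the first lane), all-zeros elsewhere.
/// let lt = _mm_cmp_ps(a, b, _CMP_LT_OQ);
/// ```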
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ps) #[inline] #[target_feature(enable = "avx,sse")] #[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { macro_rules! call { ($imm8:expr) => { vcmpps(a, b, $imm8) }; } constify_imm6!(imm8, call) } /// Compares packed single-precision (32-bit) floating-point /// elements in `a` and `b` based on the comparison operand /// specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmp_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcmpeqps, imm8 = 0))] // TODO Validate vcmpps #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmp_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { macro_rules! call { ($imm8:expr) => { vcmpps256(a, b, $imm8) }; } constify_imm6!(imm8, call) } /// Compares the lower double-precision (64-bit) floating-point element in /// `a` and `b` based on the comparison operand specified by `imm8`, /// store the result in the lower element of returned vector, /// and copies the upper element from `a` to the upper element of returned /// vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_sd) #[inline] #[target_feature(enable = "avx,sse2")] #[cfg_attr(test, assert_instr(vcmpeqsd, imm8 = 0))] // TODO Validate vcmpsd #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmp_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { macro_rules! call { ($imm8:expr) => { vcmpsd(a, b, $imm8) }; } constify_imm6!(imm8, call) } /// Compares the lower single-precision (32-bit) floating-point element in /// `a` and `b` based on the comparison operand specified by `imm8`, /// store the result in the lower element of returned vector, /// and copies the upper 3 packed elements from `a` to the upper elements of /// returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmp_ss) #[inline] #[target_feature(enable = "avx,sse")] #[cfg_attr(test, assert_instr(vcmpeqss, imm8 = 0))] // TODO Validate vcmpss #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmp_ss(a: __m128, b: __m128, imm8: i32) -> __m128 { macro_rules! call { ($imm8:expr) => { vcmpss(a, b, $imm8) }; } constify_imm6!(imm8, call) } /// Converts packed 32-bit integers in `a` to packed double-precision (64-bit) /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtdq2pd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d { simd_cast(a.as_i32x4()) } /// Converts packed 32-bit integers in `a` to packed single-precision (32-bit) /// floating-point elements. 
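///
/// A minimal sketch (illustrative, assuming AVX is available):
///
/// ```ignore
/// let ints = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// // Lane-wise conversion to f32: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]
/// let floats = _mm256_cvtepi32_ps(ints);
/// ```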
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtdq2ps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 { vcvtdq2ps(a.as_i32x8()) } /// Converts packed double-precision (64-bit) floating-point elements in `a` /// to packed single-precision (32-bit) floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtpd2ps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtpd_ps(a: __m256d) -> __m128 { vcvtpd2ps(a) } /// Converts packed single-precision (32-bit) floating-point elements in `a` /// to packed 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_epi32) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtps2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtps_epi32(a: __m256) -> __m256i { transmute(vcvtps2dq(a)) } /// Converts packed single-precision (32-bit) floating-point elements in `a` /// to packed double-precision (64-bit) floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtps2pd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtps_pd(a: __m128) -> __m256d { simd_cast(a) } /// Converts packed double-precision (64-bit) floating-point elements in `a` /// to packed 32-bit integers with truncation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttpd_epi32) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvttpd2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i { transmute(vcvttpd2dq(a)) } /// Converts packed double-precision (64-bit) floating-point elements in `a` /// to packed 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_epi32) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvtpd2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i { transmute(vcvtpd2dq(a)) } /// Converts packed single-precision (32-bit) floating-point elements in `a` /// to packed 32-bit integers with truncation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttps_epi32) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vcvttps2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvttps_epi32(a: __m256) -> __m256i { transmute(vcvttps2dq(a)) } /// Extracts 128 bits (composed of 4 packed single-precision (32-bit) /// floating-point elements) from `a`, selected with `imm8`. 
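///
/// A short sketch (illustrative, assuming AVX): `imm8 & 1` selects the lower (0)
/// or upper (1) 128-bit half.
///
/// ```ignore
/// let v = _mm256_setr_ps(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// // The upper half: [4.0, 5.0, 6.0, 7.0]
/// let hi = _mm256_extractf128_ps(v, 1);
/// ```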
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extractf128_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr( all(test, not(target_os = "windows")), assert_instr(vextractf128, imm8 = 1) )] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extractf128_ps(a: __m256, imm8: i32) -> __m128 { match imm8 & 1 { 0 => simd_shuffle4(a, _mm256_undefined_ps(), [0, 1, 2, 3]), _ => simd_shuffle4(a, _mm256_undefined_ps(), [4, 5, 6, 7]), } } /// Extracts 128 bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extractf128_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr( all(test, not(target_os = "windows")), assert_instr(vextractf128, imm8 = 1) )] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extractf128_pd(a: __m256d, imm8: i32) -> __m128d { match imm8 & 1 { 0 => simd_shuffle2(a, _mm256_undefined_pd(), [0, 1]), _ => simd_shuffle2(a, _mm256_undefined_pd(), [2, 3]), } } /// Extracts 128 bits (composed of integer data) from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extractf128_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr( all(test, not(target_os = "windows")), assert_instr(vextractf128, imm8 = 1) )] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extractf128_si256(a: __m256i, imm8: i32) -> __m128i { let b = _mm256_undefined_si256().as_i64x4(); let dst: i64x2 = match imm8 & 1 { 0 => simd_shuffle2(a.as_i64x4(), b, [0, 1]), _ => simd_shuffle2(a.as_i64x4(), b, [2, 3]), }; transmute(dst) } /// Zeroes the contents of all XMM or YMM registers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zeroall) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vzeroall))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_zeroall() { vzeroall() } /// Zeroes the upper 128 bits of all YMM registers; /// the lower 128-bits of the registers are unmodified. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zeroupper) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vzeroupper))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_zeroupper() { vzeroupper() } /// Shuffles single-precision (32-bit) floating-point elements in `a` /// within 128-bit lanes using the control in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256 { vpermilps256(a, b.as_i32x8()) } /// Shuffles single-precision (32-bit) floating-point elements in `a` /// using the control in `b`. 
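///
/// A small sketch (illustrative, assuming AVX): the low two bits of each 32-bit
/// control element select which source element lands in that position, so a
/// descending control vector reverses the lanes.
///
/// ```ignore
/// let a = _mm_setr_ps(10.0, 20.0, 30.0, 40.0);
/// let control = _mm_setr_epi32(3, 2, 1, 0);
/// // [40.0, 30.0, 20.0, 10.0]
/// let reversed = _mm_permutevar_ps(a, control);
/// ```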
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutevar_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 { vpermilps(a, b.as_i32x4()) } /// Shuffles single-precision (32-bit) floating-point elements in `a` /// within 128-bit lanes using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute_ps(a: __m256, imm8: i32) -> __m256 { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle4 { ($a:expr, $b:expr, $c:expr, $d:expr) => { simd_shuffle8( a, _mm256_undefined_ps(), [$a, $b, $c, $d, $a + 4, $b + 4, $c + 4, $d + 4], ) }; } macro_rules! shuffle3 { ($a:expr, $b:expr, $c:expr) => { match (imm8 >> 6) & 0b11 { 0b00 => shuffle4!($a, $b, $c, 0), 0b01 => shuffle4!($a, $b, $c, 1), 0b10 => shuffle4!($a, $b, $c, 2), _ => shuffle4!($a, $b, $c, 3), } }; } macro_rules! shuffle2 { ($a:expr, $b:expr) => { match (imm8 >> 4) & 0b11 { 0b00 => shuffle3!($a, $b, 0), 0b01 => shuffle3!($a, $b, 1), 0b10 => shuffle3!($a, $b, 2), _ => shuffle3!($a, $b, 3), } }; } macro_rules! shuffle1 { ($a:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => shuffle2!($a, 0), 0b01 => shuffle2!($a, 1), 0b10 => shuffle2!($a, 2), _ => shuffle2!($a, 3), } }; } match imm8 & 0b11 { 0b00 => shuffle1!(0), 0b01 => shuffle1!(1), 0b10 => shuffle1!(2), _ => shuffle1!(3), } } /// Shuffles single-precision (32-bit) floating-point elements in `a` /// using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_ps) #[inline] #[target_feature(enable = "avx,sse")] #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_permute_ps(a: __m128, imm8: i32) -> __m128 { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle4 { ($a:expr, $b:expr, $c:expr, $d:expr) => { simd_shuffle4(a, _mm_undefined_ps(), [$a, $b, $c, $d]) }; } macro_rules! shuffle3 { ($a:expr, $b:expr, $c:expr) => { match (imm8 >> 6) & 0b11 { 0b00 => shuffle4!($a, $b, $c, 0), 0b01 => shuffle4!($a, $b, $c, 1), 0b10 => shuffle4!($a, $b, $c, 2), _ => shuffle4!($a, $b, $c, 3), } }; } macro_rules! shuffle2 { ($a:expr, $b:expr) => { match (imm8 >> 4) & 0b11 { 0b00 => shuffle3!($a, $b, 0), 0b01 => shuffle3!($a, $b, 1), 0b10 => shuffle3!($a, $b, 2), _ => shuffle3!($a, $b, 3), } }; } macro_rules! shuffle1 { ($a:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => shuffle2!($a, 0), 0b01 => shuffle2!($a, 1), 0b10 => shuffle2!($a, 2), _ => shuffle2!($a, 3), } }; } match imm8 & 0b11 { 0b00 => shuffle1!(0), 0b01 => shuffle1!(1), 0b10 => shuffle1!(2), _ => shuffle1!(3), } } /// Shuffles double-precision (64-bit) floating-point elements in `a` /// within 256-bit lanes using the control in `b`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d { vpermilpd256(a, b.as_i64x4()) } /// Shuffles double-precision (64-bit) floating-point elements in `a` /// using the control in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutevar_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d { vpermilpd(a, b.as_i64x2()) } /// Shuffles double-precision (64-bit) floating-point elements in `a` /// within 128-bit lanes using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute_pd(a: __m256d, imm8: i32) -> __m256d { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle4 { ($a:expr, $b:expr, $c:expr, $d:expr) => { simd_shuffle4(a, _mm256_undefined_pd(), [$a, $b, $c, $d]); }; } macro_rules! shuffle3 { ($a:expr, $b:expr, $c:expr) => { match (imm8 >> 3) & 0x1 { 0 => shuffle4!($a, $b, $c, 2), _ => shuffle4!($a, $b, $c, 3), } }; } macro_rules! shuffle2 { ($a:expr, $b:expr) => { match (imm8 >> 2) & 0x1 { 0 => shuffle3!($a, $b, 2), _ => shuffle3!($a, $b, 3), } }; } macro_rules! shuffle1 { ($a:expr) => { match (imm8 >> 1) & 0x1 { 0 => shuffle2!($a, 0), _ => shuffle2!($a, 1), } }; } match imm8 & 0x1 { 0 => shuffle1!(0), _ => shuffle1!(1), } } /// Shuffles double-precision (64-bit) floating-point elements in `a` /// using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permute_pd) #[inline] #[target_feature(enable = "avx,sse2")] #[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_permute_pd(a: __m128d, imm8: i32) -> __m128d { let imm8 = (imm8 & 0xFF) as u8; macro_rules! shuffle2 { ($a:expr, $b:expr) => { simd_shuffle2(a, _mm_undefined_pd(), [$a, $b]); }; } macro_rules! shuffle1 { ($a:expr) => { match (imm8 >> 1) & 0x1 { 0 => shuffle2!($a, 0), _ => shuffle2!($a, 1), } }; } match imm8 & 0x1 { 0 => shuffle1!(0), _ => shuffle1!(1), } } /// Shuffles 256 bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) selected by `imm8` from `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2f128_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x5))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute2f128_ps(a: __m256, b: __m256, imm8: i32) -> __m256 { macro_rules! call { ($imm8:expr) => { vperm2f128ps256(a, b, $imm8) }; } constify_imm8!(imm8, call) } /// Shuffles 256 bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) selected by `imm8` from `a` and `b`. 
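///
/// A sketch of the lane selection (illustrative, assuming AVX): the low and high
/// nibbles of `imm8` each pick one 128-bit half of the result from the four halves
/// of `a` and `b` (0 = low of `a`, 1 = high of `a`, 2 = low of `b`, 3 = high of `b`).
///
/// ```ignore
/// let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
/// let b = _mm256_setr_pd(5.0, 6.0, 7.0, 8.0);
/// // 0x31: low half <- high half of `a`, high half <- high half of `b`.
/// let r = _mm256_permute2f128_pd(a, b, 0x31); // [3.0, 4.0, 7.0, 8.0]
/// ```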
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2f128_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute2f128_pd(a: __m256d, b: __m256d, imm8: i32) -> __m256d { macro_rules! call { ($imm8:expr) => { vperm2f128pd256(a, b, $imm8) }; } constify_imm8!(imm8, call) } /// Shuffles 258-bits (composed of integer data) selected by `imm8` /// from `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2f128_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 0x31))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute2f128_si256(a: __m256i, b: __m256i, imm8: i32) -> __m256i { let a = a.as_i32x8(); let b = b.as_i32x8(); macro_rules! call { ($imm8:expr) => { vperm2f128si256(a, b, $imm8) }; } let r = constify_imm8!(imm8, call); transmute(r) } /// Broadcasts a single-precision (32-bit) floating-point element from memory /// to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_ss) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::trivially_copy_pass_by_ref)] pub unsafe fn _mm256_broadcast_ss(f: &f32) -> __m256 { _mm256_set1_ps(*f) } /// Broadcasts a single-precision (32-bit) floating-point element from memory /// to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcast_ss) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::trivially_copy_pass_by_ref)] pub unsafe fn _mm_broadcast_ss(f: &f32) -> __m128 { _mm_set1_ps(*f) } /// Broadcasts a double-precision (64-bit) floating-point element from memory /// to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_sd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastsd))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::trivially_copy_pass_by_ref)] pub unsafe fn _mm256_broadcast_sd(f: &f64) -> __m256d { _mm256_set1_pd(*f) } /// Broadcasts 128 bits from memory (composed of 4 packed single-precision /// (32-bit) floating-point elements) to all elements of the returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastf128))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcast_ps(a: &__m128) -> __m256 { vbroadcastf128ps256(a) } /// Broadcasts 128 bits from memory (composed of 2 packed double-precision /// (64-bit) floating-point elements) to all elements of the returned vector. 
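///
/// A minimal sketch (illustrative, assuming AVX is available):
///
/// ```ignore
/// let pair = _mm_setr_pd(1.0, 2.0);
/// // Both 128-bit halves of the result hold the pair: [1.0, 2.0, 1.0, 2.0]
/// let v = _mm256_broadcast_pd(&pair);
/// ```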
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcast_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vbroadcastf128))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcast_pd(a: &__m128d) -> __m256d { vbroadcastf128pd256(a) } /// Copies `a` to result, then inserts 128 bits (composed of 4 packed /// single-precision (32-bit) floating-point elements) from `b` into result /// at the location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insertf128_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr( all(test, not(target_os = "windows")), assert_instr(vinsertf128, imm8 = 1) )] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insertf128_ps(a: __m256, b: __m128, imm8: i32) -> __m256 { let b = _mm256_castps128_ps256(b); match imm8 & 1 { 0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]), _ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]), } } /// Copies `a` to result, then inserts 128 bits (composed of 2 packed /// double-precision (64-bit) floating-point elements) from `b` into result /// at the location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insertf128_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr( all(test, not(target_os = "windows")), assert_instr(vinsertf128, imm8 = 1) )] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insertf128_pd(a: __m256d, b: __m128d, imm8: i32) -> __m256d { match imm8 & 1 { 0 => simd_shuffle4(a, _mm256_castpd128_pd256(b), [4, 5, 2, 3]), _ => simd_shuffle4(a, _mm256_castpd128_pd256(b), [0, 1, 4, 5]), } } /// Copies `a` to result, then inserts 128 bits from `b` into result /// at the location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insertf128_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr( all(test, not(target_os = "windows")), assert_instr(vinsertf128, imm8 = 1) )] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insertf128_si256(a: __m256i, b: __m128i, imm8: i32) -> __m256i { let b = _mm256_castsi128_si256(b).as_i64x4(); let dst: i64x4 = match imm8 & 1 { 0 => simd_shuffle4(a.as_i64x4(), b, [4, 5, 2, 3]), _ => simd_shuffle4(a.as_i64x4(), b, [0, 1, 4, 5]), }; transmute(dst) } /// Copies `a` to result, and inserts the 8-bit integer `i` into result /// at the location specified by `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi8) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insert_epi8(a: __m256i, i: i8, index: i32) -> __m256i { transmute(simd_insert(a.as_i8x32(), (index as u32) & 31, i)) } /// Copies `a` to result, and inserts the 16-bit integer `i` into result /// at the location specified by `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi16) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. 
#[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insert_epi16(a: __m256i, i: i16, index: i32) -> __m256i { transmute(simd_insert(a.as_i16x16(), (index as u32) & 15, i)) } /// Copies `a` to result, and inserts the 32-bit integer `i` into result /// at the location specified by `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi32) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32, index: i32) -> __m256i { transmute(simd_insert(a.as_i32x8(), (index as u32) & 7, i)) } /// Loads 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from memory into result. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { *(mem_addr as *const __m256d) } /// Stores 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_store_pd(mem_addr: *const f64, a: __m256d) { *(mem_addr as *mut __m256d) = a; } /// Loads 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from memory into result. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 { *(mem_addr as *const __m256) } /// Stores 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_store_ps(mem_addr: *const f32, a: __m256) { *(mem_addr as *mut __m256) = a; } /// Loads 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from memory into result. /// `mem_addr` does not need to be aligned on any particular boundary. 
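///
/// A hedged example of an unaligned load/store round trip, assuming an
/// AVX-capable x86_64 CPU and `std::arch::x86_64` in scope; `add4` is an
/// illustrative helper, not part of this crate, so the block is `ignore`d:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx")]
/// unsafe fn add4(x: &[f64; 4], y: &[f64; 4]) -> [f64; 4] {
///     // Unaligned loads: plain references to `[f64; 4]` are enough.
///     let a = _mm256_loadu_pd(x.as_ptr());
///     let b = _mm256_loadu_pd(y.as_ptr());
///     let mut out = [0.0f64; 4];
///     _mm256_storeu_pd(out.as_mut_ptr(), _mm256_add_pd(a, b));
///     out
/// }
/// ```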
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d { let mut dst = _mm256_undefined_pd(); ptr::copy_nonoverlapping( mem_addr as *const u8, &mut dst as *mut __m256d as *mut u8, mem::size_of::<__m256d>(), ); dst } /// Stores 256-bits (composed of 4 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) { storeupd256(mem_addr, a); } /// Loads 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from memory into result. /// `mem_addr` does not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 { let mut dst = _mm256_undefined_ps(); ptr::copy_nonoverlapping( mem_addr as *const u8, &mut dst as *mut __m256 as *mut u8, mem::size_of::<__m256>(), ); dst } /// Stores 256-bits (composed of 8 packed single-precision (32-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) { storeups256(mem_addr, a); } /// Loads 256-bits of integer data from memory into result. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_load_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i { *mem_addr } /// Stores 256-bits of integer data from `a` into memory. /// `mem_addr` must be aligned on a 32-byte boundary or a /// general-protection exception may be generated. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_store_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovdqa expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) { *mem_addr = a; } /// Loads 256-bits of integer data from memory into result. /// `mem_addr` does not need to be aligned on any particular boundary. 
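///
/// One way this can be used, sketched under the assumption that AVX is
/// available and the intrinsics are reachable through `std::arch::x86_64`
/// (the block is marked `ignore`; `demo` is illustrative only):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx")]
/// unsafe fn demo(bytes: &[u8; 32]) {
///     // No alignment requirement, so a plain pointer cast is enough.
///     let v = _mm256_loadu_si256(bytes.as_ptr() as *const __m256i);
///     let mut copy = [0u8; 32];
///     _mm256_storeu_si256(copy.as_mut_ptr() as *mut __m256i, v);
///     assert_eq!(&copy[..], &bytes[..]);
/// }
/// ```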
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i { let mut dst = _mm256_undefined_si256(); ptr::copy_nonoverlapping( mem_addr as *const u8, &mut dst as *mut __m256i as *mut u8, mem::size_of::<__m256i>(), ); dst } /// Stores 256-bits of integer data from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) { storeudq256(mem_addr as *mut i8, a.as_i8x32()); } /// Loads packed double-precision (64-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d { maskloadpd256(mem_addr as *const i8, mask.as_i64x4()) } /// Stores packed double-precision (64-bit) floating-point elements from `a` /// into memory using `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) { maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a); } /// Loads packed double-precision (64-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d { maskloadpd(mem_addr as *const i8, mask.as_i64x2()) } /// Stores packed double-precision (64-bit) floating-point elements from `a` /// into memory using `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmaskmovpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) { maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a); } /// Loads packed single-precision (32-bit) floating-point elements from memory /// into result using `mask` (elements are zeroed out when the high bit of the /// corresponding element is not set). 
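///
/// A sketch of the masking behaviour (assumes AVX support and imports from
/// `std::arch::x86_64`; the `demo` helper is illustrative only):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx")]
/// unsafe fn demo() {
///     let data = [1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
///     // A mask element with its high bit set loads the element; a zero
///     // mask element produces 0.0 in that position.
///     let mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);
///     let r = _mm256_maskload_ps(data.as_ptr(), mask);
///     let mut out = [0.0f32; 8];
///     _mm256_storeu_ps(out.as_mut_ptr(), r);
///     assert_eq!(out, [1.0, 0.0, 3.0, 0.0, 5.0, 0.0, 7.0, 0.0]);
/// }
/// ```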
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
    maskloadps256(mem_addr as *const i8, mask.as_i32x8())
}

/// Stores packed single-precision (32-bit) floating-point elements from `a`
/// into memory using `mask`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
    maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
}

/// Loads packed single-precision (32-bit) floating-point elements from memory
/// into result using `mask` (elements are zeroed out when the high bit of the
/// corresponding element is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
    maskloadps(mem_addr as *const i8, mask.as_i32x4())
}

/// Stores packed single-precision (32-bit) floating-point elements from `a`
/// into memory using `mask`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
    maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a);
}

/// Duplicates odd-indexed single-precision (32-bit) floating-point elements
/// from `a`, and returns the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movehdup_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovshdup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movehdup_ps(a: __m256) -> __m256 {
    simd_shuffle8(a, a, [1, 1, 3, 3, 5, 5, 7, 7])
}

/// Duplicates even-indexed single-precision (32-bit) floating-point elements
/// from `a`, and returns the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_moveldup_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovsldup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_moveldup_ps(a: __m256) -> __m256 {
    simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6])
}

/// Duplicates even-indexed double-precision (64-bit) floating-point elements
/// from `a`, and returns the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movedup_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movedup_pd(a: __m256d) -> __m256d {
    simd_shuffle4(a, a, [0, 0, 2, 2])
}

/// Loads 256-bits of integer data from unaligned memory into result.
/// This intrinsic may perform better than `_mm256_loadu_si256` when the /// data crosses a cache line boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_lddqu_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vlddqu))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i { transmute(vlddqu(mem_addr as *const i8)) } /// Moves integer data from a 256-bit integer vector to a 32-byte /// aligned memory location. To minimize caching, the data is flagged as /// non-temporal (unlikely to be used again soon) /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_stream_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) { intrinsics::nontemporal_store(mem_addr, a); } /// Moves double-precision values from a 256-bit vector of `[4 x double]` /// to a 32-byte aligned memory location. To minimize caching, the data is /// flagged as non-temporal (unlikely to be used again soon). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_stream_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) { intrinsics::nontemporal_store(mem_addr as *mut __m256d, a); } /// Moves single-precision floating point values from a 256-bit vector /// of `[8 x float]` to a 32-byte aligned memory location. To minimize /// caching, the data is flagged as non-temporal (unlikely to be used again /// soon). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_stream_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovntps))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256) { intrinsics::nontemporal_store(mem_addr as *mut __m256, a); } /// Computes the approximate reciprocal of packed single-precision (32-bit) /// floating-point elements in `a`, and returns the results. The maximum /// relative error for this approximation is less than 1.5*2^-12. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rcp_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vrcpps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_rcp_ps(a: __m256) -> __m256 { vrcpps(a) } /// Computes the approximate reciprocal square root of packed single-precision /// (32-bit) floating-point elements in `a`, and returns the results. /// The maximum relative error for this approximation is less than 1.5*2^-12. 
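///
/// Because the result is only an approximation, comparisons should use a
/// tolerance rather than exact equality; a sketch assuming AVX support and
/// `std::arch::x86_64` imports (`demo` is illustrative, block is `ignore`d):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx")]
/// unsafe fn demo() {
///     let a = _mm256_set1_ps(4.0);
///     // Every lane is approximately 1/sqrt(4.0) = 0.5.
///     let r = _mm256_rsqrt_ps(a);
///     let mut out = [0.0f32; 8];
///     _mm256_storeu_ps(out.as_mut_ptr(), r);
///     for x in &out {
///         assert!((x - 0.5).abs() < 1e-3);
///     }
/// }
/// ```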
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_rsqrt_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vrsqrtps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
    vrsqrtps(a)
}

/// Unpacks and interleaves double-precision (64-bit) floating-point elements
/// from the high half of each 128-bit lane in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_shuffle4(a, b, [1, 5, 3, 7])
}

/// Unpacks and interleaves single-precision (32-bit) floating-point elements
/// from the high half of each 128-bit lane in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
    simd_shuffle8(a, b, [2, 10, 3, 11, 6, 14, 7, 15])
}

/// Unpacks and interleaves double-precision (64-bit) floating-point elements
/// from the low half of each 128-bit lane in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_pd)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
    simd_shuffle4(a, b, [0, 4, 2, 6])
}

/// Unpacks and interleaves single-precision (32-bit) floating-point elements
/// from the low half of each 128-bit lane in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_ps)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vunpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
    simd_shuffle8(a, b, [0, 8, 1, 9, 4, 12, 5, 13])
}

/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
/// the result is zero, otherwise set `CF` to 0. Return the `ZF` value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testz_si256)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
    ptestz256(a.as_i64x4(), b.as_i64x4())
}

/// Computes the bitwise AND of 256 bits (representing integer data) in `a` and
/// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0.
/// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if
/// the result is zero, otherwise set `CF` to 0. Return the `CF` value.
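///
/// A sketch of the flag semantics, assuming AVX support and intrinsics
/// imported from `std::arch::x86_64` (marked `ignore`; `demo` is a purely
/// illustrative helper):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx")]
/// unsafe fn demo() {
///     let a = _mm256_set1_epi8(0b0000_1111);
///     let b = _mm256_set1_epi8(0b0000_0011);
///     // Every bit set in `b` is also set in `a`, so (!a & b) == 0, CF == 1,
///     // and the intrinsic returns 1.
///     assert_eq!(_mm256_testc_si256(a, b), 1);
///     // `_mm256_testz_si256` checks (a & b) instead; here it is non-zero.
///     assert_eq!(_mm256_testz_si256(a, b), 0);
/// }
/// ```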
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testc_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vptest))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 { ptestc256(a.as_i64x4(), b.as_i64x4()) } /// Computes the bitwise AND of 256 bits (representing integer data) in `a` and /// `b`, and set `ZF` to 1 if the result is zero, otherwise set `ZF` to 0. /// Computes the bitwise NOT of `a` and then AND with `b`, and set `CF` to 1 if /// the result is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and /// `CF` values are zero, otherwise return 0. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testnzc_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vptest))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32 { ptestnzc256(a.as_i64x4(), b.as_i64x4()) } /// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `ZF` value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testz_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32 { vtestzpd256(a, b) } /// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `CF` value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testc_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32 { vtestcpd256(a, b) } /// Computes the bitwise AND of 256 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values /// are zero, otherwise return 0. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testnzc_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 { vtestnzcpd256(a, b) } /// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `ZF` value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 { vtestzpd(a, b) } /// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `CF` value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 { vtestcpd(a, b) } /// Computes the bitwise AND of 128 bits (representing double-precision (64-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 64-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 64-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values /// are zero, otherwise return 0. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32 { vtestnzcpd(a, b) } /// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `ZF` value. 
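///
/// A sign-bit-oriented sketch (assuming AVX is available, the intrinsics come
/// from `std::arch::x86_64`, and the `demo` helper is illustrative only):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx")]
/// unsafe fn demo() {
///     let a = _mm256_set1_ps(-1.0); // sign bit set in every element
///     let b = _mm256_set1_ps(1.0);  // sign bit clear in every element
///     // The AND clears every sign bit, so ZF is 1 and the result is 1.
///     assert_eq!(_mm256_testz_ps(a, b), 1);
///     // With `a` on both sides the sign bits survive, so the result is 0.
///     assert_eq!(_mm256_testz_ps(a, a), 0);
/// }
/// ```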
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testz_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testz_ps(a: __m256, b: __m256) -> i32 { vtestzps256(a, b) } /// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `CF` value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testc_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testc_ps(a: __m256, b: __m256) -> i32 { vtestcps256(a, b) } /// Computes the bitwise AND of 256 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 256-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values /// are zero, otherwise return 0. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_testnzc_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 { vtestnzcps256(a, b) } /// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `ZF` value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testz_ps(a: __m128, b: __m128) -> i32 { vtestzps(a, b) } /// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return the `CF` value. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testc_ps(a: __m128, b: __m128) -> i32 { vtestcps(a, b) } /// Computes the bitwise AND of 128 bits (representing single-precision (32-bit) /// floating-point elements) in `a` and `b`, producing an intermediate 128-bit /// value, and set `ZF` to 1 if the sign bit of each 32-bit element in the /// intermediate value is zero, otherwise set `ZF` to 0. Compute the bitwise /// NOT of `a` and then AND with `b`, producing an intermediate value, and set /// `CF` to 1 if the sign bit of each 32-bit element in the intermediate value /// is zero, otherwise set `CF` to 0. Return 1 if both the `ZF` and `CF` values /// are zero, otherwise return 0. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vtestps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { vtestnzcps(a, b) } /// Sets each bit of the returned mask based on the most significant bit of the /// corresponding packed double-precision (64-bit) floating-point element in /// `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movemask_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovmskpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 { movmskpd256(a) } /// Sets each bit of the returned mask based on the most significant bit of the /// corresponding packed single-precision (32-bit) floating-point element in /// `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movemask_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vmovmskps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 { movmskps256(a) } /// Returns vector of type __m256d with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_pd) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxorps))] // FIXME vxorpd expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setzero_pd() -> __m256d { _mm256_set1_pd(0.0) } /// Returns vector of type __m256 with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ps) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setzero_ps() -> __m256 { _mm256_set1_ps(0.0) } /// Returns vector of type __m256i with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_si256) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vxor))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setzero_si256() -> __m256i { _mm256_set1_epi8(0) } /// Sets packed double-precision (64-bit) floating-point elements in returned /// vector with the supplied values. 
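///
/// A sketch of the argument order, assuming AVX support and imports from
/// `std::arch::x86_64` (`demo` is an illustrative helper; block is `ignore`d):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx")]
/// unsafe fn demo() {
///     // `set` takes arguments from the highest-indexed element down to
///     // element 0, so this equals `_mm256_setr_pd(1.0, 2.0, 3.0, 4.0)`.
///     let a = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);
///     let mut out = [0.0f64; 4];
///     _mm256_storeu_pd(out.as_mut_ptr(), a);
///     assert_eq!(out, [1.0, 2.0, 3.0, 4.0]);
/// }
/// ```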
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_pd) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { _mm256_setr_pd(d, c, b, a) } /// Sets packed single-precision (32-bit) floating-point elements in returned /// vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_ps) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_ps( a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32, ) -> __m256 { _mm256_setr_ps(h, g, f, e, d, c, b, a) } /// Sets packed 8-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi8) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_epi8( e00: i8, e01: i8, e02: i8, e03: i8, e04: i8, e05: i8, e06: i8, e07: i8, e08: i8, e09: i8, e10: i8, e11: i8, e12: i8, e13: i8, e14: i8, e15: i8, e16: i8, e17: i8, e18: i8, e19: i8, e20: i8, e21: i8, e22: i8, e23: i8, e24: i8, e25: i8, e26: i8, e27: i8, e28: i8, e29: i8, e30: i8, e31: i8, ) -> __m256i { #[rustfmt::skip] _mm256_setr_epi8( e31, e30, e29, e28, e27, e26, e25, e24, e23, e22, e21, e20, e19, e18, e17, e16, e15, e14, e13, e12, e11, e10, e09, e08, e07, e06, e05, e04, e03, e02, e01, e00, ) } /// Sets packed 16-bit integers in returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi16) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_epi16( e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16, e07: i16, e08: i16, e09: i16, e10: i16, e11: i16, e12: i16, e13: i16, e14: i16, e15: i16, ) -> __m256i { #[rustfmt::skip] _mm256_setr_epi16( e15, e14, e13, e12, e11, e10, e09, e08, e07, e06, e05, e04, e03, e02, e01, e00, ) } /// Sets packed 32-bit integers in returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi32) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_epi32( e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32, ) -> __m256i { _mm256_setr_epi32(e7, e6, e5, e4, e3, e2, e1, e0) } /// Sets packed 64-bit integers in returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_epi64x) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. 
#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i { _mm256_setr_epi64x(d, c, b, a) } /// Sets packed double-precision (64-bit) floating-point elements in returned /// vector with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_pd) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d { __m256d(a, b, c, d) } /// Sets packed single-precision (32-bit) floating-point elements in returned /// vector with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_ps) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_ps( a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32, ) -> __m256 { __m256(a, b, c, d, e, f, g, h) } /// Sets packed 8-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi8) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_epi8( e00: i8, e01: i8, e02: i8, e03: i8, e04: i8, e05: i8, e06: i8, e07: i8, e08: i8, e09: i8, e10: i8, e11: i8, e12: i8, e13: i8, e14: i8, e15: i8, e16: i8, e17: i8, e18: i8, e19: i8, e20: i8, e21: i8, e22: i8, e23: i8, e24: i8, e25: i8, e26: i8, e27: i8, e28: i8, e29: i8, e30: i8, e31: i8, ) -> __m256i { #[rustfmt::skip] transmute(i8x32::new( e00, e01, e02, e03, e04, e05, e06, e07, e08, e09, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, )) } /// Sets packed 16-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi16) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_epi16( e00: i16, e01: i16, e02: i16, e03: i16, e04: i16, e05: i16, e06: i16, e07: i16, e08: i16, e09: i16, e10: i16, e11: i16, e12: i16, e13: i16, e14: i16, e15: i16, ) -> __m256i { #[rustfmt::skip] transmute(i16x16::new( e00, e01, e02, e03, e04, e05, e06, e07, e08, e09, e10, e11, e12, e13, e14, e15, )) } /// Sets packed 32-bit integers in returned vector with the supplied values in /// reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi32) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_epi32( e0: i32, e1: i32, e2: i32, e3: i32, e4: i32, e5: i32, e6: i32, e7: i32, ) -> __m256i { transmute(i32x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } /// Sets packed 64-bit integers in returned vector with the supplied values in /// reverse order. 
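///
/// A sketch of the memory-order behaviour under the same kind of assumptions
/// (AVX available at runtime, intrinsics in scope via `std::arch::x86_64`,
/// illustrative `demo` helper, block marked `ignore`):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx")]
/// unsafe fn demo() {
///     // `setr` takes the elements in memory order: element 0 first.
///     let v = _mm256_setr_epi64x(1, 2, 3, 4);
///     let mut out = [0i64; 4];
///     _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, v);
///     assert_eq!(out, [1, 2, 3, 4]);
/// }
/// ```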
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_epi64x)
#[inline]
#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
    transmute(i64x4::new(a, b, c, d))
}

/// Broadcasts double-precision (64-bit) floating-point value `a` to all
/// elements of returned vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_pd)
#[inline]
#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_pd(a: f64) -> __m256d {
    _mm256_setr_pd(a, a, a, a)
}

/// Broadcasts single-precision (32-bit) floating-point value `a` to all
/// elements of returned vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_ps)
#[inline]
#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_ps(a: f32) -> __m256 {
    _mm256_setr_ps(a, a, a, a, a, a, a, a)
}

/// Broadcasts 8-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate `vpbroadcastb`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi8)
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vpshufb))]
#[cfg_attr(test, assert_instr(vinsertf128))]
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i {
    #[rustfmt::skip]
    _mm256_setr_epi8(
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
        a, a, a, a, a, a, a, a,
    )
}

/// Broadcasts 16-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate `vpbroadcastw`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi16)
#[inline]
#[target_feature(enable = "avx")]
//#[cfg_attr(test, assert_instr(vpshufb))]
#[cfg_attr(test, assert_instr(vinsertf128))]
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi16(a: i16) -> __m256i {
    _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

/// Broadcasts 32-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate `vpbroadcastd`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi32)
#[inline]
#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_set1_epi32(a: i32) -> __m256i {
    _mm256_setr_epi32(a, a, a, a, a, a, a, a)
}

/// Broadcasts 64-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate `vpbroadcastq`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi64x)
#[inline]
#[target_feature(enable = "avx")]
//#[cfg_attr(test, assert_instr(vmovddup))]
#[cfg_attr(test, assert_instr(vinsertf128))]
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set1_epi64x(a: i64) -> __m256i { _mm256_setr_epi64x(a, a, a, a) } /// Cast vector of type __m256d to type __m256. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ps) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castpd_ps(a: __m256d) -> __m256 { transmute(a) } /// Cast vector of type __m256 to type __m256d. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_pd) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castps_pd(a: __m256) -> __m256d { transmute(a) } /// Casts vector of type __m256 to type __m256i. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_si256) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castps_si256(a: __m256) -> __m256i { transmute(a) } /// Casts vector of type __m256i to type __m256. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ps) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castsi256_ps(a: __m256i) -> __m256 { transmute(a) } /// Casts vector of type __m256d to type __m256i. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_si256) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castpd_si256(a: __m256d) -> __m256i { transmute(a) } /// Casts vector of type __m256i to type __m256d. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_pd) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castsi256_pd(a: __m256i) -> __m256d { transmute(a) } /// Casts vector of type __m256 to type __m128. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps256_ps128) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castps256_ps128(a: __m256) -> __m128 { simd_shuffle4(a, a, [0, 1, 2, 3]) } /// Casts vector of type __m256d to type __m128d. 
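///
/// A sketch showing that the cast keeps the low 128 bits (assumes AVX support
/// and `std::arch::x86_64` imports; `demo` is illustrative, block `ignore`d):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx")]
/// unsafe fn demo() {
///     let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
///     // The cast keeps the low 128 bits and compiles to no instructions.
///     let lo = _mm256_castpd256_pd128(a);
///     let mut out = [0.0f64; 2];
///     _mm_storeu_pd(out.as_mut_ptr(), lo);
///     assert_eq!(out, [1.0, 2.0]);
/// }
/// ```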
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd256_pd128) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castpd256_pd128(a: __m256d) -> __m128d { simd_shuffle2(a, a, [0, 1]) } /// Casts vector of type __m256i to type __m128i. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_si128) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i { let a = a.as_i64x4(); let dst: i64x2 = simd_shuffle2(a, a, [0, 1]); transmute(dst) } /// Casts vector of type __m128 to type __m256; /// the upper 128 bits of the result are undefined. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps128_ps256) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 { // FIXME simd_shuffle8(a, a, [0, 1, 2, 3, -1, -1, -1, -1]) simd_shuffle8(a, a, [0, 1, 2, 3, 0, 0, 0, 0]) } /// Casts vector of type __m128d to type __m256d; /// the upper 128 bits of the result are undefined. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd128_pd256) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { // FIXME simd_shuffle4(a, a, [0, 1, -1, -1]) simd_shuffle4(a, a, [0, 1, 0, 0]) } /// Casts vector of type __m128i to type __m256i; /// the upper 128 bits of the result are undefined. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi128_si256) #[inline] #[target_feature(enable = "avx")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i { let a = a.as_i64x2(); // FIXME simd_shuffle4(a, a, [0, 1, -1, -1]) let dst: i64x4 = simd_shuffle4(a, a, [0, 1, 0, 0]); transmute(dst) } /// Constructs a 256-bit floating-point vector of `[8 x float]` from a /// 128-bit floating-point vector of `[4 x float]`. The lower 128 bits contain /// the value of the source vector. The upper 128 bits are set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextps128_ps256) #[inline] #[target_feature(enable = "avx,sse")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 { simd_shuffle8(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) } /// Constructs a 256-bit integer vector from a 128-bit integer vector. 
/// The lower 128 bits contain the value of the source vector. The upper /// 128 bits are set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextsi128_si256) #[inline] #[target_feature(enable = "avx,sse2")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i { let b = _mm_setzero_si128().as_i64x2(); let dst: i64x4 = simd_shuffle4(a.as_i64x2(), b, [0, 1, 2, 3]); transmute(dst) } /// Constructs a 256-bit floating-point vector of `[4 x double]` from a /// 128-bit floating-point vector of `[2 x double]`. The lower 128 bits /// contain the value of the source vector. The upper 128 bits are set /// to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextpd128_pd256) #[inline] #[target_feature(enable = "avx,sse2")] // This intrinsic is only used for compilation and does not generate any // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d { simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3]) } /// Returns vector of type `__m256` with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ps) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_undefined_ps() -> __m256 { // FIXME: this function should return MaybeUninit<__m256> mem::MaybeUninit::<__m256>::uninit().assume_init() } /// Returns vector of type `__m256d` with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_pd) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_undefined_pd() -> __m256d { // FIXME: this function should return MaybeUninit<__m256d> mem::MaybeUninit::<__m256d>::uninit().assume_init() } /// Returns vector of type __m256i with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_si256) #[inline] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_undefined_si256() -> __m256i { // FIXME: this function should return MaybeUninit<__m256i> mem::MaybeUninit::<__m256i>::uninit().assume_init() } /// Sets packed __m256 returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_m128) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 { simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) } /// Sets packed __m256d returned vector with the supplied values. 
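///
/// A sketch of how the halves are combined, assuming AVX support and
/// intrinsics from `std::arch::x86_64`; `demo` is illustrative only, so the
/// block is marked `ignore`:
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx")]
/// unsafe fn demo() {
///     let hi = _mm_setr_pd(3.0, 4.0);
///     let lo = _mm_setr_pd(1.0, 2.0);
///     // `lo` becomes the low 128 bits of the result, `hi` the high 128 bits.
///     let r = _mm256_set_m128d(hi, lo);
///     let mut out = [0.0f64; 4];
///     _mm256_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [1.0, 2.0, 3.0, 4.0]);
/// }
/// ```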
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_m128d) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d { let hi: __m128 = transmute(hi); let lo: __m128 = transmute(lo); transmute(_mm256_set_m128(hi, lo)) } /// Sets packed __m256i returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set_m128i) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i { let hi: __m128 = transmute(hi); let lo: __m128 = transmute(lo); transmute(_mm256_set_m128(hi, lo)) } /// Sets packed __m256 returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_m128) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 { _mm256_set_m128(hi, lo) } /// Sets packed __m256d returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_m128d) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d { _mm256_set_m128d(hi, lo) } /// Sets packed __m256i returned vector with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setr_m128i) #[inline] #[target_feature(enable = "avx")] #[cfg_attr(test, assert_instr(vinsertf128))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i { _mm256_set_m128i(hi, lo) } /// Loads two 128-bit values (composed of 4 packed single-precision (32-bit) /// floating-point elements) from memory, and combine them into a 256-bit /// value. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu2_m128) #[inline] #[target_feature(enable = "avx,sse")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 { let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr)); _mm256_insertf128_ps(a, _mm_loadu_ps(hiaddr), 1) } /// Loads two 128-bit values (composed of 2 packed double-precision (64-bit) /// floating-point elements) from memory, and combine them into a 256-bit /// value. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu2_m128d) #[inline] #[target_feature(enable = "avx,sse2")] // This intrinsic has no corresponding instruction. 
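// Usage sketch for the unaligned pair-load (comment only; `data` is a hypothetical
// caller-side `[f64; 4]`):
//   let v = _mm256_loadu2_m128d(data[2..].as_ptr(), data.as_ptr());
// loads data[0..2] into the low half of `v` and data[2..4] into the high half.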
#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d { let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr)); _mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1) } /// Loads two 128-bit values (composed of integer data) from memory, and combine /// them into a 256-bit value. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_loadu2_m128i) #[inline] #[target_feature(enable = "avx,sse2")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i { let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr)); _mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1) } /// Stores the high and low 128-bit halves (each composed of 4 packed /// single-precision (32-bit) floating-point elements) from `a` into memory two /// different 128-bit locations. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu2_m128) #[inline] #[target_feature(enable = "avx,sse")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) { let lo = _mm256_castps256_ps128(a); _mm_storeu_ps(loaddr, lo); let hi = _mm256_extractf128_ps(a, 1); _mm_storeu_ps(hiaddr, hi); } /// Stores the high and low 128-bit halves (each composed of 2 packed /// double-precision (64-bit) floating-point elements) from `a` into memory two /// different 128-bit locations. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu2_m128d) #[inline] #[target_feature(enable = "avx,sse2")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) { let lo = _mm256_castpd256_pd128(a); _mm_storeu_pd(loaddr, lo); let hi = _mm256_extractf128_pd(a, 1); _mm_storeu_pd(hiaddr, hi); } /// Stores the high and low 128-bit halves (each composed of integer data) from /// `a` into memory two different 128-bit locations. /// `hiaddr` and `loaddr` do not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_storeu2_m128i) #[inline] #[target_feature(enable = "avx,sse2")] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) { let lo = _mm256_castsi256_si128(a); _mm_storeu_si128(loaddr, lo); let hi = _mm256_extractf128_si256(a, 1); _mm_storeu_si128(hiaddr, hi); } /// Returns the first element of the input vector of `[8 x float]`. 
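// Note (comment only): this is the 256-bit counterpart of `_mm_cvtss_f32`; the
// implementation below simply extracts lane 0 of `a` as an `f32`.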
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtss_f32) #[inline] #[target_feature(enable = "avx")] //#[cfg_attr(test, assert_instr(movss))] FIXME #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 { simd_extract(a, 0) } /// LLVM intrinsics used in the above functions #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.avx.addsub.pd.256"] fn addsubpd256(a: __m256d, b: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.addsub.ps.256"] fn addsubps256(a: __m256, b: __m256) -> __m256; #[link_name = "llvm.x86.avx.max.pd.256"] fn maxpd256(a: __m256d, b: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.max.ps.256"] fn maxps256(a: __m256, b: __m256) -> __m256; #[link_name = "llvm.x86.avx.min.pd.256"] fn minpd256(a: __m256d, b: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.min.ps.256"] fn minps256(a: __m256, b: __m256) -> __m256; #[link_name = "llvm.x86.avx.round.pd.256"] fn roundpd256(a: __m256d, b: i32) -> __m256d; #[link_name = "llvm.x86.avx.round.ps.256"] fn roundps256(a: __m256, b: i32) -> __m256; #[link_name = "llvm.x86.avx.sqrt.pd.256"] fn sqrtpd256(a: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.sqrt.ps.256"] fn sqrtps256(a: __m256) -> __m256; #[link_name = "llvm.x86.avx.blendv.pd.256"] fn vblendvpd(a: __m256d, b: __m256d, c: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.blendv.ps.256"] fn vblendvps(a: __m256, b: __m256, c: __m256) -> __m256; #[link_name = "llvm.x86.avx.dp.ps.256"] fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256; #[link_name = "llvm.x86.avx.hadd.pd.256"] fn vhaddpd(a: __m256d, b: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.hadd.ps.256"] fn vhaddps(a: __m256, b: __m256) -> __m256; #[link_name = "llvm.x86.avx.hsub.pd.256"] fn vhsubpd(a: __m256d, b: __m256d) -> __m256d; #[link_name = "llvm.x86.avx.hsub.ps.256"] fn vhsubps(a: __m256, b: __m256) -> __m256; #[link_name = "llvm.x86.sse2.cmp.pd"] fn vcmppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d; #[link_name = "llvm.x86.avx.cmp.pd.256"] fn vcmppd256(a: __m256d, b: __m256d, imm8: u8) -> __m256d; #[link_name = "llvm.x86.sse.cmp.ps"] fn vcmpps(a: __m128, b: __m128, imm8: u8) -> __m128; #[link_name = "llvm.x86.avx.cmp.ps.256"] fn vcmpps256(a: __m256, b: __m256, imm8: u8) -> __m256; #[link_name = "llvm.x86.sse2.cmp.sd"] fn vcmpsd(a: __m128d, b: __m128d, imm8: u8) -> __m128d; #[link_name = "llvm.x86.sse.cmp.ss"] fn vcmpss(a: __m128, b: __m128, imm8: u8) -> __m128; #[link_name = "llvm.x86.avx.cvtdq2.ps.256"] fn vcvtdq2ps(a: i32x8) -> __m256; #[link_name = "llvm.x86.avx.cvt.pd2.ps.256"] fn vcvtpd2ps(a: __m256d) -> __m128; #[link_name = "llvm.x86.avx.cvt.ps2dq.256"] fn vcvtps2dq(a: __m256) -> i32x8; #[link_name = "llvm.x86.avx.cvtt.pd2dq.256"] fn vcvttpd2dq(a: __m256d) -> i32x4; #[link_name = "llvm.x86.avx.cvt.pd2dq.256"] fn vcvtpd2dq(a: __m256d) -> i32x4; #[link_name = "llvm.x86.avx.cvtt.ps2dq.256"] fn vcvttps2dq(a: __m256) -> i32x8; #[link_name = "llvm.x86.avx.vzeroall"] fn vzeroall(); #[link_name = "llvm.x86.avx.vzeroupper"] fn vzeroupper(); #[link_name = "llvm.x86.avx.vpermilvar.ps.256"] fn vpermilps256(a: __m256, b: i32x8) -> __m256; #[link_name = "llvm.x86.avx.vpermilvar.ps"] fn vpermilps(a: __m128, b: i32x4) -> __m128; #[link_name = "llvm.x86.avx.vpermilvar.pd.256"] fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d; #[link_name = "llvm.x86.avx.vpermilvar.pd"] fn vpermilpd(a: __m128d, b: i64x2) -> __m128d; #[link_name = "llvm.x86.avx.vperm2f128.ps.256"] fn vperm2f128ps256(a: __m256, b: __m256, 
imm8: i8) -> __m256; #[link_name = "llvm.x86.avx.vperm2f128.pd.256"] fn vperm2f128pd256(a: __m256d, b: __m256d, imm8: i8) -> __m256d; #[link_name = "llvm.x86.avx.vperm2f128.si.256"] fn vperm2f128si256(a: i32x8, b: i32x8, imm8: i8) -> i32x8; #[link_name = "llvm.x86.avx.vbroadcastf128.ps.256"] fn vbroadcastf128ps256(a: &__m128) -> __m256; #[link_name = "llvm.x86.avx.vbroadcastf128.pd.256"] fn vbroadcastf128pd256(a: &__m128d) -> __m256d; #[link_name = "llvm.x86.avx.storeu.pd.256"] fn storeupd256(mem_addr: *mut f64, a: __m256d); #[link_name = "llvm.x86.avx.storeu.ps.256"] fn storeups256(mem_addr: *mut f32, a: __m256); #[link_name = "llvm.x86.avx.storeu.dq.256"] fn storeudq256(mem_addr: *mut i8, a: i8x32); #[link_name = "llvm.x86.avx.maskload.pd.256"] fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d; #[link_name = "llvm.x86.avx.maskstore.pd.256"] fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d); #[link_name = "llvm.x86.avx.maskload.pd"] fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d; #[link_name = "llvm.x86.avx.maskstore.pd"] fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d); #[link_name = "llvm.x86.avx.maskload.ps.256"] fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256; #[link_name = "llvm.x86.avx.maskstore.ps.256"] fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256); #[link_name = "llvm.x86.avx.maskload.ps"] fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128; #[link_name = "llvm.x86.avx.maskstore.ps"] fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128); #[link_name = "llvm.x86.avx.ldu.dq.256"] fn vlddqu(mem_addr: *const i8) -> i8x32; #[link_name = "llvm.x86.avx.rcp.ps.256"] fn vrcpps(a: __m256) -> __m256; #[link_name = "llvm.x86.avx.rsqrt.ps.256"] fn vrsqrtps(a: __m256) -> __m256; #[link_name = "llvm.x86.avx.ptestz.256"] fn ptestz256(a: i64x4, b: i64x4) -> i32; #[link_name = "llvm.x86.avx.ptestc.256"] fn ptestc256(a: i64x4, b: i64x4) -> i32; #[link_name = "llvm.x86.avx.ptestnzc.256"] fn ptestnzc256(a: i64x4, b: i64x4) -> i32; #[link_name = "llvm.x86.avx.vtestz.pd.256"] fn vtestzpd256(a: __m256d, b: __m256d) -> i32; #[link_name = "llvm.x86.avx.vtestc.pd.256"] fn vtestcpd256(a: __m256d, b: __m256d) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.pd.256"] fn vtestnzcpd256(a: __m256d, b: __m256d) -> i32; #[link_name = "llvm.x86.avx.vtestz.pd"] fn vtestzpd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.avx.vtestc.pd"] fn vtestcpd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.pd"] fn vtestnzcpd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.avx.vtestz.ps.256"] fn vtestzps256(a: __m256, b: __m256) -> i32; #[link_name = "llvm.x86.avx.vtestc.ps.256"] fn vtestcps256(a: __m256, b: __m256) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.ps.256"] fn vtestnzcps256(a: __m256, b: __m256) -> i32; #[link_name = "llvm.x86.avx.vtestz.ps"] fn vtestzps(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.avx.vtestc.ps"] fn vtestcps(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.avx.vtestnzc.ps"] fn vtestnzcps(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.avx.movmsk.pd.256"] fn movmskpd256(a: __m256d) -> i32; #[link_name = "llvm.x86.avx.movmsk.ps.256"] fn movmskps256(a: __m256) -> i32; } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use test::black_box; // Used to inhibit constant-folding. 
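// The `#[simd_test(enable = "avx")]` attribute used throughout this module comes from
// the external `stdsimd_test` harness; roughly speaking (a hedged summary, not a spec)
// it acts like `#[test]` but compiles the body with AVX enabled and is expected to skip
// the test at runtime on CPUs that do not support the feature.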
use crate::core_arch::x86::*; #[simd_test(enable = "avx")] unsafe fn test_mm256_add_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); let r = _mm256_add_pd(a, b); let e = _mm256_setr_pd(6., 8., 10., 12.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_add_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); let r = _mm256_add_ps(a, b); let e = _mm256_setr_ps(10., 12., 14., 16., 18., 20., 22., 24.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_and_pd() { let a = _mm256_set1_pd(1.); let b = _mm256_set1_pd(0.6); let r = _mm256_and_pd(a, b); let e = _mm256_set1_pd(0.5); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_and_ps() { let a = _mm256_set1_ps(1.); let b = _mm256_set1_ps(0.6); let r = _mm256_and_ps(a, b); let e = _mm256_set1_ps(0.5); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_or_pd() { let a = _mm256_set1_pd(1.); let b = _mm256_set1_pd(0.6); let r = _mm256_or_pd(a, b); let e = _mm256_set1_pd(1.2); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_or_ps() { let a = _mm256_set1_ps(1.); let b = _mm256_set1_ps(0.6); let r = _mm256_or_ps(a, b); let e = _mm256_set1_ps(1.2); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_shuffle_pd() { let a = _mm256_setr_pd(1., 4., 5., 8.); let b = _mm256_setr_pd(2., 3., 6., 7.); let r = _mm256_shuffle_pd(a, b, 0xF); let e = _mm256_setr_pd(4., 3., 8., 7.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_shuffle_ps() { let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); let r = _mm256_shuffle_ps(a, b, 0x0F); let e = _mm256_setr_ps(8., 8., 2., 2., 16., 16., 10., 10.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_andnot_pd() { let a = _mm256_set1_pd(0.); let b = _mm256_set1_pd(0.6); let r = _mm256_andnot_pd(a, b); assert_eq_m256d(r, b); } #[simd_test(enable = "avx")] unsafe fn test_mm256_andnot_ps() { let a = _mm256_set1_ps(0.); let b = _mm256_set1_ps(0.6); let r = _mm256_andnot_ps(a, b); assert_eq_m256(r, b); } #[simd_test(enable = "avx")] unsafe fn test_mm256_max_pd() { let a = _mm256_setr_pd(1., 4., 5., 8.); let b = _mm256_setr_pd(2., 3., 6., 7.); let r = _mm256_max_pd(a, b); let e = _mm256_setr_pd(2., 4., 6., 8.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_max_ps() { let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); let r = _mm256_max_ps(a, b); let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_min_pd() { let a = _mm256_setr_pd(1., 4., 5., 8.); let b = _mm256_setr_pd(2., 3., 6., 7.); let r = _mm256_min_pd(a, b); let e = _mm256_setr_pd(1., 3., 5., 7.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_min_ps() { let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); let r = _mm256_min_ps(a, b); let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_mul_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); let r = _mm256_mul_pd(a, b); let e = _mm256_setr_pd(5., 12., 21., 32.); 
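// Added spot-check of a single lane (illustrative; redundant with the full vector
// comparison below). `get_m256d` is the same lane-extraction helper used by the
// streaming-store tests in this module; lane 2 should be 3. * 7.
assert_eq!(get_m256d(r, 2), 21.0);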
assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_mul_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); let r = _mm256_mul_ps(a, b); let e = _mm256_setr_ps(9., 20., 33., 48., 65., 84., 105., 128.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_addsub_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); let r = _mm256_addsub_pd(a, b); let e = _mm256_setr_pd(-4., 8., -4., 12.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_addsub_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); let r = _mm256_addsub_ps(a, b); let e = _mm256_setr_ps(-4., 8., -4., 12., -4., 8., -4., 12.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_sub_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); let r = _mm256_sub_pd(a, b); let e = _mm256_setr_pd(-4., -4., -4., -4.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_sub_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., -1., -2., -3., -4.); let b = _mm256_setr_ps(5., 6., 7., 8., 3., 2., 1., 0.); let r = _mm256_sub_ps(a, b); let e = _mm256_setr_ps(-4., -4., -4., -4., -4., -4., -4., -4.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_round_pd() { let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2); let result_closest = _mm256_round_pd(a, 0b00000000); let result_down = _mm256_round_pd(a, 0b00000001); let result_up = _mm256_round_pd(a, 0b00000010); let expected_closest = _mm256_setr_pd(2., 2., 4., -1.); let expected_down = _mm256_setr_pd(1., 2., 3., -2.); let expected_up = _mm256_setr_pd(2., 3., 4., -1.); assert_eq_m256d(result_closest, expected_closest); assert_eq_m256d(result_down, expected_down); assert_eq_m256d(result_up, expected_up); } #[simd_test(enable = "avx")] unsafe fn test_mm256_floor_pd() { let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2); let result_down = _mm256_floor_pd(a); let expected_down = _mm256_setr_pd(1., 2., 3., -2.); assert_eq_m256d(result_down, expected_down); } #[simd_test(enable = "avx")] unsafe fn test_mm256_ceil_pd() { let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2); let result_up = _mm256_ceil_pd(a); let expected_up = _mm256_setr_pd(2., 3., 4., -1.); assert_eq_m256d(result_up, expected_up); } #[simd_test(enable = "avx")] unsafe fn test_mm256_round_ps() { let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2); let result_closest = _mm256_round_ps(a, 0b00000000); let result_down = _mm256_round_ps(a, 0b00000001); let result_up = _mm256_round_ps(a, 0b00000010); let expected_closest = _mm256_setr_ps(2., 2., 4., -1., 2., 2., 4., -1.); let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.); let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.); assert_eq_m256(result_closest, expected_closest); assert_eq_m256(result_down, expected_down); assert_eq_m256(result_up, expected_up); } #[simd_test(enable = "avx")] unsafe fn test_mm256_floor_ps() { let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2); let result_down = _mm256_floor_ps(a); let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.); assert_eq_m256(result_down, expected_down); } #[simd_test(enable = "avx")] unsafe fn test_mm256_ceil_ps() { let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2); let result_up = _mm256_ceil_ps(a); let expected_up = 
_mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.); assert_eq_m256(result_up, expected_up); } #[simd_test(enable = "avx")] unsafe fn test_mm256_sqrt_pd() { let a = _mm256_setr_pd(4., 9., 16., 25.); let r = _mm256_sqrt_pd(a); let e = _mm256_setr_pd(2., 3., 4., 5.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_sqrt_ps() { let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); let r = _mm256_sqrt_ps(a); let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_div_ps() { let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let r = _mm256_div_ps(a, b); let e = _mm256_setr_ps(1., 3., 8., 5., 0.5, 1., 0.25, 0.5); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_div_pd() { let a = _mm256_setr_pd(4., 9., 16., 25.); let b = _mm256_setr_pd(4., 3., 2., 5.); let r = _mm256_div_pd(a, b); let e = _mm256_setr_pd(1., 3., 8., 5.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_blend_pd() { let a = _mm256_setr_pd(4., 9., 16., 25.); let b = _mm256_setr_pd(4., 3., 2., 5.); let r = _mm256_blend_pd(a, b, 0x0); assert_eq_m256d(r, _mm256_setr_pd(4., 9., 16., 25.)); let r = _mm256_blend_pd(a, b, 0x3); assert_eq_m256d(r, _mm256_setr_pd(4., 3., 16., 25.)); let r = _mm256_blend_pd(a, b, 0xF); assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 5.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_blend_ps() { let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.); let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.); let r = _mm256_blend_ps(a, b, 0x0); assert_eq_m256(r, _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.)); let r = _mm256_blend_ps(a, b, 0x3); assert_eq_m256(r, _mm256_setr_ps(2., 3., 5., 8., 9., 12., 13., 16.)); let r = _mm256_blend_ps(a, b, 0xF); assert_eq_m256(r, _mm256_setr_ps(2., 3., 6., 7., 9., 12., 13., 16.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_blendv_pd() { let a = _mm256_setr_pd(4., 9., 16., 25.); let b = _mm256_setr_pd(4., 3., 2., 5.); let c = _mm256_setr_pd(0., 0., !0 as f64, !0 as f64); let r = _mm256_blendv_pd(a, b, c); let e = _mm256_setr_pd(4., 9., 2., 5.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_blendv_ps() { let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); #[rustfmt::skip] let c = _mm256_setr_ps( 0., 0., 0., 0., !0 as f32, !0 as f32, !0 as f32, !0 as f32, ); let r = _mm256_blendv_ps(a, b, c); let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_dp_ps() { let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let r = _mm256_dp_ps(a, b, 0xFF); let e = _mm256_setr_ps(200., 200., 200., 200., 2387., 2387., 2387., 2387.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_hadd_pd() { let a = _mm256_setr_pd(4., 9., 16., 25.); let b = _mm256_setr_pd(4., 3., 2., 5.); let r = _mm256_hadd_pd(a, b); let e = _mm256_setr_pd(13., 7., 41., 7.); assert_eq_m256d(r, e); let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); let r = _mm256_hadd_pd(a, b); let e = _mm256_setr_pd(3., 11., 7., 15.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_hadd_ps() { let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); let b = _mm256_setr_ps(4., 3., 2., 
5., 8., 9., 64., 50.); let r = _mm256_hadd_ps(a, b); let e = _mm256_setr_ps(13., 41., 7., 7., 13., 41., 17., 114.); assert_eq_m256(r, e); let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); let r = _mm256_hadd_ps(a, b); let e = _mm256_setr_ps(3., 7., 11., 15., 3., 7., 11., 15.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_hsub_pd() { let a = _mm256_setr_pd(4., 9., 16., 25.); let b = _mm256_setr_pd(4., 3., 2., 5.); let r = _mm256_hsub_pd(a, b); let e = _mm256_setr_pd(-5., 1., -9., -3.); assert_eq_m256d(r, e); let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); let r = _mm256_hsub_pd(a, b); let e = _mm256_setr_pd(-1., -1., -1., -1.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_hsub_ps() { let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let r = _mm256_hsub_ps(a, b); let e = _mm256_setr_ps(-5., -9., 1., -3., -5., -9., -1., 14.); assert_eq_m256(r, e); let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); let r = _mm256_hsub_ps(a, b); let e = _mm256_setr_ps(-1., -1., -1., -1., -1., -1., -1., -1.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_xor_pd() { let a = _mm256_setr_pd(4., 9., 16., 25.); let b = _mm256_set1_pd(0.); let r = _mm256_xor_pd(a, b); assert_eq_m256d(r, a); } #[simd_test(enable = "avx")] unsafe fn test_mm256_xor_ps() { let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); let b = _mm256_set1_ps(0.); let r = _mm256_xor_ps(a, b); assert_eq_m256(r, a); } #[simd_test(enable = "avx")] unsafe fn test_mm_cmp_pd() { let a = _mm_setr_pd(4., 9.); let b = _mm_setr_pd(4., 3.); let r = _mm_cmp_pd(a, b, _CMP_GE_OS); assert!(get_m128d(r, 0).is_nan()); assert!(get_m128d(r, 1).is_nan()); } #[simd_test(enable = "avx")] unsafe fn test_mm256_cmp_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); let r = _mm256_cmp_pd(a, b, _CMP_GE_OS); let e = _mm256_set1_pd(0.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm_cmp_ps() { let a = _mm_setr_ps(4., 3., 2., 5.); let b = _mm_setr_ps(4., 9., 16., 25.); let r = _mm_cmp_ps(a, b, _CMP_GE_OS); assert!(get_m128(r, 0).is_nan()); assert_eq!(get_m128(r, 1), 0.); assert_eq!(get_m128(r, 2), 0.); assert_eq!(get_m128(r, 3), 0.); } #[simd_test(enable = "avx")] unsafe fn test_mm256_cmp_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); let r = _mm256_cmp_ps(a, b, _CMP_GE_OS); let e = _mm256_set1_ps(0.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm_cmp_sd() { let a = _mm_setr_pd(4., 9.); let b = _mm_setr_pd(4., 3.); let r = _mm_cmp_sd(a, b, _CMP_GE_OS); assert!(get_m128d(r, 0).is_nan()); assert_eq!(get_m128d(r, 1), 9.); } #[simd_test(enable = "avx")] unsafe fn test_mm_cmp_ss() { let a = _mm_setr_ps(4., 3., 2., 5.); let b = _mm_setr_ps(4., 9., 16., 25.); let r = _mm_cmp_ss(a, b, _CMP_GE_OS); assert!(get_m128(r, 0).is_nan()); assert_eq!(get_m128(r, 1), 3.); assert_eq!(get_m128(r, 2), 2.); assert_eq!(get_m128(r, 3), 5.); } #[simd_test(enable = "avx")] unsafe fn test_mm256_cvtepi32_pd() { let a = _mm_setr_epi32(4, 9, 16, 25); let r = _mm256_cvtepi32_pd(a); let e = _mm256_setr_pd(4., 9., 16., 25.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_cvtepi32_ps() { let a = _mm256_setr_epi32(4, 9, 
16, 25, 4, 9, 16, 25); let r = _mm256_cvtepi32_ps(a); let e = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_cvtpd_ps() { let a = _mm256_setr_pd(4., 9., 16., 25.); let r = _mm256_cvtpd_ps(a); let e = _mm_setr_ps(4., 9., 16., 25.); assert_eq_m128(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_cvtps_epi32() { let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); let r = _mm256_cvtps_epi32(a); let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_cvtps_pd() { let a = _mm_setr_ps(4., 9., 16., 25.); let r = _mm256_cvtps_pd(a); let e = _mm256_setr_pd(4., 9., 16., 25.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_cvttpd_epi32() { let a = _mm256_setr_pd(4., 9., 16., 25.); let r = _mm256_cvttpd_epi32(a); let e = _mm_setr_epi32(4, 9, 16, 25); assert_eq_m128i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_cvtpd_epi32() { let a = _mm256_setr_pd(4., 9., 16., 25.); let r = _mm256_cvtpd_epi32(a); let e = _mm_setr_epi32(4, 9, 16, 25); assert_eq_m128i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_cvttps_epi32() { let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.); let r = _mm256_cvttps_epi32(a); let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_extractf128_ps() { let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let r = _mm256_extractf128_ps(a, 0); let e = _mm_setr_ps(4., 3., 2., 5.); assert_eq_m128(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_extractf128_pd() { let a = _mm256_setr_pd(4., 3., 2., 5.); let r = _mm256_extractf128_pd(a, 0); let e = _mm_setr_pd(4., 3.); assert_eq_m128d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_extractf128_si256() { let a = _mm256_setr_epi64x(4, 3, 2, 5); let r = _mm256_extractf128_si256(a, 0); let e = _mm_setr_epi64x(4, 3); assert_eq_m128i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_zeroall() { _mm256_zeroall(); } #[simd_test(enable = "avx")] unsafe fn test_mm256_zeroupper() { _mm256_zeroupper(); } #[simd_test(enable = "avx")] unsafe fn test_mm256_permutevar_ps() { let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); let r = _mm256_permutevar_ps(a, b); let e = _mm256_setr_ps(3., 2., 5., 4., 9., 64., 50., 8.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm_permutevar_ps() { let a = _mm_setr_ps(4., 3., 2., 5.); let b = _mm_setr_epi32(1, 2, 3, 4); let r = _mm_permutevar_ps(a, b); let e = _mm_setr_ps(3., 2., 5., 4.); assert_eq_m128(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_permute_ps() { let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let r = _mm256_permute_ps(a, 0x1b); let e = _mm256_setr_ps(5., 2., 3., 4., 50., 64., 9., 8.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm_permute_ps() { let a = _mm_setr_ps(4., 3., 2., 5.); let r = _mm_permute_ps(a, 0x1b); let e = _mm_setr_ps(5., 2., 3., 4.); assert_eq_m128(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_permutevar_pd() { let a = _mm256_setr_pd(4., 3., 2., 5.); let b = _mm256_setr_epi64x(1, 2, 3, 4); let r = _mm256_permutevar_pd(a, b); let e = _mm256_setr_pd(4., 3., 5., 2.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm_permutevar_pd() { let a = _mm_setr_pd(4., 3.); let b = _mm_setr_epi64x(3, 0); let r 
= _mm_permutevar_pd(a, b); let e = _mm_setr_pd(3., 4.); assert_eq_m128d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_permute_pd() { let a = _mm256_setr_pd(4., 3., 2., 5.); let r = _mm256_permute_pd(a, 5); let e = _mm256_setr_pd(3., 4., 5., 2.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm_permute_pd() { let a = _mm_setr_pd(4., 3.); let r = _mm_permute_pd(a, 1); let e = _mm_setr_pd(3., 4.); assert_eq_m128d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_permute2f128_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.); let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.); let r = _mm256_permute2f128_ps(a, b, 0x13); let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_permute2f128_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); let r = _mm256_permute2f128_pd(a, b, 0x31); let e = _mm256_setr_pd(3., 4., 7., 8.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_permute2f128_si256() { let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4); let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8); let r = _mm256_permute2f128_si256(a, b, 0x20); let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_broadcast_ss() { let r = _mm256_broadcast_ss(&3.); let e = _mm256_set1_ps(3.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm_broadcast_ss() { let r = _mm_broadcast_ss(&3.); let e = _mm_set1_ps(3.); assert_eq_m128(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_broadcast_sd() { let r = _mm256_broadcast_sd(&3.); let e = _mm256_set1_pd(3.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_broadcast_ps() { let a = _mm_setr_ps(4., 3., 2., 5.); let r = _mm256_broadcast_ps(&a); let e = _mm256_setr_ps(4., 3., 2., 5., 4., 3., 2., 5.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_broadcast_pd() { let a = _mm_setr_pd(4., 3.); let r = _mm256_broadcast_pd(&a); let e = _mm256_setr_pd(4., 3., 4., 3.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_insertf128_ps() { let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let b = _mm_setr_ps(4., 9., 16., 25.); let r = _mm256_insertf128_ps(a, b, 0); let e = _mm256_setr_ps(4., 9., 16., 25., 8., 9., 64., 50.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_insertf128_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm_setr_pd(5., 6.); let r = _mm256_insertf128_pd(a, b, 0); let e = _mm256_setr_pd(5., 6., 3., 4.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_insertf128_si256() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let b = _mm_setr_epi64x(5, 6); let r = _mm256_insertf128_si256(a, b, 0); let e = _mm256_setr_epi64x(5, 6, 3, 4); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_insert_epi8() { #[rustfmt::skip] let a = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); let r = _mm256_insert_epi8(a, 0, 31); #[rustfmt::skip] let e = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_insert_epi16() { #[rustfmt::skip] let a = _mm256_setr_epi16( 0, 1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12, 13, 14, 15, ); let r = _mm256_insert_epi16(a, 0, 15); #[rustfmt::skip] let e = _mm256_setr_epi16( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_insert_epi32() { let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); let r = _mm256_insert_epi32(a, 0, 7); let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_load_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let p = &a as *const _ as *const f64; let r = _mm256_load_pd(p); let e = _mm256_setr_pd(1., 2., 3., 4.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_store_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let mut r = _mm256_undefined_pd(); _mm256_store_pd(&mut r as *mut _ as *mut f64, a); assert_eq_m256d(r, a); } #[simd_test(enable = "avx")] unsafe fn test_mm256_load_ps() { let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let p = &a as *const _ as *const f32; let r = _mm256_load_ps(p); let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_store_ps() { let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); let mut r = _mm256_undefined_ps(); _mm256_store_ps(&mut r as *mut _ as *mut f32, a); assert_eq_m256(r, a); } #[simd_test(enable = "avx")] unsafe fn test_mm256_loadu_pd() { let a = &[1.0f64, 2., 3., 4.]; let p = a.as_ptr(); let r = _mm256_loadu_pd(black_box(p)); let e = _mm256_setr_pd(1., 2., 3., 4.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_storeu_pd() { let a = _mm256_set1_pd(9.); let mut r = _mm256_undefined_pd(); _mm256_storeu_pd(&mut r as *mut _ as *mut f64, a); assert_eq_m256d(r, a); } #[simd_test(enable = "avx")] unsafe fn test_mm256_loadu_ps() { let a = &[4., 3., 2., 5., 8., 9., 64., 50.]; let p = a.as_ptr(); let r = _mm256_loadu_ps(black_box(p)); let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_storeu_ps() { let a = _mm256_set1_ps(9.); let mut r = _mm256_undefined_ps(); _mm256_storeu_ps(&mut r as *mut _ as *mut f32, a); assert_eq_m256(r, a); } #[simd_test(enable = "avx")] unsafe fn test_mm256_load_si256() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let p = &a as *const _; let r = _mm256_load_si256(p); let e = _mm256_setr_epi64x(1, 2, 3, 4); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_store_si256() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let mut r = _mm256_undefined_si256(); _mm256_store_si256(&mut r as *mut _, a); assert_eq_m256i(r, a); } #[simd_test(enable = "avx")] unsafe fn test_mm256_loadu_si256() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let p = &a as *const _; let r = _mm256_loadu_si256(black_box(p)); let e = _mm256_setr_epi64x(1, 2, 3, 4); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_storeu_si256() { let a = _mm256_set1_epi8(9); let mut r = _mm256_undefined_si256(); _mm256_storeu_si256(&mut r as *mut _, a); assert_eq_m256i(r, a); } #[simd_test(enable = "avx")] unsafe fn test_mm256_maskload_pd() { let a = &[1.0f64, 2., 3., 4.]; let p = a.as_ptr(); let mask = _mm256_setr_epi64x(0, !0, 0, !0); let r = _mm256_maskload_pd(black_box(p), mask); let e = _mm256_setr_pd(0., 2., 0., 4.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_maskstore_pd() { let mut r = _mm256_set1_pd(0.); let mask = _mm256_setr_epi64x(0, !0, 0, !0); let a = 
_mm256_setr_pd(1., 2., 3., 4.); _mm256_maskstore_pd(&mut r as *mut _ as *mut f64, mask, a); let e = _mm256_setr_pd(0., 2., 0., 4.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm_maskload_pd() { let a = &[1.0f64, 2.]; let p = a.as_ptr(); let mask = _mm_setr_epi64x(0, !0); let r = _mm_maskload_pd(black_box(p), mask); let e = _mm_setr_pd(0., 2.); assert_eq_m128d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm_maskstore_pd() { let mut r = _mm_set1_pd(0.); let mask = _mm_setr_epi64x(0, !0); let a = _mm_setr_pd(1., 2.); _mm_maskstore_pd(&mut r as *mut _ as *mut f64, mask, a); let e = _mm_setr_pd(0., 2.); assert_eq_m128d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_maskload_ps() { let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.]; let p = a.as_ptr(); let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0); let r = _mm256_maskload_ps(black_box(p), mask); let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_maskstore_ps() { let mut r = _mm256_set1_ps(0.); let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0); let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); _mm256_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a); let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm_maskload_ps() { let a = &[1.0f32, 2., 3., 4.]; let p = a.as_ptr(); let mask = _mm_setr_epi32(0, !0, 0, !0); let r = _mm_maskload_ps(black_box(p), mask); let e = _mm_setr_ps(0., 2., 0., 4.); assert_eq_m128(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm_maskstore_ps() { let mut r = _mm_set1_ps(0.); let mask = _mm_setr_epi32(0, !0, 0, !0); let a = _mm_setr_ps(1., 2., 3., 4.); _mm_maskstore_ps(&mut r as *mut _ as *mut f32, mask, a); let e = _mm_setr_ps(0., 2., 0., 4.); assert_eq_m128(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_movehdup_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let r = _mm256_movehdup_ps(a); let e = _mm256_setr_ps(2., 2., 4., 4., 6., 6., 8., 8.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_moveldup_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let r = _mm256_moveldup_ps(a); let e = _mm256_setr_ps(1., 1., 3., 3., 5., 5., 7., 7.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_movedup_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let r = _mm256_movedup_pd(a); let e = _mm256_setr_pd(1., 1., 3., 3.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_lddqu_si256() { #[rustfmt::skip] let a = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); let p = &a as *const _; let r = _mm256_lddqu_si256(black_box(p)); #[rustfmt::skip] let e = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_stream_si256() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let mut r = _mm256_undefined_si256(); _mm256_stream_si256(&mut r as *mut _, a); assert_eq_m256i(r, a); } #[simd_test(enable = "avx")] unsafe fn test_mm256_stream_pd() { #[repr(align(32))] struct Memory { pub data: [f64; 4], } let a = _mm256_set1_pd(7.0); let mut mem = Memory { data: [-1.0; 4] }; _mm256_stream_pd(&mut mem.data[0] as *mut f64, a); for i in 0..4 { assert_eq!(mem.data[i], get_m256d(a, i)); } 
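// Note: the #[repr(align(32))] wrapper above is what provides the 32-byte alignment
// `_mm256_stream_pd` requires; a streaming (non-temporal) store to an unaligned
// address would be undefined behaviour.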
} #[simd_test(enable = "avx")] unsafe fn test_mm256_stream_ps() { #[repr(align(32))] struct Memory { pub data: [f32; 8], } let a = _mm256_set1_ps(7.0); let mut mem = Memory { data: [-1.0; 8] }; _mm256_stream_ps(&mut mem.data[0] as *mut f32, a); for i in 0..8 { assert_eq!(mem.data[i], get_m256(a, i)); } } #[simd_test(enable = "avx")] unsafe fn test_mm256_rcp_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let r = _mm256_rcp_ps(a); #[rustfmt::skip] let e = _mm256_setr_ps( 0.99975586, 0.49987793, 0.33325195, 0.24993896, 0.19995117, 0.16662598, 0.14282227, 0.12496948, ); let rel_err = 0.00048828125; for i in 0..8 { assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err); } } #[simd_test(enable = "avx")] unsafe fn test_mm256_rsqrt_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let r = _mm256_rsqrt_ps(a); #[rustfmt::skip] let e = _mm256_setr_ps( 0.99975586, 0.7069092, 0.5772705, 0.49987793, 0.44714355, 0.40820313, 0.3779297, 0.3534546, ); let rel_err = 0.00048828125; for i in 0..8 { assert_approx_eq!(get_m256(r, i), get_m256(e, i), 2. * rel_err); } } #[simd_test(enable = "avx")] unsafe fn test_mm256_unpackhi_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); let r = _mm256_unpackhi_pd(a, b); let e = _mm256_setr_pd(2., 6., 4., 8.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_unpackhi_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); let r = _mm256_unpackhi_ps(a, b); let e = _mm256_setr_ps(3., 11., 4., 12., 7., 15., 8., 16.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_unpacklo_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); let r = _mm256_unpacklo_pd(a, b); let e = _mm256_setr_pd(1., 5., 3., 7.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_unpacklo_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.); let r = _mm256_unpacklo_ps(a, b); let e = _mm256_setr_ps(1., 9., 2., 10., 5., 13., 6., 14.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_testz_si256() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let b = _mm256_setr_epi64x(5, 6, 7, 8); let r = _mm256_testz_si256(a, b); assert_eq!(r, 0); let b = _mm256_set1_epi64x(0); let r = _mm256_testz_si256(a, b); assert_eq!(r, 1); } #[simd_test(enable = "avx")] unsafe fn test_mm256_testc_si256() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let b = _mm256_setr_epi64x(5, 6, 7, 8); let r = _mm256_testc_si256(a, b); assert_eq!(r, 0); let b = _mm256_set1_epi64x(0); let r = _mm256_testc_si256(a, b); assert_eq!(r, 1); } #[simd_test(enable = "avx")] unsafe fn test_mm256_testnzc_si256() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let b = _mm256_setr_epi64x(5, 6, 7, 8); let r = _mm256_testnzc_si256(a, b); assert_eq!(r, 1); let a = _mm256_setr_epi64x(0, 0, 0, 0); let b = _mm256_setr_epi64x(0, 0, 0, 0); let r = _mm256_testnzc_si256(a, b); assert_eq!(r, 0); } #[simd_test(enable = "avx")] unsafe fn test_mm256_testz_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); let r = _mm256_testz_pd(a, b); assert_eq!(r, 1); let a = _mm256_set1_pd(-1.); let r = _mm256_testz_pd(a, a); assert_eq!(r, 0); } #[simd_test(enable = "avx")] unsafe fn test_mm256_testc_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); let r = _mm256_testc_pd(a, b); 
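// `testc` returns the carry flag of vtestpd: 1 when `!a & b` has no sign bits set.
// Every element of `b` is positive here, so the result below is 1; the second case
// makes `b` negative (with `a` still positive) to force a result of 0.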
assert_eq!(r, 1); let a = _mm256_set1_pd(1.); let b = _mm256_set1_pd(-1.); let r = _mm256_testc_pd(a, b); assert_eq!(r, 0); } #[simd_test(enable = "avx")] unsafe fn test_mm256_testnzc_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 6., 7., 8.); let r = _mm256_testnzc_pd(a, b); assert_eq!(r, 0); let a = _mm256_setr_pd(1., -1., -1., -1.); let b = _mm256_setr_pd(-1., -1., 1., 1.); let r = _mm256_testnzc_pd(a, b); assert_eq!(r, 1); } #[simd_test(enable = "avx")] unsafe fn test_mm_testz_pd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(5., 6.); let r = _mm_testz_pd(a, b); assert_eq!(r, 1); let a = _mm_set1_pd(-1.); let r = _mm_testz_pd(a, a); assert_eq!(r, 0); } #[simd_test(enable = "avx")] unsafe fn test_mm_testc_pd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(5., 6.); let r = _mm_testc_pd(a, b); assert_eq!(r, 1); let a = _mm_set1_pd(1.); let b = _mm_set1_pd(-1.); let r = _mm_testc_pd(a, b); assert_eq!(r, 0); } #[simd_test(enable = "avx")] unsafe fn test_mm_testnzc_pd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(5., 6.); let r = _mm_testnzc_pd(a, b); assert_eq!(r, 0); let a = _mm_setr_pd(1., -1.); let b = _mm_setr_pd(-1., -1.); let r = _mm_testnzc_pd(a, b); assert_eq!(r, 1); } #[simd_test(enable = "avx")] unsafe fn test_mm256_testz_ps() { let a = _mm256_set1_ps(1.); let r = _mm256_testz_ps(a, a); assert_eq!(r, 1); let a = _mm256_set1_ps(-1.); let r = _mm256_testz_ps(a, a); assert_eq!(r, 0); } #[simd_test(enable = "avx")] unsafe fn test_mm256_testc_ps() { let a = _mm256_set1_ps(1.); let r = _mm256_testc_ps(a, a); assert_eq!(r, 1); let b = _mm256_set1_ps(-1.); let r = _mm256_testc_ps(a, b); assert_eq!(r, 0); } #[simd_test(enable = "avx")] unsafe fn test_mm256_testnzc_ps() { let a = _mm256_set1_ps(1.); let r = _mm256_testnzc_ps(a, a); assert_eq!(r, 0); let a = _mm256_setr_ps(1., -1., -1., -1., -1., -1., -1., -1.); let b = _mm256_setr_ps(-1., -1., 1., 1., 1., 1., 1., 1.); let r = _mm256_testnzc_ps(a, b); assert_eq!(r, 1); } #[simd_test(enable = "avx")] unsafe fn test_mm_testz_ps() { let a = _mm_set1_ps(1.); let r = _mm_testz_ps(a, a); assert_eq!(r, 1); let a = _mm_set1_ps(-1.); let r = _mm_testz_ps(a, a); assert_eq!(r, 0); } #[simd_test(enable = "avx")] unsafe fn test_mm_testc_ps() { let a = _mm_set1_ps(1.); let r = _mm_testc_ps(a, a); assert_eq!(r, 1); let b = _mm_set1_ps(-1.); let r = _mm_testc_ps(a, b); assert_eq!(r, 0); } #[simd_test(enable = "avx")] unsafe fn test_mm_testnzc_ps() { let a = _mm_set1_ps(1.); let r = _mm_testnzc_ps(a, a); assert_eq!(r, 0); let a = _mm_setr_ps(1., -1., -1., -1.); let b = _mm_setr_ps(-1., -1., 1., 1.); let r = _mm_testnzc_ps(a, b); assert_eq!(r, 1); } #[simd_test(enable = "avx")] unsafe fn test_mm256_movemask_pd() { let a = _mm256_setr_pd(1., -2., 3., -4.); let r = _mm256_movemask_pd(a); assert_eq!(r, 0xA); } #[simd_test(enable = "avx")] unsafe fn test_mm256_movemask_ps() { let a = _mm256_setr_ps(1., -2., 3., -4., 1., -2., 3., -4.); let r = _mm256_movemask_ps(a); assert_eq!(r, 0xAA); } #[simd_test(enable = "avx")] unsafe fn test_mm256_setzero_pd() { let r = _mm256_setzero_pd(); assert_eq_m256d(r, _mm256_set1_pd(0.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_setzero_ps() { let r = _mm256_setzero_ps(); assert_eq_m256(r, _mm256_set1_ps(0.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_setzero_si256() { let r = _mm256_setzero_si256(); assert_eq_m256i(r, _mm256_set1_epi8(0)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set_pd() { let r = _mm256_set_pd(1., 2., 3., 4.); assert_eq_m256d(r, 
_mm256_setr_pd(4., 3., 2., 1.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set_ps() { let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.); assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set_epi8() { #[rustfmt::skip] let r = _mm256_set_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); #[rustfmt::skip] let e = _mm256_setr_epi8( 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set_epi16() { #[rustfmt::skip] let r = _mm256_set_epi16( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); #[rustfmt::skip] let e = _mm256_setr_epi16( 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set_epi32() { let r = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); assert_eq_m256i(r, _mm256_setr_epi32(8, 7, 6, 5, 4, 3, 2, 1)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set_epi64x() { let r = _mm256_set_epi64x(1, 2, 3, 4); assert_eq_m256i(r, _mm256_setr_epi64x(4, 3, 2, 1)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_setr_pd() { let r = _mm256_setr_pd(1., 2., 3., 4.); assert_eq_m256d(r, _mm256_setr_pd(1., 2., 3., 4.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_setr_ps() { let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_setr_epi8() { #[rustfmt::skip] let r = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); #[rustfmt::skip] let e = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_setr_epi16() { #[rustfmt::skip] let r = _mm256_setr_epi16( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); #[rustfmt::skip] let e = _mm256_setr_epi16( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_setr_epi32() { let r = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); assert_eq_m256i(r, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_setr_epi64x() { let r = _mm256_setr_epi64x(1, 2, 3, 4); assert_eq_m256i(r, _mm256_setr_epi64x(1, 2, 3, 4)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set1_pd() { let r = _mm256_set1_pd(1.); assert_eq_m256d(r, _mm256_set1_pd(1.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set1_ps() { let r = _mm256_set1_ps(1.); assert_eq_m256(r, _mm256_set1_ps(1.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set1_epi8() { let r = _mm256_set1_epi8(1); assert_eq_m256i(r, _mm256_set1_epi8(1)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set1_epi16() { let r = _mm256_set1_epi16(1); assert_eq_m256i(r, _mm256_set1_epi16(1)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set1_epi32() { let r = _mm256_set1_epi32(1); assert_eq_m256i(r, _mm256_set1_epi32(1)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set1_epi64x() { let r = _mm256_set1_epi64x(1); assert_eq_m256i(r, _mm256_set1_epi64x(1)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_castpd_ps() { let a = 
_mm256_setr_pd(1., 2., 3., 4.); let r = _mm256_castpd_ps(a); let e = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_castps_pd() { let a = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25); let r = _mm256_castps_pd(a); let e = _mm256_setr_pd(1., 2., 3., 4.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_castps_si256() { let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let r = _mm256_castps_si256(a); #[rustfmt::skip] let e = _mm256_setr_epi8( 0, 0, -128, 63, 0, 0, 0, 64, 0, 0, 64, 64, 0, 0, -128, 64, 0, 0, -96, 64, 0, 0, -64, 64, 0, 0, -32, 64, 0, 0, 0, 65, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_castsi256_ps() { #[rustfmt::skip] let a = _mm256_setr_epi8( 0, 0, -128, 63, 0, 0, 0, 64, 0, 0, 64, 64, 0, 0, -128, 64, 0, 0, -96, 64, 0, 0, -64, 64, 0, 0, -32, 64, 0, 0, 0, 65, ); let r = _mm256_castsi256_ps(a); let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_castpd_si256() { let a = _mm256_setr_pd(1., 2., 3., 4.); let r = _mm256_castpd_si256(a); assert_eq_m256d(transmute(r), a); } #[simd_test(enable = "avx")] unsafe fn test_mm256_castsi256_pd() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let r = _mm256_castsi256_pd(a); assert_eq_m256d(r, transmute(a)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_castps256_ps128() { let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let r = _mm256_castps256_ps128(a); assert_eq_m128(r, _mm_setr_ps(1., 2., 3., 4.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_castpd256_pd128() { let a = _mm256_setr_pd(1., 2., 3., 4.); let r = _mm256_castpd256_pd128(a); assert_eq_m128d(r, _mm_setr_pd(1., 2.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_castsi256_si128() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let r = _mm256_castsi256_si128(a); assert_eq_m128i(r, _mm_setr_epi64x(1, 2)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_zextps128_ps256() { let a = _mm_setr_ps(1., 2., 3., 4.); let r = _mm256_zextps128_ps256(a); let e = _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_zextsi128_si256() { let a = _mm_setr_epi64x(1, 2); let r = _mm256_zextsi128_si256(a); let e = _mm256_setr_epi64x(1, 2, 0, 0); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_zextpd128_pd256() { let a = _mm_setr_pd(1., 2.); let r = _mm256_zextpd128_pd256(a); let e = _mm256_setr_pd(1., 2., 0., 0.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set_m128() { let hi = _mm_setr_ps(5., 6., 7., 8.); let lo = _mm_setr_ps(1., 2., 3., 4.); let r = _mm256_set_m128(hi, lo); let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set_m128d() { let hi = _mm_setr_pd(3., 4.); let lo = _mm_setr_pd(1., 2.); let r = _mm256_set_m128d(hi, lo); let e = _mm256_setr_pd(1., 2., 3., 4.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_set_m128i() { #[rustfmt::skip] let hi = _mm_setr_epi8( 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); #[rustfmt::skip] let lo = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); let r = _mm256_set_m128i(hi, lo); #[rustfmt::skip] let e = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 
32, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_setr_m128() { let lo = _mm_setr_ps(1., 2., 3., 4.); let hi = _mm_setr_ps(5., 6., 7., 8.); let r = _mm256_setr_m128(lo, hi); let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_setr_m128d() { let lo = _mm_setr_pd(1., 2.); let hi = _mm_setr_pd(3., 4.); let r = _mm256_setr_m128d(lo, hi); let e = _mm256_setr_pd(1., 2., 3., 4.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_setr_m128i() { #[rustfmt::skip] let lo = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); #[rustfmt::skip] let hi = _mm_setr_epi8( 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); let r = _mm256_setr_m128i(lo, hi); #[rustfmt::skip] let e = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_loadu2_m128() { let hi = &[5., 6., 7., 8.]; let hiaddr = hi.as_ptr(); let lo = &[1., 2., 3., 4.]; let loaddr = lo.as_ptr(); let r = _mm256_loadu2_m128(hiaddr, loaddr); let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); assert_eq_m256(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_loadu2_m128d() { let hi = &[3., 4.]; let hiaddr = hi.as_ptr(); let lo = &[1., 2.]; let loaddr = lo.as_ptr(); let r = _mm256_loadu2_m128d(hiaddr, loaddr); let e = _mm256_setr_pd(1., 2., 3., 4.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_loadu2_m128i() { #[rustfmt::skip] let hi = _mm_setr_epi8( 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); #[rustfmt::skip] let lo = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); let r = _mm256_loadu2_m128i(&hi as *const _ as *const _, &lo as *const _ as *const _); #[rustfmt::skip] let e = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx")] unsafe fn test_mm256_storeu2_m128() { let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let mut hi = _mm_undefined_ps(); let mut lo = _mm_undefined_ps(); _mm256_storeu2_m128( &mut hi as *mut _ as *mut f32, &mut lo as *mut _ as *mut f32, a, ); assert_eq_m128(hi, _mm_setr_ps(5., 6., 7., 8.)); assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_storeu2_m128d() { let a = _mm256_setr_pd(1., 2., 3., 4.); let mut hi = _mm_undefined_pd(); let mut lo = _mm_undefined_pd(); _mm256_storeu2_m128d( &mut hi as *mut _ as *mut f64, &mut lo as *mut _ as *mut f64, a, ); assert_eq_m128d(hi, _mm_setr_pd(3., 4.)); assert_eq_m128d(lo, _mm_setr_pd(1., 2.)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_storeu2_m128i() { #[rustfmt::skip] let a = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); let mut hi = _mm_undefined_si128(); let mut lo = _mm_undefined_si128(); _mm256_storeu2_m128i(&mut hi as *mut _, &mut lo as *mut _, a); #[rustfmt::skip] let e_hi = _mm_setr_epi8( 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 ); #[rustfmt::skip] let e_lo = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ); assert_eq_m128i(hi, e_hi); assert_eq_m128i(lo, e_lo); } #[simd_test(enable = "avx")] unsafe fn test_mm256_cvtss_f32() { let 
a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let r = _mm256_cvtss_f32(a); assert_eq!(r, 1.); } } core_arch-0.1.5/src/x86/avx2.rs010064400007650000024000006515101343447103600143400ustar0000000000000000//! Advanced Vector Extensions 2 (AVX) //! //! AVX2 expands most AVX commands to 256-bit wide vector registers and //! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate). //! //! The references are: //! //! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: //! Instruction Set Reference, A-Z][intel64_ref]. //! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and //! System Instructions][amd64_ref]. //! //! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick //! overview of the instructions available. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf //! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions //! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate use crate::{ core_arch::{simd::*, simd_llvm::*, x86::*}, mem::transmute, }; #[cfg(test)] use stdsimd_test::assert_instr; /// Computes the absolute values of packed 32-bit integers in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpabsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_abs_epi32(a: __m256i) -> __m256i { transmute(pabsd(a.as_i32x8())) } /// Computes the absolute values of packed 16-bit integers in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpabsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_abs_epi16(a: __m256i) -> __m256i { transmute(pabsw(a.as_i16x16())) } /// Computes the absolute values of packed 8-bit integers in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpabsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_abs_epi8(a: __m256i) -> __m256i { transmute(pabsb(a.as_i8x32())) } /// Adds packed 64-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) } /// Adds packed 32-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) } /// Adds packed 16-bit integers in `a` and `b`. 
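///
/// # Example
///
/// A minimal usage sketch, assuming an `x86_64` host with run-time AVX2
/// support and the `std::arch::x86_64` re-export of this intrinsic:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi16(10);
///             let b = _mm256_set1_epi16(20);
///             let sum = _mm256_add_epi16(a, b);
///             // Spill the vector to memory to inspect all 16 lanes.
///             let mut out = [0i16; 16];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, sum);
///             assert!(out.iter().all(|&x| x == 30));
///         }
///     }
/// }
/// ```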
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) } /// Adds packed 8-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_add_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) } /// Adds packed 8-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i { transmute(paddsb(a.as_i8x32(), b.as_i8x32())) } /// Adds packed 16-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(paddsw(a.as_i16x16(), b.as_i16x16())) } /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddusb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i { transmute(paddusb(a.as_u8x32(), b.as_u8x32())) } /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_adds_epu16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpaddusw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i { transmute(paddusw(a.as_u16x16(), b.as_u16x16())) } /// Concatenates pairs of 16-byte blocks in `a` and `b` into a 32-byte temporary /// result, shifts the result right by `n` bytes, and returns the low 16 bytes. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_alignr_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpalignr, n = 7))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_alignr_epi8(a: __m256i, b: __m256i, n: i32) -> __m256i { let n = n as u32; // If `palignr` is shifting the pair of vectors more than the size of two // lanes, emit zero. if n > 32 { return _mm256_set1_epi8(0); } // If `palignr` is shifting the pair of input vectors more than one lane, // but less than two lanes, convert to shifting in zeroes. 
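    // E.g., `n == 20` is rewritten as the triple `(zero, a, 4)`: each 128-bit
    // lane of the result is then the matching lane of the original `a`
    // shifted right by 4 bytes, with zero bytes shifted in at the top --
    // which is what `vpalignr` produces for shift counts in `17..=31`.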
let (a, b, n) = if n > 16 { (_mm256_set1_epi8(0), a, n - 16) } else { (a, b, n) }; let a = a.as_i8x32(); let b = b.as_i8x32(); let r: i8x32 = match n { 0 => simd_shuffle32( b, a, [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ], ), 1 => simd_shuffle32( b, a, [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, ], ), 2 => simd_shuffle32( b, a, [ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, ], ), 3 => simd_shuffle32( b, a, [ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, ], ), 4 => simd_shuffle32( b, a, [ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, ], ), 5 => simd_shuffle32( b, a, [ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, ], ), 6 => simd_shuffle32( b, a, [ 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, ], ), 7 => simd_shuffle32( b, a, [ 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 23, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, ], ), 8 => simd_shuffle32( b, a, [ 8, 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 24, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, ], ), 9 => simd_shuffle32( b, a, [ 9, 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 25, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, ], ), 10 => simd_shuffle32( b, a, [ 10, 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 26, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, ], ), 11 => simd_shuffle32( b, a, [ 11, 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 27, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, ], ), 12 => simd_shuffle32( b, a, [ 12, 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 28, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, ], ), 13 => simd_shuffle32( b, a, [ 13, 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 29, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, ], ), 14 => simd_shuffle32( b, a, [ 14, 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 30, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, ], ), 15 => simd_shuffle32( b, a, [ 15, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 31, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, ], ), _ => b, }; transmute(r) } /// Computes the bitwise AND of 256 bits (representing integer data) /// in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_and_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vandps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) } /// Computes the bitwise NOT of 256 bits (representing integer data) /// in `a` and then AND with `b`. 
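///
/// # Example
///
/// A minimal usage sketch, assuming an `x86_64` host with run-time AVX2
/// support and the `std::arch::x86_64` re-export of this intrinsic:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_set1_epi8(0b0101_0101);
///             let b = _mm256_set1_epi8(0b0011_0011);
///             // Each byte of the result is `(!a) & b`.
///             let r = _mm256_andnot_si256(a, b);
///             let mut out = [0i8; 32];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert!(out.iter().all(|&x| x == 0b0010_0010));
///         }
///     }
/// }
/// ```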
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_andnot_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vandnps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i { let all_ones = _mm256_set1_epi8(-1); transmute(simd_and( simd_xor(a.as_i64x4(), all_ones.as_i64x4()), b.as_i64x4(), )) } /// Averages packed unsigned 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpavgw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i { transmute(pavgw(a.as_u16x16(), b.as_u16x16())) } /// Averages packed unsigned 8-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_avg_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpavgb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i { transmute(pavgb(a.as_u8x32(), b.as_u8x32())) } /// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blend_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i { let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i32x4(); let b = b.as_i32x4(); macro_rules! blend2 { ($a:expr, $b:expr, $c:expr, $d:expr) => { simd_shuffle4(a, b, [$a, $b, $c, $d]); }; } macro_rules! blend1 { ($a:expr, $b:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => blend2!($a, $b, 2, 3), 0b01 => blend2!($a, $b, 6, 3), 0b10 => blend2!($a, $b, 2, 7), _ => blend2!($a, $b, 6, 7), } }; } let r: i32x4 = match imm8 & 0b11 { 0b00 => blend1!(0, 1), 0b01 => blend1!(4, 1), 0b10 => blend1!(0, 5), _ => blend1!(4, 5), }; transmute(r) } /// Blends packed 32-bit integers from `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vblendps, imm8 = 9))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i { let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i32x8(); let b = b.as_i32x8(); macro_rules! blend4 { ( $a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr ) => { simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h]); }; } macro_rules! blend3 { ($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => { match (imm8 >> 6) & 0b11 { 0b00 => blend4!($a, $b, $c, $d, $e, $f, 6, 7), 0b01 => blend4!($a, $b, $c, $d, $e, $f, 14, 7), 0b10 => blend4!($a, $b, $c, $d, $e, $f, 6, 15), _ => blend4!($a, $b, $c, $d, $e, $f, 14, 15), } }; } macro_rules! blend2 { ($a:expr, $b:expr, $c:expr, $d:expr) => { match (imm8 >> 4) & 0b11 { 0b00 => blend3!($a, $b, $c, $d, 4, 5), 0b01 => blend3!($a, $b, $c, $d, 12, 5), 0b10 => blend3!($a, $b, $c, $d, 4, 13), _ => blend3!($a, $b, $c, $d, 12, 13), } }; } macro_rules! 
blend1 { ($a:expr, $b:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => blend2!($a, $b, 2, 3), 0b01 => blend2!($a, $b, 10, 3), 0b10 => blend2!($a, $b, 2, 11), _ => blend2!($a, $b, 10, 11), } }; } let r: i32x8 = match imm8 & 0b11 { 0b00 => blend1!(0, 1), 0b01 => blend1!(8, 1), 0b10 => blend1!(0, 9), _ => blend1!(8, 9), }; transmute(r) } /// Blends packed 16-bit integers from `a` and `b` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blend_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpblendw, imm8 = 9))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blend_epi16(a: __m256i, b: __m256i, imm8: i32) -> __m256i { let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i16x16(); let b = b.as_i16x16(); macro_rules! blend4 { ( $a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr, $i:expr, $j:expr, $k:expr, $l:expr, $m:expr, $n:expr, $o:expr, $p:expr ) => { simd_shuffle16( a, b, [ $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p, ], ) }; } macro_rules! blend3 { ( $a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $a2:expr, $b2:expr, $c2:expr, $d2:expr, $e2:expr, $f2:expr ) => { match (imm8 >> 6) & 0b11 { 0b00 => blend4!($a, $b, $c, $d, $e, $f, 6, 7, $a2, $b2, $c2, $d2, $e2, $f2, 14, 15), 0b01 => { blend4!($a, $b, $c, $d, $e, $f, 22, 7, $a2, $b2, $c2, $d2, $e2, $f2, 30, 15) } 0b10 => { blend4!($a, $b, $c, $d, $e, $f, 6, 23, $a2, $b2, $c2, $d2, $e2, $f2, 14, 31) } _ => blend4!($a, $b, $c, $d, $e, $f, 22, 23, $a2, $b2, $c2, $d2, $e2, $f2, 30, 31), } }; } macro_rules! blend2 { ( $a:expr, $b:expr, $c:expr, $d:expr, $a2:expr, $b2:expr, $c2:expr, $d2:expr ) => { match (imm8 >> 4) & 0b11 { 0b00 => blend3!($a, $b, $c, $d, 4, 5, $a2, $b2, $c2, $d2, 12, 13), 0b01 => blend3!($a, $b, $c, $d, 20, 5, $a2, $b2, $c2, $d2, 28, 13), 0b10 => blend3!($a, $b, $c, $d, 4, 21, $a2, $b2, $c2, $d2, 12, 29), _ => blend3!($a, $b, $c, $d, 20, 21, $a2, $b2, $c2, $d2, 28, 29), } }; } macro_rules! blend1 { ($a1:expr, $b1:expr, $a2:expr, $b2:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => blend2!($a1, $b1, 2, 3, $a2, $b2, 10, 11), 0b01 => blend2!($a1, $b1, 18, 3, $a2, $b2, 26, 11), 0b10 => blend2!($a1, $b1, 2, 19, $a2, $b2, 10, 27), _ => blend2!($a1, $b1, 18, 19, $a2, $b2, 26, 27), } }; } let r: i16x16 = match imm8 & 0b11 { 0b00 => blend1!(0, 1, 8, 9), 0b01 => blend1!(16, 1, 24, 9), 0b10 => blend1!(0, 17, 8, 25), _ => blend1!(16, 17, 24, 25), }; transmute(r) } /// Blends packed 8-bit integers from `a` and `b` using `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_blendv_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpblendvb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i { transmute(pblendvb(a.as_i8x32(), b.as_i8x32(), mask.as_i8x32())) } /// Broadcasts the low packed 8-bit integer from `a` to all elements of /// the 128-bit returned value. 
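///
/// # Example
///
/// A minimal usage sketch, assuming an `x86_64` host with run-time AVX2
/// support and the `std::arch::x86_64` re-export of this intrinsic:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_setr_epi8(7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             // Every byte of the result is the lowest byte of `a`.
///             let r = _mm_broadcastb_epi8(a);
///             let mut out = [0i8; 16];
///             _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r);
///             assert!(out.iter().all(|&x| x == 7));
///         }
///     }
/// }
/// ```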
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastb_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastb_epi8(a: __m128i) -> __m128i { let zero = _mm_setzero_si128(); let ret = simd_shuffle16(a.as_i8x16(), zero.as_i8x16(), [0_u32; 16]); transmute::(ret) } /// Broadcasts the low packed 8-bit integer from `a` to all elements of /// the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastb_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); let ret = simd_shuffle32(a.as_i8x16(), zero.as_i8x16(), [0_u32; 32]); transmute::(ret) } // N.B., `simd_shuffle4` with integer data types for `a` and `b` is // often compiled to `vbroadcastss`. /// Broadcasts the low packed 32-bit integer from `a` to all elements of /// the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastd_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastd_epi32(a: __m128i) -> __m128i { let zero = _mm_setzero_si128(); let ret = simd_shuffle4(a.as_i32x4(), zero.as_i32x4(), [0_u32; 4]); transmute::(ret) } // N.B., `simd_shuffle4`` with integer data types for `a` and `b` is // often compiled to `vbroadcastss`. /// Broadcasts the low packed 32-bit integer from `a` to all elements of /// the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastd_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); let ret = simd_shuffle8(a.as_i32x4(), zero.as_i32x4(), [0_u32; 8]); transmute::(ret) } /// Broadcasts the low packed 64-bit integer from `a` to all elements of /// the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastq_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastq_epi64(a: __m128i) -> __m128i { let zero = _mm_setzero_si128().as_i64x2(); let ret = simd_shuffle2(a.as_i64x2(), zero, [0_u32; 2]); transmute::(ret) } // N.B. `simd_shuffle4` with integer data types for `a` and `b` is // often compiled to `vbroadcastsd`. /// Broadcasts the low packed 64-bit integer from `a` to all elements of /// the 256-bit returned value. 
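///
/// # Example
///
/// A minimal usage sketch, assuming an `x86_64` host with run-time AVX2
/// support and the `std::arch::x86_64` re-export of this intrinsic:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             // `_mm_set_epi64x` takes (high, low); the low element here is 42.
///             let a = _mm_set_epi64x(7, 42);
///             let r = _mm256_broadcastq_epi64(a);
///             let mut out = [0i64; 4];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [42, 42, 42, 42]);
///         }
///     }
/// }
/// ```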
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastq_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); let ret = simd_shuffle4(a.as_i64x2(), zero.as_i64x2(), [0_u32; 4]); transmute::(ret) } /// Broadcasts the low double-precision (64-bit) floating-point element /// from `a` to all elements of the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastsd_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vmovddup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastsd_pd(a: __m128d) -> __m128d { simd_shuffle2(a, _mm_setzero_pd(), [0_u32; 2]) } /// Broadcasts the low double-precision (64-bit) floating-point element /// from `a` to all elements of the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsd_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d { simd_shuffle4(a, _mm_setzero_pd(), [0_u32; 4]) } // N.B., `broadcastsi128_si256` is often compiled to `vinsertf128` or // `vbroadcastf128`. /// Broadcasts 128 bits of integer data from a to all 128-bit lanes in /// the 256-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastsi128_si256) #[inline] #[target_feature(enable = "avx2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); let ret = simd_shuffle4(a.as_i64x2(), zero.as_i64x2(), [0, 1, 0, 1]); transmute::(ret) } /// Broadcasts the low single-precision (32-bit) floating-point element /// from `a` to all elements of the 128-bit returned value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastss_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastss_ps(a: __m128) -> __m128 { simd_shuffle4(a, _mm_setzero_ps(), [0_u32; 4]) } /// Broadcasts the low single-precision (32-bit) floating-point element /// from `a` to all elements of the 256-bit returned value. 
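///
/// # Example
///
/// A minimal usage sketch, assuming an `x86_64` host with run-time AVX2
/// support and the `std::arch::x86_64` re-export of this intrinsic:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_setr_ps(3.5, 1.0, 2.0, 4.0);
///             // All eight lanes of the result hold the lowest lane of `a`.
///             let r = _mm256_broadcastss_ps(a);
///             let mut out = [0.0f32; 8];
///             _mm256_storeu_ps(out.as_mut_ptr(), r);
///             assert_eq!(out, [3.5f32; 8]);
///         }
///     }
/// }
/// ```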
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastss_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vbroadcastss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastss_ps(a: __m128) -> __m256 { simd_shuffle8(a, _mm_setzero_ps(), [0_u32; 8]) } /// Broadcasts the low packed 16-bit integer from a to all elements of /// the 128-bit returned value /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastw_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_broadcastw_epi16(a: __m128i) -> __m128i { let zero = _mm_setzero_si128(); let ret = simd_shuffle8(a.as_i16x8(), zero.as_i16x8(), [0_u32; 8]); transmute::(ret) } /// Broadcasts the low packed 16-bit integer from a to all elements of /// the 256-bit returned value /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastw_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpbroadcastw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i { let zero = _mm_setzero_si128(); let ret = simd_shuffle16(a.as_i16x8(), zero.as_i16x8(), [0_u32; 16]); transmute::(ret) } /// Compares packed 64-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpeqq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i { transmute::(simd_eq(a.as_i64x4(), b.as_i64x4())) } /// Compares packed 32-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpeqd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i { transmute::(simd_eq(a.as_i32x8(), b.as_i32x8())) } /// Compares packed 16-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpeqw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i { transmute::(simd_eq(a.as_i16x16(), b.as_i16x16())) } /// Compares packed 8-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpeq_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpeqb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i { transmute::(simd_eq(a.as_i8x32(), b.as_i8x32())) } /// Compares packed 64-bit integers in `a` and `b` for greater-than. 
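///
/// # Example
///
/// A minimal usage sketch, assuming an `x86_64` host with run-time AVX2
/// support and the `std::arch::x86_64` re-export of this intrinsic:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm256_setr_epi64x(1, 5, -3, 9);
///             let b = _mm256_setr_epi64x(2, 5, -4, 8);
///             // Each lane is all-ones where `a > b`, otherwise all-zeros.
///             let r = _mm256_cmpgt_epi64(a, b);
///             let mut out = [0i64; 4];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [0, 0, -1, -1]);
///         }
///     }
/// }
/// ```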
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpgtq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i { transmute::(simd_gt(a.as_i64x4(), b.as_i64x4())) } /// Compares packed 32-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpgtd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i { transmute::(simd_gt(a.as_i32x8(), b.as_i32x8())) } /// Compares packed 16-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpgtw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i { transmute::(simd_gt(a.as_i16x16(), b.as_i16x16())) } /// Compares packed 8-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cmpgt_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpcmpgtb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i { transmute::(simd_gt(a.as_i8x32(), b.as_i8x32())) } /// Sign-extend 16-bit integers to 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i { transmute::(simd_cast(a.as_i16x8())) } /// Sign-extend 16-bit integers to 64-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi16_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxwq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i { let a = a.as_i16x8(); let v64: i16x4 = simd_shuffle4(a, a, [0, 1, 2, 3]); transmute::(simd_cast(v64)) } /// Sign-extend 32-bit integers to 64-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi32_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxdq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i { transmute::(simd_cast(a.as_i32x4())) } /// Sign-extend 8-bit integers to 16-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxbw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i { transmute::(simd_cast(a.as_i8x16())) } /// Sign-extend 8-bit integers to 32-bit integers. 
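///
/// # Example
///
/// A minimal usage sketch, assuming an `x86_64` host with run-time AVX2
/// support and the `std::arch::x86_64` re-export of this intrinsic:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_setr_epi8(-1, 2, -3, 4, -5, 6, -7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
///             // Only the low eight bytes are used; each is sign-extended to 32 bits.
///             let r = _mm256_cvtepi8_epi32(a);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [-1, 2, -3, 4, -5, 6, -7, 8]);
///         }
///     }
/// }
/// ```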
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxbd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i { let a = a.as_i8x16(); let v64: i8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); transmute::(simd_cast(v64)) } /// Sign-extend 8-bit integers to 64-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepi8_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovsxbq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i { let a = a.as_i8x16(); let v32: i8x4 = simd_shuffle4(a, a, [0, 1, 2, 3]); transmute::(simd_cast(v32)) } /// Zeroes extend packed unsigned 16-bit integers in `a` to packed 32-bit /// integers, and stores the results in `dst`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i { transmute::(simd_cast(a.as_u16x8())) } /// Zero-extend the lower four unsigned 16-bit integers in `a` to 64-bit /// integers. The upper four elements of `a` are unused. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu16_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxwq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i { let a = a.as_u16x8(); let v64: u16x4 = simd_shuffle4(a, a, [0, 1, 2, 3]); transmute::(simd_cast(v64)) } /// Zero-extend unsigned 32-bit integers in `a` to 64-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxdq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i { transmute::(simd_cast(a.as_u32x4())) } /// Zero-extend unsigned 8-bit integers in `a` to 16-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxbw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i { transmute::(simd_cast(a.as_u8x16())) } /// Zero-extend the lower eight unsigned 8-bit integers in `a` to 32-bit /// integers. The upper eight elements of `a` are unused. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxbd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i { let a = a.as_u8x16(); let v64: u8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); transmute::(simd_cast(v64)) } /// Zero-extend the lower four unsigned 8-bit integers in `a` to 64-bit /// integers. The upper twelve elements of `a` are unused. 
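///
/// # Example
///
/// A minimal usage sketch, assuming an `x86_64` host with run-time AVX2
/// support and the `std::arch::x86_64` re-export of this intrinsic:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let a = _mm_setr_epi8(0xFFu8 as i8, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
///             // The low four bytes are zero-extended, so 0xFF becomes 255, not -1.
///             let r = _mm256_cvtepu8_epi64(a);
///             let mut out = [0i64; 4];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [255, 1, 2, 3]);
///         }
///     }
/// }
/// ```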
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu8_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovzxbq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i { let a = a.as_u8x16(); let v32: u8x4 = simd_shuffle4(a, a, [0, 1, 2, 3]); transmute::(simd_cast(v32)) } /// Extracts 128 bits (of integer data) from `a` selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extracti128_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr( all(test, not(target_os = "windows")), assert_instr(vextractf128, imm8 = 1) )] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extracti128_si256(a: __m256i, imm8: i32) -> __m128i { let a = a.as_i64x4(); let b = _mm256_undefined_si256().as_i64x4(); let dst: i64x2 = match imm8 & 0b01 { 0 => simd_shuffle2(a, b, [0, 1]), _ => simd_shuffle2(a, b, [2, 3]), }; transmute(dst) } /// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphaddw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(phaddw(a.as_i16x16(), b.as_i16x16())) } /// Horizontally adds adjacent pairs of 32-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadd_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphaddd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i { transmute(phaddd(a.as_i32x8(), b.as_i32x8())) } /// Horizontally adds adjacent pairs of 16-bit integers in `a` and `b` /// using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hadds_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphaddsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) } /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphsubw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(phsubw(a.as_i16x16(), b.as_i16x16())) } /// Horizontally subtract adjacent pairs of 32-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsub_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphsubd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i { transmute(phsubd(a.as_i32x8(), b.as_i32x8())) } /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b` /// using saturation. 
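///
/// # Example
///
/// A minimal usage sketch, assuming an `x86_64` host with run-time AVX2
/// support and the `std::arch::x86_64` re-export of this intrinsic:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             // Adjacent pairs are subtracted within each 128-bit lane of `a`,
///             // then of `b`, and results that overflow `i16` saturate.
///             let a = _mm256_setr_epi16(
///                 -32768, 1, 30, 10, 0, 0, 0, 0,
///                 32767, -1, 5, 7, 0, 0, 0, 0,
///             );
///             let b = _mm256_setzero_si256();
///             let r = _mm256_hsubs_epi16(a, b);
///             let mut out = [0i16; 16];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             // -32768 - 1 saturates to -32768; 32767 - (-1) saturates to 32767.
///             assert_eq!(
///                 out,
///                 [-32768, 20, 0, 0, 0, 0, 0, 0, 32767, -2, 0, 0, 0, 0, 0, 0]
///             );
///         }
///     }
/// }
/// ```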
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_hsubs_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vphsubsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i32gather_epi32(slice: *const i32, offsets: __m128i, scale: i32) -> __m128i { let zero = _mm_setzero_si128().as_i32x4(); let neg_one = _mm_set1_epi32(-1).as_i32x4(); let offsets = offsets.as_i32x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { pgatherdd(zero, slice, offsets, neg_one, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i32gather_epi32( src: __m128i, slice: *const i32, offsets: __m128i, mask: __m128i, scale: i32, ) -> __m128i { let src = src.as_i32x4(); let mask = mask.as_i32x4(); let offsets = offsets.as_i32x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { pgatherdd(src, slice, offsets, mask, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i32gather_epi32(slice: *const i32, offsets: __m256i, scale: i32) -> __m256i { let zero = _mm256_setzero_si256().as_i32x8(); let neg_one = _mm256_set1_epi32(-1).as_i32x8(); let offsets = offsets.as_i32x8(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { vpgatherdd(zero, slice, offsets, neg_one, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
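///
/// # Example
///
/// A minimal usage sketch, assuming an `x86_64` host with run-time AVX2
/// support and the `scale`-as-last-argument signature used in this module:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let data: [i32; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
///             let src = _mm256_set1_epi32(-1); // fallback for masked-off lanes
///             let offsets = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7);
///             // Only lanes whose mask element has its top bit set are gathered.
///             let mask = _mm256_setr_epi32(-1, -1, -1, -1, 0, 0, 0, 0);
///             let r = _mm256_mask_i32gather_epi32(src, data.as_ptr(), offsets, mask, 4);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [10, 12, 14, 16, -1, -1, -1, -1]);
///         }
///     }
/// }
/// ```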
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i32gather_epi32( src: __m256i, slice: *const i32, offsets: __m256i, mask: __m256i, scale: i32, ) -> __m256i { let src = src.as_i32x8(); let mask = mask.as_i32x8(); let offsets = offsets.as_i32x8(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { vpgatherdd(src, slice, offsets, mask, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i32gather_ps(slice: *const f32, offsets: __m128i, scale: i32) -> __m128 { let zero = _mm_setzero_ps(); let neg_one = _mm_set1_ps(-1.0); let offsets = offsets.as_i32x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { pgatherdps(zero, slice, offsets, neg_one, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i32gather_ps( src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32, ) -> __m128 { let offsets = offsets.as_i32x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { pgatherdps(src, slice, offsets, mask, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i32gather_ps(slice: *const f32, offsets: __m256i, scale: i32) -> __m256 { let zero = _mm256_setzero_ps(); let neg_one = _mm256_set1_ps(-1.0); let offsets = offsets.as_i32x8(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { vpgatherdps(zero, slice, offsets, neg_one, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdps, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i32gather_ps( src: __m256, slice: *const f32, offsets: __m256i, mask: __m256, scale: i32, ) -> __m256 { let offsets = offsets.as_i32x8(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { vpgatherdps(src, slice, offsets, mask, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i32gather_epi64(slice: *const i64, offsets: __m128i, scale: i32) -> __m128i { let zero = _mm_setzero_si128().as_i64x2(); let neg_one = _mm_set1_epi64x(-1).as_i64x2(); let offsets = offsets.as_i32x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { pgatherdq(zero, slice, offsets, neg_one, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i32gather_epi64( src: __m128i, slice: *const i64, offsets: __m128i, mask: __m128i, scale: i32, ) -> __m128i { let src = src.as_i64x2(); let mask = mask.as_i64x2(); let offsets = offsets.as_i32x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { pgatherdq(src, slice, offsets, mask, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i32gather_epi64(slice: *const i64, offsets: __m128i, scale: i32) -> __m256i { let zero = _mm256_setzero_si256().as_i64x4(); let neg_one = _mm256_set1_epi64x(-1).as_i64x4(); let offsets = offsets.as_i32x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { vpgatherdq(zero, slice, offsets, neg_one, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i32gather_epi64( src: __m256i, slice: *const i64, offsets: __m128i, mask: __m256i, scale: i32, ) -> __m256i { let src = src.as_i64x4(); let mask = mask.as_i64x4(); let offsets = offsets.as_i32x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { vpgatherdq(src, slice, offsets, mask, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i32gather_pd(slice: *const f64, offsets: __m128i, scale: i32) -> __m128d { let zero = _mm_setzero_pd(); let neg_one = _mm_set1_pd(-1.0); let offsets = offsets.as_i32x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { pgatherdpd(zero, slice, offsets, neg_one, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i32gather_pd( src: __m128d, slice: *const f64, offsets: __m128i, mask: __m128d, scale: i32, ) -> __m128d { let offsets = offsets.as_i32x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { pgatherdpd(src, slice, offsets, mask, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i32gather_pd(slice: *const f64, offsets: __m128i, scale: i32) -> __m256d { let zero = _mm256_setzero_pd(); let neg_one = _mm256_set1_pd(-1.0); let offsets = offsets.as_i32x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { vpgatherdpd(zero, slice, offsets, neg_one, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
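///
/// # Example
///
/// A minimal usage sketch, assuming an `x86_64` host with run-time AVX2
/// support and the `scale`-as-last-argument signature used in this module:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let data: [f64; 4] = [1.0, 2.0, 3.0, 4.0];
///             let src = _mm_set1_pd(9.0);
///             // Only the low two 32-bit offsets are used for a 128-bit result.
///             let offsets = _mm_setr_epi32(3, 0, 0, 0);
///             // The mask is checked via the sign bit of each `f64` element.
///             let mask = _mm_setr_pd(-1.0, 0.0);
///             let r = _mm_mask_i32gather_pd(src, data.as_ptr(), offsets, mask, 8);
///             let mut out = [0.0f64; 2];
///             _mm_storeu_pd(out.as_mut_ptr(), r);
///             // Lane 0 gathers `data[3]`; lane 1 keeps the value from `src`.
///             assert_eq!(out, [4.0, 9.0]);
///         }
///     }
/// }
/// ```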
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i32gather_pd( src: __m256d, slice: *const f64, offsets: __m128i, mask: __m256d, scale: i32, ) -> __m256d { let offsets = offsets.as_i32x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { vpgatherdpd(src, slice, offsets, mask, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i64gather_epi32(slice: *const i32, offsets: __m128i, scale: i32) -> __m128i { let zero = _mm_setzero_si128().as_i32x4(); let neg_one = _mm_set1_epi64x(-1).as_i32x4(); let offsets = offsets.as_i64x2(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { pgatherqd(zero, slice, offsets, neg_one, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i64gather_epi32( src: __m128i, slice: *const i32, offsets: __m128i, mask: __m128i, scale: i32, ) -> __m128i { let src = src.as_i32x4(); let mask = mask.as_i32x4(); let offsets = offsets.as_i64x2(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { pgatherqd(src, slice, offsets, mask, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i64gather_epi32(slice: *const i32, offsets: __m256i, scale: i32) -> __m128i { let zero = _mm_setzero_si128().as_i32x4(); let neg_one = _mm_set1_epi64x(-1).as_i32x4(); let offsets = offsets.as_i64x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { vpgatherqd(zero, slice, offsets, neg_one, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i64gather_epi32( src: __m128i, slice: *const i32, offsets: __m256i, mask: __m128i, scale: i32, ) -> __m128i { let src = src.as_i32x4(); let mask = mask.as_i32x4(); let offsets = offsets.as_i64x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { vpgatherqd(src, slice, offsets, mask, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i64gather_ps(slice: *const f32, offsets: __m128i, scale: i32) -> __m128 { let zero = _mm_setzero_ps(); let neg_one = _mm_set1_ps(-1.0); let offsets = offsets.as_i64x2(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { pgatherqps(zero, slice, offsets, neg_one, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i64gather_ps( src: __m128, slice: *const f32, offsets: __m128i, mask: __m128, scale: i32, ) -> __m128 { let offsets = offsets.as_i64x2(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { pgatherqps(src, slice, offsets, mask, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i64gather_ps(slice: *const f32, offsets: __m256i, scale: i32) -> __m128 { let zero = _mm_setzero_ps(); let neg_one = _mm_set1_ps(-1.0); let offsets = offsets.as_i64x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { vpgatherqps(zero, slice, offsets, neg_one, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqps, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i64gather_ps( src: __m128, slice: *const f32, offsets: __m256i, mask: __m128, scale: i32, ) -> __m128 { let offsets = offsets.as_i64x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { vpgatherqps(src, slice, offsets, mask, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i64gather_epi64(slice: *const i64, offsets: __m128i, scale: i32) -> __m128i { let zero = _mm_setzero_si128().as_i64x2(); let neg_one = _mm_set1_epi64x(-1).as_i64x2(); let slice = slice as *const i8; let offsets = offsets.as_i64x2(); macro_rules! call { ($imm8:expr) => { pgatherqq(zero, slice, offsets, neg_one, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i64gather_epi64( src: __m128i, slice: *const i64, offsets: __m128i, mask: __m128i, scale: i32, ) -> __m128i { let src = src.as_i64x2(); let mask = mask.as_i64x2(); let offsets = offsets.as_i64x2(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { pgatherqq(src, slice, offsets, mask, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i64gather_epi64(slice: *const i64, offsets: __m256i, scale: i32) -> __m256i { let zero = _mm256_setzero_si256().as_i64x4(); let neg_one = _mm256_set1_epi64x(-1).as_i64x4(); let slice = slice as *const i8; let offsets = offsets.as_i64x4(); macro_rules! call { ($imm8:expr) => { vpgatherqq(zero, slice, offsets, neg_one, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
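///
/// # Example
///
/// A minimal usage sketch, assuming an `x86_64` host with run-time AVX2
/// support and the `scale`-as-last-argument signature used in this module:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx2") {
///         unsafe {
///             let data: [i64; 4] = [100, 200, 300, 400];
///             let src = _mm256_set1_epi64x(-1);
///             let offsets = _mm256_setr_epi64x(3, 2, 1, 0);
///             // Gather only the first two lanes; the rest fall back to `src`.
///             let mask = _mm256_setr_epi64x(-1, -1, 0, 0);
///             let r = _mm256_mask_i64gather_epi64(src, data.as_ptr(), offsets, mask, 8);
///             let mut out = [0i64; 4];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [400, 300, -1, -1]);
///         }
///     }
/// }
/// ```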
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i64gather_epi64( src: __m256i, slice: *const i64, offsets: __m256i, mask: __m256i, scale: i32, ) -> __m256i { let src = src.as_i64x4(); let mask = mask.as_i64x4(); let offsets = offsets.as_i64x4(); let slice = slice as *const i8; macro_rules! call { ($imm8:expr) => { vpgatherqq(src, slice, offsets, mask, $imm8) }; } let r = constify_imm8!(scale, call); transmute(r) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_i64gather_pd(slice: *const f64, offsets: __m128i, scale: i32) -> __m128d { let zero = _mm_setzero_pd(); let neg_one = _mm_set1_pd(-1.0); let slice = slice as *const i8; let offsets = offsets.as_i64x2(); macro_rules! call { ($imm8:expr) => { pgatherqpd(zero, slice, offsets, neg_one, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mask_i64gather_pd( src: __m128d, slice: *const f64, offsets: __m128i, mask: __m128d, scale: i32, ) -> __m128d { let slice = slice as *const i8; let offsets = offsets.as_i64x2(); macro_rules! call { ($imm8:expr) => { pgatherqpd(src, slice, offsets, mask, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_i64gather_pd(slice: *const f64, offsets: __m256i, scale: i32) -> __m256d { let zero = _mm256_setzero_pd(); let neg_one = _mm256_set1_pd(-1.0); let slice = slice as *const i8; let offsets = offsets.as_i64x4(); macro_rules! call { ($imm8:expr) => { vpgatherqpd(zero, slice, offsets, neg_one, $imm8) }; } constify_imm8!(scale, call) } /// Returns values from `slice` at offsets determined by `offsets * scale`, /// where /// `scale` is between 1 and 8. If mask is set, load the value from `src` in /// that position instead. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64gather_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mask_i64gather_pd( src: __m256d, slice: *const f64, offsets: __m256i, mask: __m256d, scale: i32, ) -> __m256d { let slice = slice as *const i8; let offsets = offsets.as_i64x4(); macro_rules! call { ($imm8:expr) => { vpgatherqpd(src, slice, offsets, mask, $imm8) }; } constify_imm8!(scale, call) } /// Copies `a` to `dst`, then insert 128 bits (of integer data) from `b` at the /// location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_inserti128_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr( all(test, not(target_os = "windows")), assert_instr(vinsertf128, imm8 = 1) )] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_inserti128_si256(a: __m256i, b: __m128i, imm8: i32) -> __m256i { let a = a.as_i64x4(); let b = _mm256_castsi128_si256(b).as_i64x4(); let dst: i64x4 = match imm8 & 0b01 { 0 => simd_shuffle4(a, b, [4, 5, 2, 3]), _ => simd_shuffle4(a, b, [0, 1, 4, 5]), }; transmute(dst) } /// Multiplies packed signed 16-bit integers in `a` and `b`, producing /// intermediate signed 32-bit integers. Horizontally add adjacent pairs /// of intermediate 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_madd_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaddwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) } /// Vertically multiplies each unsigned 8-bit integer from `a` with the /// corresponding signed 8-bit integer from `b`, producing intermediate /// signed 16-bit integers. Horizontally add adjacent pairs of intermediate /// signed 16-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maddubs_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaddubsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) } /// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i { transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4())) } /// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). 
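///
/// A minimal usage sketch (added for illustration; the buffer contents and
/// mask pattern below are arbitrary):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let data: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
/// // Only lanes whose mask element has its highest bit set are loaded;
/// // the remaining lanes are zeroed.
/// let mask = _mm256_setr_epi32(-1, 0, -1, 0, -1, 0, -1, 0);
/// let r = _mm256_maskload_epi32(data.as_ptr(), mask);
/// let expected = _mm256_setr_epi32(1, 0, 3, 0, 5, 0, 7, 0);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```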
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i { transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8())) } /// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskload_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i { transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2())) } /// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask` /// (elements are zeroed out when the highest bit is not set in the /// corresponding element). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskload_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i { transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4())) } /// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) { maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4()) } /// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) { maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8()) } /// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). 
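///
/// An illustrative sketch; the output buffer and mask below are made up for
/// the example:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let mut out = [0i64; 2];
/// let a = _mm_set_epi64x(22, 11); // lanes (low to high): 11, 22
/// // Only the low lane has its mask bit set, so only it is stored.
/// let mask = _mm_set_epi64x(0, -1);
/// _mm_maskstore_epi64(out.as_mut_ptr(), mask, a);
/// assert_eq!(out, [11, 0]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```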
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskstore_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) { maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2()) } /// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr` /// using `mask` (elements are not stored when the highest bit is not set /// in the corresponding element). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskstore_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaskmovq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) { maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4()) } /// Compares packed 16-bit integers in `a` and `b`, and returns the packed /// maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(pmaxsw(a.as_i16x16(), b.as_i16x16())) } /// Compares packed 32-bit integers in `a` and `b`, and returns the packed /// maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i { transmute(pmaxsd(a.as_i32x8(), b.as_i32x8())) } /// Compares packed 8-bit integers in `a` and `b`, and returns the packed /// maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i { transmute(pmaxsb(a.as_i8x32(), b.as_i8x32())) } /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns /// the packed maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxuw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i { transmute(pmaxuw(a.as_u16x16(), b.as_u16x16())) } /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns /// the packed maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxud))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i { transmute(pmaxud(a.as_u32x8(), b.as_u32x8())) } /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns /// the packed maximum values. 
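///
/// A small illustrative example (the lane values are arbitrary):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi8(2);
/// // `-1` is 0xFF, i.e. 255 when interpreted as an unsigned byte.
/// let b = _mm256_set1_epi8(-1);
/// let r = _mm256_max_epu8(a, b);
/// // The unsigned maximum of 2 and 255 is 255 in every lane.
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, b)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```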
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_max_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmaxub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i { transmute(pmaxub(a.as_u8x32(), b.as_u8x32())) } /// Compares packed 16-bit integers in `a` and `b`, and returns the packed /// minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(pminsw(a.as_i16x16(), b.as_i16x16())) } /// Compares packed 32-bit integers in `a` and `b`, and returns the packed /// minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i { transmute(pminsd(a.as_i32x8(), b.as_i32x8())) } /// Compares packed 8-bit integers in `a` and `b`, and returns the packed /// minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i { transmute(pminsb(a.as_i8x32(), b.as_i8x32())) } /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns /// the packed minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminuw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i { transmute(pminuw(a.as_u16x16(), b.as_u16x16())) } /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns /// the packed minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminud))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i { transmute(pminud(a.as_u32x8(), b.as_u32x8())) } /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns /// the packed minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_min_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpminub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i { transmute(pminub(a.as_u8x32(), b.as_u8x32())) } /// Creates mask from the most significant bit of each 8-bit element in `a`, /// return the result. 
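///
/// A short illustrative example:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Every byte of `a` has its most significant bit set, so all 32 mask
/// // bits are set and the result, viewed as an `i32`, is -1.
/// let a = _mm256_set1_epi8(-1);
/// assert_eq!(_mm256_movemask_epi8(a), -1);
/// // No byte of `b` has its most significant bit set.
/// let b = _mm256_set1_epi8(1);
/// assert_eq!(_mm256_movemask_epi8(b), 0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```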
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_movemask_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmovmskb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 { pmovmskb(a.as_i8x32()) } /// Computes the sum of absolute differences (SADs) of quadruplets of unsigned /// 8-bit integers in `a` compared to those in `b`, and stores the 16-bit /// results in `dst`. Eight SADs are performed for each 128-bit lane using one /// quadruplet from `b` and eight quadruplets from `a`. One quadruplet is /// selected from `b` starting at the offset specified in `imm8`. Eight /// quadruplets are formed from sequential 8-bit integers selected from `a` /// starting at the offset specified in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mpsadbw_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vmpsadbw, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mpsadbw_epu8(a: __m256i, b: __m256i, imm8: i32) -> __m256i { let a = a.as_u8x32(); let b = b.as_u8x32(); macro_rules! call { ($imm8:expr) => { mpsadbw(a, b, $imm8) }; } let r = constify_imm8!(imm8, call); transmute(r) } /// Multiplies the low 32-bit integers from each packed 64-bit element in /// `a` and `b` /// /// Returns the 64-bit results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmuldq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i { transmute(pmuldq(a.as_i32x8(), b.as_i32x8())) } /// Multiplies the low unsigned 32-bit integers from each packed 64-bit /// element in `a` and `b` /// /// Returns the unsigned 64-bit results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mul_epu32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmuludq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i { transmute(pmuludq(a.as_u32x8(), b.as_u32x8())) } /// Multiplies the packed 16-bit integers in `a` and `b`, producing /// intermediate 32-bit integers and returning the high 16 bits of the /// intermediate integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhi_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmulhw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(pmulhw(a.as_i16x16(), b.as_i16x16())) } /// Multiplies the packed unsigned 16-bit integers in `a` and `b`, producing /// intermediate 32-bit integers and returning the high 16 bits of the /// intermediate integers.
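///
/// A small worked example (the operands are chosen only to make the high
/// half of the product easy to see):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // 0xFFFF * 0xFFFF = 0xFFFE_0001, so the high 16 bits are 0xFFFE,
/// // which is -2 when reinterpreted as a signed 16-bit lane.
/// let a = _mm256_set1_epi16(-1);
/// let b = _mm256_set1_epi16(-1);
/// let r = _mm256_mulhi_epu16(a, b);
/// let expected = _mm256_set1_epi16(-2);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```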
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhi_epu16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmulhuw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i { transmute(pmulhuw(a.as_u16x16(), b.as_u16x16())) } /// Multiplies the packed 16-bit integers in `a` and `b`, producing /// intermediate 32-bit integers, and returns the low 16 bits of the /// intermediate integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmullw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) } /// Multiplies the packed 32-bit integers in `a` and `b`, producing /// intermediate 64-bit integers, and returns the low 32 bits of the /// intermediate integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mullo_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmulld))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) } /// Multiplies packed 16-bit integers in `a` and `b`, producing /// intermediate signed 32-bit integers. Truncates each intermediate /// integer to the 18 most significant bits, rounds by adding 1, and /// returns bits `[16:1]`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mulhrs_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpmulhrsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(pmulhrsw(a.as_i16x16(), b.as_i16x16())) } /// Computes the bitwise OR of 256 bits (representing integer data) in `a` /// and `b` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_or_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) } /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpacksswb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) } /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using signed saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packs_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpackssdw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) } /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit
integers /// using unsigned saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpackuswb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) } /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using unsigned saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_packus_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpackusdw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) } /// Permutes packed 32-bit integers from `a` according to the content of `b`. /// /// The last 3 bits of each integer of `b` are used as addresses into the 8 /// integers of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar8x32_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpermps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i { transmute(permd(a.as_u32x8(), b.as_u32x8())) } /// Permutes 64-bit integers from `a` using control mask `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpermpd, imm8 = 9))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute4x64_epi64(a: __m256i, imm8: i32) -> __m256i { let imm8 = (imm8 & 0xFF) as u8; let zero = _mm256_setzero_si256().as_i64x4(); let a = a.as_i64x4(); macro_rules! permute4 { ($a:expr, $b:expr, $c:expr, $d:expr) => { simd_shuffle4(a, zero, [$a, $b, $c, $d]); }; } macro_rules! permute3 { ($a:expr, $b:expr, $c:expr) => { match (imm8 >> 6) & 0b11 { 0b00 => permute4!($a, $b, $c, 0), 0b01 => permute4!($a, $b, $c, 1), 0b10 => permute4!($a, $b, $c, 2), _ => permute4!($a, $b, $c, 3), } }; } macro_rules! permute2 { ($a:expr, $b:expr) => { match (imm8 >> 4) & 0b11 { 0b00 => permute3!($a, $b, 0), 0b01 => permute3!($a, $b, 1), 0b10 => permute3!($a, $b, 2), _ => permute3!($a, $b, 3), } }; } macro_rules! permute1 { ($a:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => permute2!($a, 0), 0b01 => permute2!($a, 1), 0b10 => permute2!($a, 2), _ => permute2!($a, 3), } }; } let r: i64x4 = match imm8 & 0b11 { 0b00 => permute1!(0), 0b01 => permute1!(1), 0b10 => permute1!(2), _ => permute1!(3), }; transmute(r) } /// Shuffles 128-bits of integer data selected by `imm8` from `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute2x128_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vperm2f128, imm8 = 9))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute2x128_si256(a: __m256i, b: __m256i, imm8: i32) -> __m256i { let a = a.as_i64x4(); let b = b.as_i64x4(); macro_rules! 
call { ($imm8:expr) => { vperm2i128(a, b, $imm8) }; } transmute(constify_imm8!(imm8, call)) } /// Shuffles 64-bit floating-point elements in `a` across lanes using the /// control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permute4x64_pd) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpermpd, imm8 = 1))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permute4x64_pd(a: __m256d, imm8: i32) -> __m256d { let imm8 = (imm8 & 0xFF) as u8; let undef = _mm256_undefined_pd(); macro_rules! shuffle_done { ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { simd_shuffle4(a, undef, [$x01, $x23, $x45, $x67]) }; } macro_rules! shuffle_x67 { ($x01:expr, $x23:expr, $x45:expr) => { match (imm8 >> 6) & 0b11 { 0b00 => shuffle_done!($x01, $x23, $x45, 0), 0b01 => shuffle_done!($x01, $x23, $x45, 1), 0b10 => shuffle_done!($x01, $x23, $x45, 2), _ => shuffle_done!($x01, $x23, $x45, 3), } }; } macro_rules! shuffle_x45 { ($x01:expr, $x23:expr) => { match (imm8 >> 4) & 0b11 { 0b00 => shuffle_x67!($x01, $x23, 0), 0b01 => shuffle_x67!($x01, $x23, 1), 0b10 => shuffle_x67!($x01, $x23, 2), _ => shuffle_x67!($x01, $x23, 3), } }; } macro_rules! shuffle_x23 { ($x01:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => shuffle_x45!($x01, 0), 0b01 => shuffle_x45!($x01, 1), 0b10 => shuffle_x45!($x01, 2), _ => shuffle_x45!($x01, 3), } }; } match imm8 & 0b11 { 0b00 => shuffle_x23!(0), 0b01 => shuffle_x23!(1), 0b10 => shuffle_x23!(2), _ => shuffle_x23!(3), } } /// Shuffles eight 32-bit foating-point elements in `a` across lanes using /// the corresponding 32-bit integer index in `idx`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutevar8x32_ps) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpermps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_permutevar8x32_ps(a: __m256, idx: __m256i) -> __m256 { permps(a, idx.as_i32x8()) } /// Computes the absolute differences of packed unsigned 8-bit integers in `a` /// and `b`, then horizontally sum each consecutive 8 differences to /// produce four unsigned 16-bit integers, and pack these unsigned 16-bit /// integers in the low 16 bits of the 64-bit return value /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sad_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsadbw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sad_epu8(a: __m256i, b: __m256i) -> __m256i { transmute(psadbw(a.as_u8x32(), b.as_u8x32())) } /// Shuffles bytes from `a` according to the content of `b`. /// /// The last 4 bits of each byte of `b` are used as addresses into the 32 bytes /// of `a`. /// /// In addition, if the highest significant bit of a byte of `b` is set, the /// respective destination byte is set to 0. /// /// The low and high halves of the vectors are shuffled separately. /// /// Picturing `a` and `b` as `[u8; 32]`, `_mm256_shuffle_epi8` is logically /// equivalent to: /// /// ``` /// fn mm256_shuffle_epi8(a: [u8; 32], b: [u8; 32]) -> [u8; 32] { /// let mut r = [0; 32]; /// for i in 0..16 { /// // if the most significant bit of b is set, /// // then the destination byte is set to 0. 
/// if b[i] & 0x80 == 0u8 { /// r[i] = a[(b[i] % 16) as usize]; /// } /// if b[i + 16] & 0x80 == 0u8 { /// r[i + 16] = a[(b[i + 16] % 16 + 16) as usize]; /// } /// } /// r /// } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpshufb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i { transmute(pshufb(a.as_u8x32(), b.as_u8x32())) } /// Shuffles 32-bit integers in 128-bit lanes of `a` using the control in /// `imm8`. /// /// ```rust /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("avx2") { /// # #[target_feature(enable = "avx2")] /// # unsafe fn worker() { /// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); /// /// let c1 = _mm256_shuffle_epi32(a, 0b00_11_10_01); /// let c2 = _mm256_shuffle_epi32(a, 0b01_00_10_11); /// /// let expected1 = _mm256_setr_epi32(1, 2, 3, 0, 5, 6, 7, 4); /// let expected2 = _mm256_setr_epi32(3, 2, 0, 1, 7, 6, 4, 5); /// /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c1, expected1)), !0); /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c2, expected2)), !0); /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i { // simd_shuffleX requires that its selector parameter be made up of // constant values, but we can't enforce that here. In spirit, we need // to write a `match` on all possible values of a byte, and for each value, // hard-code the correct `simd_shuffleX` call using only constants. We // then hope for LLVM to do the rest. // // Of course, that's... awful. So we try to use macros to do it for us. let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i32x8(); macro_rules! shuffle_done { ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { simd_shuffle8( a, a, [ $x01, $x23, $x45, $x67, 4 + $x01, 4 + $x23, 4 + $x45, 4 + $x67, ], ) }; } macro_rules! shuffle_x67 { ($x01:expr, $x23:expr, $x45:expr) => { match (imm8 >> 6) & 0b11 { 0b00 => shuffle_done!($x01, $x23, $x45, 0), 0b01 => shuffle_done!($x01, $x23, $x45, 1), 0b10 => shuffle_done!($x01, $x23, $x45, 2), _ => shuffle_done!($x01, $x23, $x45, 3), } }; } macro_rules! shuffle_x45 { ($x01:expr, $x23:expr) => { match (imm8 >> 4) & 0b11 { 0b00 => shuffle_x67!($x01, $x23, 0), 0b01 => shuffle_x67!($x01, $x23, 1), 0b10 => shuffle_x67!($x01, $x23, 2), _ => shuffle_x67!($x01, $x23, 3), } }; } macro_rules! shuffle_x23 { ($x01:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => shuffle_x45!($x01, 0), 0b01 => shuffle_x45!($x01, 1), 0b10 => shuffle_x45!($x01, 2), _ => shuffle_x45!($x01, 3), } }; } let r: i32x8 = match imm8 & 0b11 { 0b00 => shuffle_x23!(0), 0b01 => shuffle_x23!(1), 0b10 => shuffle_x23!(2), _ => shuffle_x23!(3), }; transmute(r) } /// Shuffles 16-bit integers in the high 64 bits of 128-bit lanes of `a` using /// the control in `imm8`. The low 64 bits of 128-bit lanes of `a` are copied /// to the output. 
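///
/// A minimal sketch using an assumed selector of `0`:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// // A selector of 0 replicates the first of the four high words in each
/// // 128-bit lane; the low four words are passed through unchanged.
/// let r = _mm256_shufflehi_epi16(a, 0);
/// let expected = _mm256_setr_epi16(0, 1, 2, 3, 4, 4, 4, 4, 8, 9, 10, 11, 12, 12, 12, 12);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```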
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shufflehi_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpshufhw, imm8 = 9))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shufflehi_epi16(a: __m256i, imm8: i32) -> __m256i { let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i16x16(); macro_rules! shuffle_done { ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { #[rustfmt::skip] simd_shuffle16(a, a, [ 0, 1, 2, 3, 4+$x01, 4+$x23, 4+$x45, 4+$x67, 8, 9, 10, 11, 12+$x01, 12+$x23, 12+$x45, 12+$x67 ]); }; } macro_rules! shuffle_x67 { ($x01:expr, $x23:expr, $x45:expr) => { match (imm8 >> 6) & 0b11 { 0b00 => shuffle_done!($x01, $x23, $x45, 0), 0b01 => shuffle_done!($x01, $x23, $x45, 1), 0b10 => shuffle_done!($x01, $x23, $x45, 2), _ => shuffle_done!($x01, $x23, $x45, 3), } }; } macro_rules! shuffle_x45 { ($x01:expr, $x23:expr) => { match (imm8 >> 4) & 0b11 { 0b00 => shuffle_x67!($x01, $x23, 0), 0b01 => shuffle_x67!($x01, $x23, 1), 0b10 => shuffle_x67!($x01, $x23, 2), _ => shuffle_x67!($x01, $x23, 3), } }; } macro_rules! shuffle_x23 { ($x01:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => shuffle_x45!($x01, 0), 0b01 => shuffle_x45!($x01, 1), 0b10 => shuffle_x45!($x01, 2), _ => shuffle_x45!($x01, 3), } }; } let r: i16x16 = match imm8 & 0b11 { 0b00 => shuffle_x23!(0), 0b01 => shuffle_x23!(1), 0b10 => shuffle_x23!(2), _ => shuffle_x23!(3), }; transmute(r) } /// Shuffles 16-bit integers in the low 64 bits of 128-bit lanes of `a` using /// the control in `imm8`. The high 64 bits of 128-bit lanes of `a` are copied /// to the output. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shufflelo_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpshuflw, imm8 = 9))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_shufflelo_epi16(a: __m256i, imm8: i32) -> __m256i { let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i16x16(); macro_rules! shuffle_done { ($x01: expr, $x23: expr, $x45: expr, $x67: expr) => { #[rustfmt::skip] simd_shuffle16(a, a, [ 0+$x01, 0+$x23, 0+$x45, 0+$x67, 4, 5, 6, 7, 8+$x01, 8+$x23, 8+$x45, 8+$x67, 12, 13, 14, 15, ]); }; } macro_rules! shuffle_x67 { ($x01:expr, $x23:expr, $x45:expr) => { match (imm8 >> 6) & 0b11 { 0b00 => shuffle_done!($x01, $x23, $x45, 0), 0b01 => shuffle_done!($x01, $x23, $x45, 1), 0b10 => shuffle_done!($x01, $x23, $x45, 2), _ => shuffle_done!($x01, $x23, $x45, 3), } }; } macro_rules! shuffle_x45 { ($x01:expr, $x23:expr) => { match (imm8 >> 4) & 0b11 { 0b00 => shuffle_x67!($x01, $x23, 0), 0b01 => shuffle_x67!($x01, $x23, 1), 0b10 => shuffle_x67!($x01, $x23, 2), _ => shuffle_x67!($x01, $x23, 3), } }; } macro_rules! shuffle_x23 { ($x01:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => shuffle_x45!($x01, 0), 0b01 => shuffle_x45!($x01, 1), 0b10 => shuffle_x45!($x01, 2), _ => shuffle_x45!($x01, 3), } }; } let r: i16x16 = match imm8 & 0b11 { 0b00 => shuffle_x23!(0), 0b01 => shuffle_x23!(1), 0b10 => shuffle_x23!(2), _ => shuffle_x23!(3), }; transmute(r) } /// Negates packed 16-bit integers in `a` when the corresponding signed /// 16-bit integer in `b` is negative, and returns the results. /// Results are zeroed out when the corresponding element in `b` is zero. 
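///
/// A short illustrative example (the control values are arbitrary):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi16(5);
/// let b = _mm256_setr_epi16(-1, 0, 1, -1, 0, 1, -1, 0, 1, -1, 0, 1, -1, 0, 1, -1);
/// // Negative control lanes negate, zero lanes clear, positive lanes copy.
/// let r = _mm256_sign_epi16(a, b);
/// let expected = _mm256_setr_epi16(-5, 0, 5, -5, 0, 5, -5, 0, 5, -5, 0, 5, -5, 0, 5, -5);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```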
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsignw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sign_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(psignw(a.as_i16x16(), b.as_i16x16())) } /// Negates packed 32-bit integers in `a` when the corresponding signed /// 32-bit integer in `b` is negative, and returns the results. /// Results are zeroed out when the corresponding element in `b` is zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsignd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sign_epi32(a: __m256i, b: __m256i) -> __m256i { transmute(psignd(a.as_i32x8(), b.as_i32x8())) } /// Negates packed 8-bit integers in `a` when the corresponding signed /// 8-bit integer in `b` is negative, and returns the results. /// Results are zeroed out when the corresponding element in `b` is zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sign_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsignb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sign_epi8(a: __m256i, b: __m256i) -> __m256i { transmute(psignb(a.as_i8x32(), b.as_i8x32())) } /// Shifts packed 16-bit integers in `a` left by `count` while /// shifting in zeros, and returns the result /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sll_epi16(a: __m256i, count: __m128i) -> __m256i { transmute(psllw(a.as_i16x16(), count.as_i16x8())) } /// Shifts packed 32-bit integers in `a` left by `count` while /// shifting in zeros, and returns the result /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpslld))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sll_epi32(a: __m256i, count: __m128i) -> __m256i { transmute(pslld(a.as_i32x8(), count.as_i32x4())) } /// Shifts packed 64-bit integers in `a` left by `count` while /// shifting in zeros, and returns the result /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sll_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i { transmute(psllq(a.as_i64x4(), count.as_i64x2())) } /// Shifts packed 16-bit integers in `a` left by `imm8` while /// shifting in zeros, return the results; /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_slli_epi16(a: __m256i, imm8: i32) -> __m256i { transmute(pslliw(a.as_i16x16(), imm8)) } /// Shifts packed 32-bit integers in `a` left by `imm8` while /// shifting in zeros, return the 
results; /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpslld))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_slli_epi32(a: __m256i, imm8: i32) -> __m256i { transmute(psllid(a.as_i32x8(), imm8)) } /// Shifts packed 64-bit integers in `a` left by `imm8` while /// shifting in zeros, return the results; /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_slli_epi64(a: __m256i, imm8: i32) -> __m256i { transmute(pslliq(a.as_i64x4(), imm8)) } /// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_slli_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_slli_si256(a: __m256i, imm8: i32) -> __m256i { let a = a.as_i64x4(); macro_rules! call { ($imm8:expr) => { vpslldq(a, $imm8) }; } transmute(constify_imm8!(imm8 * 8, call)) } /// Shifts 128-bit lanes in `a` left by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bslli_epi128) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpslldq, imm8 = 3))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_bslli_epi128(a: __m256i, imm8: i32) -> __m256i { let a = a.as_i64x4(); macro_rules! call { ($imm8:expr) => { vpslldq(a, $imm8) }; } transmute(constify_imm8!(imm8 * 8, call)) } /// Shifts packed 32-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while /// shifting in zeros, and returns the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllvd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) } /// Shifts packed 32-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while /// shifting in zeros, and returns the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllvd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) } /// Shifts packed 64-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while /// shifting in zeros, and returns the result. 
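///
/// A minimal sketch with arbitrary per-lane shift counts:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// // Each 64-bit lane is shifted by its own count: 1 << 4 and 3 << 8.
/// let a = _mm_set_epi64x(3, 1);
/// let count = _mm_set_epi64x(8, 4);
/// let r = _mm_sllv_epi64(a, count);
/// // 1 << 4 = 16 and 3 << 8 = 768.
/// let expected = _mm_set_epi64x(768, 16);
/// assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi8(r, expected)), 0xFFFF);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```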
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sllv_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllvq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) } /// Shifts packed 64-bit integers in `a` left by the amount /// specified by the corresponding element in `count` while /// shifting in zeros, and returns the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sllv_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsllvq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) } /// Shifts packed 16-bit integers in `a` right by `count` while /// shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsraw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sra_epi16(a: __m256i, count: __m128i) -> __m256i { transmute(psraw(a.as_i16x16(), count.as_i16x8())) } /// Shifts packed 32-bit integers in `a` right by `count` while /// shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sra_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrad))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i { transmute(psrad(a.as_i32x8(), count.as_i32x4())) } /// Shifts packed 16-bit integers in `a` right by `imm8` while /// shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsraw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srai_epi16(a: __m256i, imm8: i32) -> __m256i { transmute(psraiw(a.as_i16x16(), imm8)) } /// Shifts packed 32-bit integers in `a` right by `imm8` while /// shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srai_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrad))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srai_epi32(a: __m256i, imm8: i32) -> __m256i { transmute(psraid(a.as_i32x8(), imm8)) } /// Shifts packed 32-bit integers in `a` right by the amount specified by the /// corresponding element in `count` while shifting in sign bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srav_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsravd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i { transmute(psravd(a.as_i32x4(), count.as_i32x4())) } /// Shifts packed 32-bit integers in `a` right by the amount specified by the /// corresponding element in `count` while shifting in sign bits. 
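///
/// A short illustrative example with arbitrary inputs:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_epi32(-8, -8, 8, 8, -64, -64, 64, 64);
/// let count = _mm256_setr_epi32(1, 2, 1, 2, 3, 4, 3, 4);
/// // Arithmetic shifts keep the sign bit, so negative lanes stay negative.
/// let r = _mm256_srav_epi32(a, count);
/// let expected = _mm256_setr_epi32(-4, -2, 4, 2, -8, -4, 8, 4);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```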
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srav_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsravd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) } /// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srli_si256(a: __m256i, imm8: i32) -> __m256i { let a = a.as_i64x4(); macro_rules! call { ($imm8:expr) => { vpsrldq(a, $imm8) }; } transmute(constify_imm8!(imm8 * 8, call)) } /// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bsrli_epi128) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrldq, imm8 = 3))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_bsrli_epi128(a: __m256i, imm8: i32) -> __m256i { let a = a.as_i64x4(); macro_rules! call { ($imm8:expr) => { vpsrldq(a, $imm8) }; } transmute(constify_imm8!(imm8 * 8, call)) } /// Shifts packed 16-bit integers in `a` right by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srl_epi16(a: __m256i, count: __m128i) -> __m256i { transmute(psrlw(a.as_i16x16(), count.as_i16x8())) } /// Shifts packed 32-bit integers in `a` right by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrld))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srl_epi32(a: __m256i, count: __m128i) -> __m256i { transmute(psrld(a.as_i32x8(), count.as_i32x4())) } /// Shifts packed 64-bit integers in `a` right by `count` while shifting in /// zeros. 
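///
/// A minimal sketch (the shift amount of 2 is arbitrary):
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_set1_epi64x(32);
/// // The shift amount is taken from the low 64 bits of `count` and applied
/// // to every lane of `a`.
/// let count = _mm_set_epi64x(0, 2);
/// let r = _mm256_srl_epi64(a, count);
/// let expected = _mm256_set1_epi64x(8);
/// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(r, expected)), !0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```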
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srl_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i { transmute(psrlq(a.as_i64x4(), count.as_i64x2())) } /// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in /// zeros /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srli_epi16(a: __m256i, imm8: i32) -> __m256i { transmute(psrliw(a.as_i16x16(), imm8)) } /// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in /// zeros /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrld))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srli_epi32(a: __m256i, imm8: i32) -> __m256i { transmute(psrlid(a.as_i32x8(), imm8)) } /// Shifts packed 64-bit integers in `a` right by `imm8` while shifting in /// zeros /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srli_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srli_epi64(a: __m256i, imm8: i32) -> __m256i { transmute(psrliq(a.as_i64x4(), imm8)) } /// Shifts packed 32-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlvd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) } /// Shifts packed 32-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlvd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) } /// Shifts packed 64-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srlv_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsrlvq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) } /// Shifts packed 64-bit integers in `a` right by the amount specified by /// the corresponding element in `count` while shifting in zeros, /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_srlv_epi64) #[inline] #[target_feature(enable 
= "avx2")] #[cfg_attr(test, assert_instr(vpsrlvq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) } // TODO _mm256_stream_load_si256 (__m256i const* mem_addr) /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) } /// Subtract packed 32-bit integers in `b` from packed 16-bit integers in `a` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) } /// Subtract packed 64-bit integers in `b` from packed 16-bit integers in `a` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) } /// Subtract packed 8-bit integers in `b` from packed 16-bit integers in `a` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_sub_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) } /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in /// `a` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i { transmute(psubsw(a.as_i16x16(), b.as_i16x16())) } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in /// `a` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i { transmute(psubsb(a.as_i8x32(), b.as_i8x32())) } /// Subtract packed unsigned 16-bit integers in `b` from packed 16-bit /// integers in `a` using saturation. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epu16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubusw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i { transmute(psubusw(a.as_u16x16(), b.as_u16x16())) } /// Subtract packed unsigned 8-bit integers in `b` from packed 8-bit /// integers in `a` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_subs_epu8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpsubusb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i { transmute(psubusb(a.as_u8x32(), b.as_u8x32())) } /// Unpacks and interleave 8-bit integers from the high half of each /// 128-bit lane in `a` and `b`. /// /// ```rust /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("avx2") { /// # #[target_feature(enable = "avx2")] /// # unsafe fn worker() { /// let a = _mm256_setr_epi8( /// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, /// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /// ); /// let b = _mm256_setr_epi8( /// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, /// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, /// -30, -31, /// ); /// /// let c = _mm256_unpackhi_epi8(a, b); /// /// let expected = _mm256_setr_epi8( /// 8, -8, 9, -9, 10, -10, 11, -11, 12, -12, 13, -13, 14, -14, 15, -15, /// 24, -24, 25, -25, 26, -26, 27, -27, 28, -28, 29, -29, 30, -30, 31, /// -31, /// ); /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); /// /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpunpckhbw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i { #[rustfmt::skip] let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [ 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47, 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63, ]); transmute(r) } /// Unpacks and interleave 8-bit integers from the low half of each /// 128-bit lane of `a` and `b`. 
/// /// ```rust /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("avx2") { /// # #[target_feature(enable = "avx2")] /// # unsafe fn worker() { /// let a = _mm256_setr_epi8( /// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, /// 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /// ); /// let b = _mm256_setr_epi8( /// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, /// -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, /// -30, -31, /// ); /// /// let c = _mm256_unpacklo_epi8(a, b); /// /// let expected = _mm256_setr_epi8( /// 0, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7, 16, -16, 17, /// -17, 18, -18, 19, -19, 20, -20, 21, -21, 22, -22, 23, -23, /// ); /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); /// /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi8) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpunpcklbw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i { #[rustfmt::skip] let r: i8x32 = simd_shuffle32(a.as_i8x32(), b.as_i8x32(), [ 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55, ]); transmute(r) } /// Unpacks and interleave 16-bit integers from the high half of each /// 128-bit lane of `a` and `b`. /// /// ```rust /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("avx2") { /// # #[target_feature(enable = "avx2")] /// # unsafe fn worker() { /// let a = _mm256_setr_epi16( /// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /// ); /// let b = _mm256_setr_epi16( /// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, /// ); /// /// let c = _mm256_unpackhi_epi16(a, b); /// /// let expected = _mm256_setr_epi16( /// 4, -4, 5, -5, 6, -6, 7, -7, 12, -12, 13, -13, 14, -14, 15, -15, /// ); /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); /// /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpunpckhwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i { let r: i16x16 = simd_shuffle16( a.as_i16x16(), b.as_i16x16(), [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31], ); transmute(r) } /// Unpacks and interleave 16-bit integers from the low half of each /// 128-bit lane of `a` and `b`. 
/// /// ```rust /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("avx2") { /// # #[target_feature(enable = "avx2")] /// # unsafe fn worker() { /// /// let a = _mm256_setr_epi16( /// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /// ); /// let b = _mm256_setr_epi16( /// 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, /// ); /// /// let c = _mm256_unpacklo_epi16(a, b); /// /// let expected = _mm256_setr_epi16( /// 0, 0, 1, -1, 2, -2, 3, -3, 8, -8, 9, -9, 10, -10, 11, -11, /// ); /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); /// /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi16) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpunpcklwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i { let r: i16x16 = simd_shuffle16( a.as_i16x16(), b.as_i16x16(), [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27], ); transmute(r) } /// Unpacks and interleave 32-bit integers from the high half of each /// 128-bit lane of `a` and `b`. /// /// ```rust /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("avx2") { /// # #[target_feature(enable = "avx2")] /// # unsafe fn worker() { /// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); /// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7); /// /// let c = _mm256_unpackhi_epi32(a, b); /// /// let expected = _mm256_setr_epi32(2, -2, 3, -3, 6, -6, 7, -7); /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); /// /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vunpckhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i { let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]); transmute(r) } /// Unpacks and interleave 32-bit integers from the low half of each /// 128-bit lane of `a` and `b`. 
/// /// ```rust /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("avx2") { /// # #[target_feature(enable = "avx2")] /// # unsafe fn worker() { /// let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); /// let b = _mm256_setr_epi32(0, -1, -2, -3, -4, -5, -6, -7); /// /// let c = _mm256_unpacklo_epi32(a, b); /// /// let expected = _mm256_setr_epi32(0, 0, 1, -1, 4, -4, 5, -5); /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); /// /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi32) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vunpcklps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i { let r: i32x8 = simd_shuffle8(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]); transmute(r) } /// Unpacks and interleave 64-bit integers from the high half of each /// 128-bit lane of `a` and `b`. /// /// ```rust /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("avx2") { /// # #[target_feature(enable = "avx2")] /// # unsafe fn worker() { /// let a = _mm256_setr_epi64x(0, 1, 2, 3); /// let b = _mm256_setr_epi64x(0, -1, -2, -3); /// /// let c = _mm256_unpackhi_epi64(a, b); /// /// let expected = _mm256_setr_epi64x(1, -1, 3, -3); /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); /// /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpackhi_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vunpckhpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i { let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]); transmute(r) } /// Unpacks and interleave 64-bit integers from the low half of each /// 128-bit lane of `a` and `b`. 
/// /// ```rust /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("avx2") { /// # #[target_feature(enable = "avx2")] /// # unsafe fn worker() { /// let a = _mm256_setr_epi64x(0, 1, 2, 3); /// let b = _mm256_setr_epi64x(0, -1, -2, -3); /// /// let c = _mm256_unpacklo_epi64(a, b); /// /// let expected = _mm256_setr_epi64x(0, 0, 2, -2); /// assert_eq!(_mm256_movemask_epi8(_mm256_cmpeq_epi8(c, expected)), !0); /// /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_unpacklo_epi64) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vunpcklpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i { let r: i64x4 = simd_shuffle4(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]); transmute(r) } /// Computes the bitwise XOR of 256 bits (representing integer data) /// in `a` and `b` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_xor_si256) #[inline] #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vxorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) } /// Extracts an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468](https://reviews.llvm.org/D20468). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi8) #[inline] #[target_feature(enable = "avx2")] // This intrinsic has no corresponding instruction. #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extract_epi8(a: __m256i, imm8: i32) -> i8 { let imm8 = (imm8 & 31) as u32; simd_extract(a.as_i8x32(), imm8) } /// Extracts a 16-bit integer from `a`, selected with `imm8`. Returns a 32-bit /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468](https://reviews.llvm.org/D20468). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi16) #[inline] #[target_feature(enable = "avx2")] // This intrinsic has no corresponding instruction. #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extract_epi16(a: __m256i, imm8: i32) -> i16 { let imm8 = (imm8 & 15) as u32; simd_extract(a.as_i16x16(), imm8) } /// Extracts a 32-bit integer from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi32) #[inline] #[target_feature(enable = "avx2")] // This intrinsic has no corresponding instruction. #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extract_epi32(a: __m256i, imm8: i32) -> i32 { let imm8 = (imm8 & 7) as u32; simd_extract(a.as_i32x8(), imm8) } /// Returns the first element of the input vector of `[4 x double]`.
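///
/// A short usage sketch added for illustration (not part of Intel's or the
/// original crate documentation); it assumes `avx2` support is detected at
/// runtime:
///
/// ```rust
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("avx2") {
/// # #[target_feature(enable = "avx2")]
/// # unsafe fn worker() {
/// let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
/// // Only the lowest of the four lanes is returned.
/// assert_eq!(_mm256_cvtsd_f64(a), 1.0);
///
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```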
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsd_f64) #[inline] #[target_feature(enable = "avx2")] //#[cfg_attr(test, assert_instr(movsd))] FIXME #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtsd_f64(a: __m256d) -> f64 { simd_extract(a, 0) } /// Returns the first element of the input vector of `[8 x i32]`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsi256_si32) #[inline] #[target_feature(enable = "avx2")] //#[cfg_attr(test, assert_instr(movd))] FIXME #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 { simd_extract(a.as_i32x8(), 0) } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.avx2.pabs.b"] fn pabsb(a: i8x32) -> u8x32; #[link_name = "llvm.x86.avx2.pabs.w"] fn pabsw(a: i16x16) -> u16x16; #[link_name = "llvm.x86.avx2.pabs.d"] fn pabsd(a: i32x8) -> u32x8; #[link_name = "llvm.x86.avx2.padds.b"] fn paddsb(a: i8x32, b: i8x32) -> i8x32; #[link_name = "llvm.x86.avx2.padds.w"] fn paddsw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.paddus.b"] fn paddusb(a: u8x32, b: u8x32) -> u8x32; #[link_name = "llvm.x86.avx2.paddus.w"] fn paddusw(a: u16x16, b: u16x16) -> u16x16; #[link_name = "llvm.x86.avx2.pavg.b"] fn pavgb(a: u8x32, b: u8x32) -> u8x32; #[link_name = "llvm.x86.avx2.pavg.w"] fn pavgw(a: u16x16, b: u16x16) -> u16x16; #[link_name = "llvm.x86.avx2.pblendvb"] fn pblendvb(a: i8x32, b: i8x32, mask: i8x32) -> i8x32; #[link_name = "llvm.x86.avx2.phadd.w"] fn phaddw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.phadd.d"] fn phaddd(a: i32x8, b: i32x8) -> i32x8; #[link_name = "llvm.x86.avx2.phadd.sw"] fn phaddsw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.phsub.w"] fn phsubw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.phsub.d"] fn phsubd(a: i32x8, b: i32x8) -> i32x8; #[link_name = "llvm.x86.avx2.phsub.sw"] fn phsubsw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.pmadd.wd"] fn pmaddwd(a: i16x16, b: i16x16) -> i32x8; #[link_name = "llvm.x86.avx2.pmadd.ub.sw"] fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16; #[link_name = "llvm.x86.avx2.maskload.d"] fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4; #[link_name = "llvm.x86.avx2.maskload.d.256"] fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8; #[link_name = "llvm.x86.avx2.maskload.q"] fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2; #[link_name = "llvm.x86.avx2.maskload.q.256"] fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4; #[link_name = "llvm.x86.avx2.maskstore.d"] fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4); #[link_name = "llvm.x86.avx2.maskstore.d.256"] fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8); #[link_name = "llvm.x86.avx2.maskstore.q"] fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2); #[link_name = "llvm.x86.avx2.maskstore.q.256"] fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4); #[link_name = "llvm.x86.avx2.pmaxs.w"] fn pmaxsw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.pmaxs.d"] fn pmaxsd(a: i32x8, b: i32x8) -> i32x8; #[link_name = "llvm.x86.avx2.pmaxs.b"] fn pmaxsb(a: i8x32, b: i8x32) -> i8x32; #[link_name = "llvm.x86.avx2.pmaxu.w"] fn pmaxuw(a: u16x16, b: u16x16) -> u16x16; #[link_name = "llvm.x86.avx2.pmaxu.d"] fn pmaxud(a: u32x8, b: u32x8) -> u32x8; #[link_name = "llvm.x86.avx2.pmaxu.b"] fn pmaxub(a: u8x32, b: u8x32) -> u8x32; #[link_name = 
"llvm.x86.avx2.pmins.w"] fn pminsw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.pmins.d"] fn pminsd(a: i32x8, b: i32x8) -> i32x8; #[link_name = "llvm.x86.avx2.pmins.b"] fn pminsb(a: i8x32, b: i8x32) -> i8x32; #[link_name = "llvm.x86.avx2.pminu.w"] fn pminuw(a: u16x16, b: u16x16) -> u16x16; #[link_name = "llvm.x86.avx2.pminu.d"] fn pminud(a: u32x8, b: u32x8) -> u32x8; #[link_name = "llvm.x86.avx2.pminu.b"] fn pminub(a: u8x32, b: u8x32) -> u8x32; #[link_name = "llvm.x86.avx2.pmovmskb"] fn pmovmskb(a: i8x32) -> i32; #[link_name = "llvm.x86.avx2.mpsadbw"] fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16; #[link_name = "llvm.x86.avx2.pmulhu.w"] fn pmulhuw(a: u16x16, b: u16x16) -> u16x16; #[link_name = "llvm.x86.avx2.pmulh.w"] fn pmulhw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.pmul.dq"] fn pmuldq(a: i32x8, b: i32x8) -> i64x4; #[link_name = "llvm.x86.avx2.pmulu.dq"] fn pmuludq(a: u32x8, b: u32x8) -> u64x4; #[link_name = "llvm.x86.avx2.pmul.hr.sw"] fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.packsswb"] fn packsswb(a: i16x16, b: i16x16) -> i8x32; #[link_name = "llvm.x86.avx2.packssdw"] fn packssdw(a: i32x8, b: i32x8) -> i16x16; #[link_name = "llvm.x86.avx2.packuswb"] fn packuswb(a: i16x16, b: i16x16) -> u8x32; #[link_name = "llvm.x86.avx2.packusdw"] fn packusdw(a: i32x8, b: i32x8) -> u16x16; #[link_name = "llvm.x86.avx2.psad.bw"] fn psadbw(a: u8x32, b: u8x32) -> u64x4; #[link_name = "llvm.x86.avx2.psign.b"] fn psignb(a: i8x32, b: i8x32) -> i8x32; #[link_name = "llvm.x86.avx2.psign.w"] fn psignw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.psign.d"] fn psignd(a: i32x8, b: i32x8) -> i32x8; #[link_name = "llvm.x86.avx2.psll.w"] fn psllw(a: i16x16, count: i16x8) -> i16x16; #[link_name = "llvm.x86.avx2.psll.d"] fn pslld(a: i32x8, count: i32x4) -> i32x8; #[link_name = "llvm.x86.avx2.psll.q"] fn psllq(a: i64x4, count: i64x2) -> i64x4; #[link_name = "llvm.x86.avx2.pslli.w"] fn pslliw(a: i16x16, imm8: i32) -> i16x16; #[link_name = "llvm.x86.avx2.pslli.d"] fn psllid(a: i32x8, imm8: i32) -> i32x8; #[link_name = "llvm.x86.avx2.pslli.q"] fn pslliq(a: i64x4, imm8: i32) -> i64x4; #[link_name = "llvm.x86.avx2.psllv.d"] fn psllvd(a: i32x4, count: i32x4) -> i32x4; #[link_name = "llvm.x86.avx2.psllv.d.256"] fn psllvd256(a: i32x8, count: i32x8) -> i32x8; #[link_name = "llvm.x86.avx2.psllv.q"] fn psllvq(a: i64x2, count: i64x2) -> i64x2; #[link_name = "llvm.x86.avx2.psllv.q.256"] fn psllvq256(a: i64x4, count: i64x4) -> i64x4; #[link_name = "llvm.x86.avx2.psra.w"] fn psraw(a: i16x16, count: i16x8) -> i16x16; #[link_name = "llvm.x86.avx2.psra.d"] fn psrad(a: i32x8, count: i32x4) -> i32x8; #[link_name = "llvm.x86.avx2.psrai.w"] fn psraiw(a: i16x16, imm8: i32) -> i16x16; #[link_name = "llvm.x86.avx2.psrai.d"] fn psraid(a: i32x8, imm8: i32) -> i32x8; #[link_name = "llvm.x86.avx2.psrav.d"] fn psravd(a: i32x4, count: i32x4) -> i32x4; #[link_name = "llvm.x86.avx2.psrav.d.256"] fn psravd256(a: i32x8, count: i32x8) -> i32x8; #[link_name = "llvm.x86.avx2.psrl.w"] fn psrlw(a: i16x16, count: i16x8) -> i16x16; #[link_name = "llvm.x86.avx2.psrl.d"] fn psrld(a: i32x8, count: i32x4) -> i32x8; #[link_name = "llvm.x86.avx2.psrl.q"] fn psrlq(a: i64x4, count: i64x2) -> i64x4; #[link_name = "llvm.x86.avx2.psrli.w"] fn psrliw(a: i16x16, imm8: i32) -> i16x16; #[link_name = "llvm.x86.avx2.psrli.d"] fn psrlid(a: i32x8, imm8: i32) -> i32x8; #[link_name = "llvm.x86.avx2.psrli.q"] fn psrliq(a: i64x4, imm8: i32) -> i64x4; #[link_name = "llvm.x86.avx2.psrlv.d"] fn 
psrlvd(a: i32x4, count: i32x4) -> i32x4; #[link_name = "llvm.x86.avx2.psrlv.d.256"] fn psrlvd256(a: i32x8, count: i32x8) -> i32x8; #[link_name = "llvm.x86.avx2.psrlv.q"] fn psrlvq(a: i64x2, count: i64x2) -> i64x2; #[link_name = "llvm.x86.avx2.psrlv.q.256"] fn psrlvq256(a: i64x4, count: i64x4) -> i64x4; #[link_name = "llvm.x86.avx2.psubs.b"] fn psubsb(a: i8x32, b: i8x32) -> i8x32; #[link_name = "llvm.x86.avx2.psubs.w"] fn psubsw(a: i16x16, b: i16x16) -> i16x16; #[link_name = "llvm.x86.avx2.psubus.b"] fn psubusb(a: u8x32, b: u8x32) -> u8x32; #[link_name = "llvm.x86.avx2.psubus.w"] fn psubusw(a: u16x16, b: u16x16) -> u16x16; #[link_name = "llvm.x86.avx2.pshuf.b"] fn pshufb(a: u8x32, b: u8x32) -> u8x32; #[link_name = "llvm.x86.avx2.permd"] fn permd(a: u32x8, b: u32x8) -> u32x8; #[link_name = "llvm.x86.avx2.permps"] fn permps(a: __m256, b: i32x8) -> __m256; #[link_name = "llvm.x86.avx2.vperm2i128"] fn vperm2i128(a: i64x4, b: i64x4, imm8: i8) -> i64x4; #[link_name = "llvm.x86.avx2.gather.d.d"] fn pgatherdd(src: i32x4, slice: *const i8, offsets: i32x4, mask: i32x4, scale: i8) -> i32x4; #[link_name = "llvm.x86.avx2.gather.d.d.256"] fn vpgatherdd(src: i32x8, slice: *const i8, offsets: i32x8, mask: i32x8, scale: i8) -> i32x8; #[link_name = "llvm.x86.avx2.gather.d.q"] fn pgatherdq(src: i64x2, slice: *const i8, offsets: i32x4, mask: i64x2, scale: i8) -> i64x2; #[link_name = "llvm.x86.avx2.gather.d.q.256"] fn vpgatherdq(src: i64x4, slice: *const i8, offsets: i32x4, mask: i64x4, scale: i8) -> i64x4; #[link_name = "llvm.x86.avx2.gather.q.d"] fn pgatherqd(src: i32x4, slice: *const i8, offsets: i64x2, mask: i32x4, scale: i8) -> i32x4; #[link_name = "llvm.x86.avx2.gather.q.d.256"] fn vpgatherqd(src: i32x4, slice: *const i8, offsets: i64x4, mask: i32x4, scale: i8) -> i32x4; #[link_name = "llvm.x86.avx2.gather.q.q"] fn pgatherqq(src: i64x2, slice: *const i8, offsets: i64x2, mask: i64x2, scale: i8) -> i64x2; #[link_name = "llvm.x86.avx2.gather.q.q.256"] fn vpgatherqq(src: i64x4, slice: *const i8, offsets: i64x4, mask: i64x4, scale: i8) -> i64x4; #[link_name = "llvm.x86.avx2.gather.d.pd"] fn pgatherdpd( src: __m128d, slice: *const i8, offsets: i32x4, mask: __m128d, scale: i8, ) -> __m128d; #[link_name = "llvm.x86.avx2.gather.d.pd.256"] fn vpgatherdpd( src: __m256d, slice: *const i8, offsets: i32x4, mask: __m256d, scale: i8, ) -> __m256d; #[link_name = "llvm.x86.avx2.gather.q.pd"] fn pgatherqpd( src: __m128d, slice: *const i8, offsets: i64x2, mask: __m128d, scale: i8, ) -> __m128d; #[link_name = "llvm.x86.avx2.gather.q.pd.256"] fn vpgatherqpd( src: __m256d, slice: *const i8, offsets: i64x4, mask: __m256d, scale: i8, ) -> __m256d; #[link_name = "llvm.x86.avx2.gather.d.ps"] fn pgatherdps(src: __m128, slice: *const i8, offsets: i32x4, mask: __m128, scale: i8) -> __m128; #[link_name = "llvm.x86.avx2.gather.d.ps.256"] fn vpgatherdps( src: __m256, slice: *const i8, offsets: i32x8, mask: __m256, scale: i8, ) -> __m256; #[link_name = "llvm.x86.avx2.gather.q.ps"] fn pgatherqps(src: __m128, slice: *const i8, offsets: i64x2, mask: __m128, scale: i8) -> __m128; #[link_name = "llvm.x86.avx2.gather.q.ps.256"] fn vpgatherqps( src: __m128, slice: *const i8, offsets: i64x4, mask: __m128, scale: i8, ) -> __m128; #[link_name = "llvm.x86.avx2.psll.dq"] fn vpslldq(a: i64x4, b: i32) -> i64x4; #[link_name = "llvm.x86.avx2.psrl.dq"] fn vpsrldq(a: i64x4, b: i32) -> i64x4; } #[cfg(test)] mod tests { use std; use stdsimd_test::simd_test; use crate::core_arch::x86::*; #[simd_test(enable = "avx2")] unsafe fn test_mm256_abs_epi32() { 
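// Note (added comment): the absolute value of the most negative value wraps, so
// the expected vector below writes `abs(i32::MIN)` as
// `std::i32::MAX.wrapping_add(1)`, which is `i32::MIN` again.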
#[rustfmt::skip] let a = _mm256_setr_epi32( 0, 1, -1, std::i32::MAX, std::i32::MIN, 100, -100, -32, ); let r = _mm256_abs_epi32(a); #[rustfmt::skip] let e = _mm256_setr_epi32( 0, 1, 1, std::i32::MAX, std::i32::MAX.wrapping_add(1), 100, 100, 32, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_abs_epi16() { #[rustfmt::skip] let a = _mm256_setr_epi16( 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, std::i16::MAX, std::i16::MIN, 100, -100, -32, ); let r = _mm256_abs_epi16(a); #[rustfmt::skip] let e = _mm256_setr_epi16( 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, std::i16::MAX, std::i16::MAX.wrapping_add(1), 100, 100, 32, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_abs_epi8() { #[rustfmt::skip] let a = _mm256_setr_epi8( 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, std::i8::MAX, std::i8::MIN, 100, -100, -32, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, std::i8::MAX, std::i8::MIN, 100, -100, -32, ); let r = _mm256_abs_epi8(a); #[rustfmt::skip] let e = _mm256_setr_epi8( 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, std::i8::MAX, std::i8::MAX.wrapping_add(1), 100, 100, 32, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, std::i8::MAX, std::i8::MAX.wrapping_add(1), 100, 100, 32, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_add_epi64() { let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000); let b = _mm256_setr_epi64x(-1, 0, 1, 2); let r = _mm256_add_epi64(a, b); let e = _mm256_setr_epi64x(-11, 0, 101, 1_000_000_002); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_add_epi32() { let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6); let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); let r = _mm256_add_epi32(a, b); let e = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_add_epi16() { #[rustfmt::skip] let a = _mm256_setr_epi16( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); #[rustfmt::skip] let b = _mm256_setr_epi16( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); let r = _mm256_add_epi16(a, b); #[rustfmt::skip] let e = _mm256_setr_epi16( 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_add_epi8() { #[rustfmt::skip] let a = _mm256_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); #[rustfmt::skip] let b = _mm256_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); let r = _mm256_add_epi8(a, b); #[rustfmt::skip] let e = _mm256_setr_epi8( 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_adds_epi8() { #[rustfmt::skip] let a = _mm256_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); #[rustfmt::skip] let b = _mm256_setr_epi8( 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, ); let r = _mm256_adds_epi8(a, b); #[rustfmt::skip] let e = _mm256_setr_epi8( 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_adds_epi8_saturate_positive() { let a = 
_mm256_set1_epi8(0x7F); let b = _mm256_set1_epi8(1); let r = _mm256_adds_epi8(a, b); assert_eq_m256i(r, a); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_adds_epi8_saturate_negative() { let a = _mm256_set1_epi8(-0x80); let b = _mm256_set1_epi8(-1); let r = _mm256_adds_epi8(a, b); assert_eq_m256i(r, a); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_adds_epi16() { #[rustfmt::skip] let a = _mm256_setr_epi16( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); #[rustfmt::skip] let b = _mm256_setr_epi16( 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, ); let r = _mm256_adds_epi16(a, b); #[rustfmt::skip] let e = _mm256_setr_epi16( 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_adds_epi16_saturate_positive() { let a = _mm256_set1_epi16(0x7FFF); let b = _mm256_set1_epi16(1); let r = _mm256_adds_epi16(a, b); assert_eq_m256i(r, a); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_adds_epi16_saturate_negative() { let a = _mm256_set1_epi16(-0x8000); let b = _mm256_set1_epi16(-1); let r = _mm256_adds_epi16(a, b); assert_eq_m256i(r, a); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_adds_epu8() { #[rustfmt::skip] let a = _mm256_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); #[rustfmt::skip] let b = _mm256_setr_epi8( 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, ); let r = _mm256_adds_epu8(a, b); #[rustfmt::skip] let e = _mm256_setr_epi8( 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_adds_epu8_saturate() { let a = _mm256_set1_epi8(!0); let b = _mm256_set1_epi8(1); let r = _mm256_adds_epu8(a, b); assert_eq_m256i(r, a); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_adds_epu16() { #[rustfmt::skip] let a = _mm256_setr_epi16( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); #[rustfmt::skip] let b = _mm256_setr_epi16( 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, ); let r = _mm256_adds_epu16(a, b); #[rustfmt::skip] let e = _mm256_setr_epi16( 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_adds_epu16_saturate() { let a = _mm256_set1_epi16(!0); let b = _mm256_set1_epi16(1); let r = _mm256_adds_epu16(a, b); assert_eq_m256i(r, a); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_and_si256() { let a = _mm256_set1_epi8(5); let b = _mm256_set1_epi8(3); let got = _mm256_and_si256(a, b); assert_eq_m256i(got, _mm256_set1_epi8(1)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_andnot_si256() { let a = _mm256_set1_epi8(5); let b = _mm256_set1_epi8(3); let got = _mm256_andnot_si256(a, b); assert_eq_m256i(got, _mm256_set1_epi8(2)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_avg_epu8() { let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9)); let r = _mm256_avg_epu8(a, b); assert_eq_m256i(r, _mm256_set1_epi8(6)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_avg_epu16() { let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9)); let r = _mm256_avg_epu16(a, b); assert_eq_m256i(r, _mm256_set1_epi16(6)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_blend_epi32() { let (a, b) = (_mm_set1_epi32(3), 
_mm_set1_epi32(9)); let e = _mm_setr_epi32(9, 3, 3, 3); let r = _mm_blend_epi32(a, b, 0x01 as i32); assert_eq_m128i(r, e); let r = _mm_blend_epi32(b, a, 0x0E as i32); assert_eq_m128i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_blend_epi32() { let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9)); let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3); let r = _mm256_blend_epi32(a, b, 0x01 as i32); assert_eq_m256i(r, e); let e = _mm256_setr_epi32(3, 9, 3, 3, 3, 3, 3, 9); let r = _mm256_blend_epi32(a, b, 0x82 as i32); assert_eq_m256i(r, e); let e = _mm256_setr_epi32(3, 3, 9, 9, 9, 9, 9, 3); let r = _mm256_blend_epi32(a, b, 0x7C as i32); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_blend_epi16() { let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9)); let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3); let r = _mm256_blend_epi16(a, b, 0x01 as i32); assert_eq_m256i(r, e); let r = _mm256_blend_epi16(b, a, 0xFE as i32); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_blendv_epi8() { let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2)); let mask = _mm256_insert_epi8(_mm256_set1_epi8(0), -1, 2); let e = _mm256_insert_epi8(_mm256_set1_epi8(4), 2, 2); let r = _mm256_blendv_epi8(a, b, mask); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm_broadcastb_epi8() { let a = _mm_insert_epi8(_mm_set1_epi8(0x00), 0x2a, 0); let res = _mm_broadcastb_epi8(a); assert_eq_m128i(res, _mm_set1_epi8(0x2a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_broadcastb_epi8() { let a = _mm_insert_epi8(_mm_set1_epi8(0x00), 0x2a, 0); let res = _mm256_broadcastb_epi8(a); assert_eq_m256i(res, _mm256_set1_epi8(0x2a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_broadcastd_epi32() { let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0); let res = _mm_broadcastd_epi32(a); assert_eq_m128i(res, _mm_set1_epi32(0x2a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_broadcastd_epi32() { let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0); let res = _mm256_broadcastd_epi32(a); assert_eq_m256i(res, _mm256_set1_epi32(0x2a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_broadcastq_epi64() { let a = _mm_setr_epi64x(0x1ffffffff, 0); let res = _mm_broadcastq_epi64(a); assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_broadcastq_epi64() { let a = _mm_setr_epi64x(0x1ffffffff, 0); let res = _mm256_broadcastq_epi64(a); assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_broadcastsd_pd() { let a = _mm_setr_pd(6.28, 3.14); let res = _mm_broadcastsd_pd(a); assert_eq_m128d(res, _mm_set1_pd(6.28f64)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_broadcastsd_pd() { let a = _mm_setr_pd(6.28, 3.14); let res = _mm256_broadcastsd_pd(a); assert_eq_m256d(res, _mm256_set1_pd(6.28f64)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_broadcastsi128_si256() { let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210); let res = _mm256_broadcastsi128_si256(a); let retval = _mm256_setr_epi64x( 0x0987654321012334, 0x5678909876543210, 0x0987654321012334, 0x5678909876543210, ); assert_eq_m256i(res, retval); } #[simd_test(enable = "avx2")] unsafe fn test_mm_broadcastss_ps() { let a = _mm_setr_ps(6.28, 3.14, 0.0, 0.0); let res = _mm_broadcastss_ps(a); assert_eq_m128(res, _mm_set1_ps(6.28f32)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_broadcastss_ps() { let a = _mm_setr_ps(6.28, 3.14, 0.0, 0.0); let 
res = _mm256_broadcastss_ps(a); assert_eq_m256(res, _mm256_set1_ps(6.28f32)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_broadcastw_epi16() { let a = _mm_insert_epi16(_mm_set1_epi16(0x2a), 0x22b, 0); let res = _mm_broadcastw_epi16(a); assert_eq_m128i(res, _mm_set1_epi16(0x22b)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_broadcastw_epi16() { let a = _mm_insert_epi16(_mm_set1_epi16(0x2a), 0x22b, 0); let res = _mm256_broadcastw_epi16(a); assert_eq_m256i(res, _mm256_set1_epi16(0x22b)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cmpeq_epi8() { #[rustfmt::skip] let a = _mm256_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); #[rustfmt::skip] let b = _mm256_setr_epi8( 31, 30, 2, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, ); let r = _mm256_cmpeq_epi8(a, b); assert_eq_m256i(r, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 2)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cmpeq_epi16() { #[rustfmt::skip] let a = _mm256_setr_epi16( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); #[rustfmt::skip] let b = _mm256_setr_epi16( 15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, ); let r = _mm256_cmpeq_epi16(a, b); assert_eq_m256i(r, _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 2)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cmpeq_epi32() { let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0); let r = _mm256_cmpeq_epi32(a, b); let e = _mm256_set1_epi32(0); let e = _mm256_insert_epi32(e, !0, 2); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cmpeq_epi64() { let a = _mm256_setr_epi64x(0, 1, 2, 3); let b = _mm256_setr_epi64x(3, 2, 2, 0); let r = _mm256_cmpeq_epi64(a, b); assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 2)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cmpgt_epi8() { let a = _mm256_insert_epi8(_mm256_set1_epi8(0), 5, 0); let b = _mm256_set1_epi8(0); let r = _mm256_cmpgt_epi8(a, b); assert_eq_m256i(r, _mm256_insert_epi8(_mm256_set1_epi8(0), !0, 0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cmpgt_epi16() { let a = _mm256_insert_epi16(_mm256_set1_epi16(0), 5, 0); let b = _mm256_set1_epi16(0); let r = _mm256_cmpgt_epi16(a, b); assert_eq_m256i(r, _mm256_insert_epi16(_mm256_set1_epi16(0), !0, 0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cmpgt_epi32() { let a = _mm256_insert_epi32(_mm256_set1_epi32(0), 5, 0); let b = _mm256_set1_epi32(0); let r = _mm256_cmpgt_epi32(a, b); assert_eq_m256i(r, _mm256_insert_epi32(_mm256_set1_epi32(0), !0, 0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cmpgt_epi64() { let a = _mm256_insert_epi64(_mm256_set1_epi64x(0), 5, 0); let b = _mm256_set1_epi64x(0); let r = _mm256_cmpgt_epi64(a, b); assert_eq_m256i(r, _mm256_insert_epi64(_mm256_set1_epi64x(0), !0, 0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtepi8_epi16() { #[rustfmt::skip] let a = _mm_setr_epi8( 0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, ); #[rustfmt::skip] let r = _mm256_setr_epi16( 0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, ); assert_eq_m256i(r, _mm256_cvtepi8_epi16(a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtepi8_epi32() { #[rustfmt::skip] let a = _mm_setr_epi8( 0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, ); let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3); assert_eq_m256i(r, _mm256_cvtepi8_epi32(a)); } 
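// The remaining cvtepi*/cvtepu* tests check the widening conversions: the
// signed (cvtepi*) variants sign-extend and the unsigned (cvtepu*) variants
// zero-extend the low elements of the 128-bit source into the wider lanes of
// the 256-bit result.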
#[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtepi8_epi64() { #[rustfmt::skip] let a = _mm_setr_epi8( 0, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, ); let r = _mm256_setr_epi64x(0, 0, -1, 1); assert_eq_m256i(r, _mm256_cvtepi8_epi64(a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtepi16_epi32() { let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3); let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3); assert_eq_m256i(r, _mm256_cvtepi16_epi32(a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtepi16_epi64() { let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3); let r = _mm256_setr_epi64x(0, 0, -1, 1); assert_eq_m256i(r, _mm256_cvtepi16_epi64(a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtepi32_epi64() { let a = _mm_setr_epi32(0, 0, -1, 1); let r = _mm256_setr_epi64x(0, 0, -1, 1); assert_eq_m256i(r, _mm256_cvtepi32_epi64(a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtepu16_epi32() { let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); assert_eq_m256i(r, _mm256_cvtepu16_epi32(a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtepu16_epi64() { let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm256_setr_epi64x(0, 1, 2, 3); assert_eq_m256i(r, _mm256_cvtepu16_epi64(a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtepu32_epi64() { let a = _mm_setr_epi32(0, 1, 2, 3); let r = _mm256_setr_epi64x(0, 1, 2, 3); assert_eq_m256i(r, _mm256_cvtepu32_epi64(a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtepu8_epi16() { #[rustfmt::skip] let a = _mm_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); #[rustfmt::skip] let r = _mm256_setr_epi16( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); assert_eq_m256i(r, _mm256_cvtepu8_epi16(a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtepu8_epi32() { #[rustfmt::skip] let a = _mm_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); assert_eq_m256i(r, _mm256_cvtepu8_epi32(a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtepu8_epi64() { #[rustfmt::skip] let a = _mm_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); let r = _mm256_setr_epi64x(0, 1, 2, 3); assert_eq_m256i(r, _mm256_cvtepu8_epi64(a)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_extracti128_si256() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let r = _mm256_extracti128_si256(a, 0b01); let e = _mm_setr_epi64x(3, 4); assert_eq_m128i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_hadd_epi16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = _mm256_hadd_epi16(a, b); let e = _mm256_setr_epi16(4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_hadd_epi32() { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(4); let r = _mm256_hadd_epi32(a, b); let e = _mm256_setr_epi32(4, 4, 8, 8, 4, 4, 8, 8); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_hadds_epi16() { let a = _mm256_set1_epi16(2); let a = _mm256_insert_epi16(a, 0x7fff, 0); let a = _mm256_insert_epi16(a, 1, 1); let b = _mm256_set1_epi16(4); let r = _mm256_hadds_epi16(a, b); #[rustfmt::skip] let e = _mm256_setr_epi16( 0x7FFF, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_hsub_epi16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = 
_mm256_hsub_epi16(a, b); let e = _mm256_set1_epi16(0); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_hsub_epi32() { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(4); let r = _mm256_hsub_epi32(a, b); let e = _mm256_set1_epi32(0); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_hsubs_epi16() { let a = _mm256_set1_epi16(2); let a = _mm256_insert_epi16(a, 0x7fff, 0); let a = _mm256_insert_epi16(a, -1, 1); let b = _mm256_set1_epi16(4); let r = _mm256_hsubs_epi16(a, b); let e = _mm256_insert_epi16(_mm256_set1_epi16(0), 0x7FFF, 0); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_madd_epi16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = _mm256_madd_epi16(a, b); let e = _mm256_set1_epi32(16); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_inserti128_si256() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let b = _mm_setr_epi64x(7, 8); let r = _mm256_inserti128_si256(a, b, 0b01); let e = _mm256_setr_epi64x(1, 2, 7, 8); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_maddubs_epi16() { let a = _mm256_set1_epi8(2); let b = _mm256_set1_epi8(4); let r = _mm256_maddubs_epi16(a, b); let e = _mm256_set1_epi16(16); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm_maskload_epi32() { let nums = [1, 2, 3, 4]; let a = &nums as *const i32; let mask = _mm_setr_epi32(-1, 0, 0, -1); let r = _mm_maskload_epi32(a, mask); let e = _mm_setr_epi32(1, 0, 0, 4); assert_eq_m128i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_maskload_epi32() { let nums = [1, 2, 3, 4, 5, 6, 7, 8]; let a = &nums as *const i32; let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0); let r = _mm256_maskload_epi32(a, mask); let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm_maskload_epi64() { let nums = [1_i64, 2_i64]; let a = &nums as *const i64; let mask = _mm_setr_epi64x(0, -1); let r = _mm_maskload_epi64(a, mask); let e = _mm_setr_epi64x(0, 2); assert_eq_m128i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_maskload_epi64() { let nums = [1_i64, 2_i64, 3_i64, 4_i64]; let a = &nums as *const i64; let mask = _mm256_setr_epi64x(0, -1, -1, 0); let r = _mm256_maskload_epi64(a, mask); let e = _mm256_setr_epi64x(0, 2, 3, 0); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm_maskstore_epi32() { let a = _mm_setr_epi32(1, 2, 3, 4); let mut arr = [-1, -1, -1, -1]; let mask = _mm_setr_epi32(-1, 0, 0, -1); _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a); let e = [1, -1, -1, 4]; assert_eq!(arr, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_maskstore_epi32() { let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8); let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1]; let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0); _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a); let e = [1, -1, -1, 42, -1, 6, 7, -1]; assert_eq!(arr, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm_maskstore_epi64() { let a = _mm_setr_epi64x(1_i64, 2_i64); let mut arr = [-1_i64, -1_i64]; let mask = _mm_setr_epi64x(0, -1); _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a); let e = [-1, 2]; assert_eq!(arr, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_maskstore_epi64() { let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64); let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64]; let mask = _mm256_setr_epi64x(0, -1, -1, 0); 
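// Only the lanes whose mask element has its most significant bit set (lanes 1
// and 2 here) are stored; the other destination elements keep their previous
// values.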
_mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a); let e = [-1, 2, 3, -1]; assert_eq!(arr, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_max_epi16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = _mm256_max_epi16(a, b); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_max_epi32() { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(4); let r = _mm256_max_epi32(a, b); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_max_epi8() { let a = _mm256_set1_epi8(2); let b = _mm256_set1_epi8(4); let r = _mm256_max_epi8(a, b); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_max_epu16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = _mm256_max_epu16(a, b); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_max_epu32() { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(4); let r = _mm256_max_epu32(a, b); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_max_epu8() { let a = _mm256_set1_epi8(2); let b = _mm256_set1_epi8(4); let r = _mm256_max_epu8(a, b); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_min_epi16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = _mm256_min_epi16(a, b); assert_eq_m256i(r, a); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_min_epi32() { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(4); let r = _mm256_min_epi32(a, b); assert_eq_m256i(r, a); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_min_epi8() { let a = _mm256_set1_epi8(2); let b = _mm256_set1_epi8(4); let r = _mm256_min_epi8(a, b); assert_eq_m256i(r, a); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_min_epu16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = _mm256_min_epu16(a, b); assert_eq_m256i(r, a); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_min_epu32() { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(4); let r = _mm256_min_epu32(a, b); assert_eq_m256i(r, a); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_min_epu8() { let a = _mm256_set1_epi8(2); let b = _mm256_set1_epi8(4); let r = _mm256_min_epu8(a, b); assert_eq_m256i(r, a); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_movemask_epi8() { let a = _mm256_set1_epi8(-1); let r = _mm256_movemask_epi8(a); let e = -1; assert_eq!(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mpsadbw_epu8() { let a = _mm256_set1_epi8(2); let b = _mm256_set1_epi8(4); let r = _mm256_mpsadbw_epu8(a, b, 0); let e = _mm256_set1_epi16(8); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mul_epi32() { let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2); let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); let r = _mm256_mul_epi32(a, b); let e = _mm256_setr_epi64x(0, 0, 10, 14); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mul_epu32() { let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2); let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); let r = _mm256_mul_epu32(a, b); let e = _mm256_setr_epi64x(0, 0, 10, 14); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mulhi_epi16() { let a = _mm256_set1_epi16(6535); let b = _mm256_set1_epi16(6535); let r = _mm256_mulhi_epi16(a, b); let e = _mm256_set1_epi16(651); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mulhi_epu16() { let a = _mm256_set1_epi16(6535); let b = _mm256_set1_epi16(6535); let r = 
_mm256_mulhi_epu16(a, b); let e = _mm256_set1_epi16(651); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mullo_epi16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = _mm256_mullo_epi16(a, b); let e = _mm256_set1_epi16(8); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mullo_epi32() { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(4); let r = _mm256_mullo_epi32(a, b); let e = _mm256_set1_epi32(8); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mulhrs_epi16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = _mm256_mulhrs_epi16(a, b); let e = _mm256_set1_epi16(0); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_or_si256() { let a = _mm256_set1_epi8(-1); let b = _mm256_set1_epi8(0); let r = _mm256_or_si256(a, b); assert_eq_m256i(r, a); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_packs_epi16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = _mm256_packs_epi16(a, b); #[rustfmt::skip] let e = _mm256_setr_epi8( 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_packs_epi32() { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(4); let r = _mm256_packs_epi32(a, b); let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_packus_epi16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = _mm256_packus_epi16(a, b); #[rustfmt::skip] let e = _mm256_setr_epi8( 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_packus_epi32() { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(4); let r = _mm256_packus_epi32(a, b); let e = _mm256_setr_epi16(2, 2, 2, 2, 4, 4, 4, 4, 2, 2, 2, 2, 4, 4, 4, 4); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sad_epu8() { let a = _mm256_set1_epi8(2); let b = _mm256_set1_epi8(4); let r = _mm256_sad_epu8(a, b); let e = _mm256_set1_epi64x(16); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_shufflehi_epi16() { #[rustfmt::skip] let a = _mm256_setr_epi16( 0, 1, 2, 3, 11, 22, 33, 44, 4, 5, 6, 7, 55, 66, 77, 88, ); #[rustfmt::skip] let e = _mm256_setr_epi16( 0, 1, 2, 3, 44, 22, 22, 11, 4, 5, 6, 7, 88, 66, 66, 55, ); let r = _mm256_shufflehi_epi16(a, 0b00_01_01_11); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_shufflelo_epi16() { #[rustfmt::skip] let a = _mm256_setr_epi16( 11, 22, 33, 44, 0, 1, 2, 3, 55, 66, 77, 88, 4, 5, 6, 7, ); #[rustfmt::skip] let e = _mm256_setr_epi16( 44, 22, 22, 11, 0, 1, 2, 3, 88, 66, 66, 55, 4, 5, 6, 7, ); let r = _mm256_shufflelo_epi16(a, 0b00_01_01_11); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sign_epi16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(-1); let r = _mm256_sign_epi16(a, b); let e = _mm256_set1_epi16(-2); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sign_epi32() { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(-1); let r = _mm256_sign_epi32(a, b); let e = _mm256_set1_epi32(-2); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sign_epi8() { let a = _mm256_set1_epi8(2); let b = 
_mm256_set1_epi8(-1); let r = _mm256_sign_epi8(a, b); let e = _mm256_set1_epi8(-2); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sll_epi16() { let a = _mm256_set1_epi16(0xFF); let b = _mm_insert_epi16(_mm_set1_epi16(0), 4, 0); let r = _mm256_sll_epi16(a, b); assert_eq_m256i(r, _mm256_set1_epi16(0xFF0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sll_epi32() { let a = _mm256_set1_epi32(0xFFFF); let b = _mm_insert_epi32(_mm_set1_epi32(0), 4, 0); let r = _mm256_sll_epi32(a, b); assert_eq_m256i(r, _mm256_set1_epi32(0xFFFF0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sll_epi64() { let a = _mm256_set1_epi64x(0xFFFFFFFF); let b = _mm_insert_epi64(_mm_set1_epi64x(0), 4, 0); let r = _mm256_sll_epi64(a, b); assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_slli_epi16() { assert_eq_m256i( _mm256_slli_epi16(_mm256_set1_epi16(0xFF), 4), _mm256_set1_epi16(0xFF0), ); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_slli_epi32() { assert_eq_m256i( _mm256_slli_epi32(_mm256_set1_epi32(0xFFFF), 4), _mm256_set1_epi32(0xFFFF0), ); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_slli_epi64() { assert_eq_m256i( _mm256_slli_epi64(_mm256_set1_epi64x(0xFFFFFFFF), 4), _mm256_set1_epi64x(0xFFFFFFFF0), ); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_slli_si256() { let a = _mm256_set1_epi64x(0xFFFFFFFF); let r = _mm256_slli_si256(a, 3); assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_sllv_epi32() { let a = _mm_set1_epi32(2); let b = _mm_set1_epi32(1); let r = _mm_sllv_epi32(a, b); let e = _mm_set1_epi32(4); assert_eq_m128i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sllv_epi32() { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(1); let r = _mm256_sllv_epi32(a, b); let e = _mm256_set1_epi32(4); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm_sllv_epi64() { let a = _mm_set1_epi64x(2); let b = _mm_set1_epi64x(1); let r = _mm_sllv_epi64(a, b); let e = _mm_set1_epi64x(4); assert_eq_m128i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sllv_epi64() { let a = _mm256_set1_epi64x(2); let b = _mm256_set1_epi64x(1); let r = _mm256_sllv_epi64(a, b); let e = _mm256_set1_epi64x(4); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sra_epi16() { let a = _mm256_set1_epi16(-1); let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); let r = _mm256_sra_epi16(a, b); assert_eq_m256i(r, _mm256_set1_epi16(-1)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sra_epi32() { let a = _mm256_set1_epi32(-1); let b = _mm_insert_epi32(_mm_set1_epi32(0), 1, 0); let r = _mm256_sra_epi32(a, b); assert_eq_m256i(r, _mm256_set1_epi32(-1)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_srai_epi16() { assert_eq_m256i( _mm256_srai_epi16(_mm256_set1_epi16(-1), 1), _mm256_set1_epi16(-1), ); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_srai_epi32() { assert_eq_m256i( _mm256_srai_epi32(_mm256_set1_epi32(-1), 1), _mm256_set1_epi32(-1), ); } #[simd_test(enable = "avx2")] unsafe fn test_mm_srav_epi32() { let a = _mm_set1_epi32(4); let count = _mm_set1_epi32(1); let r = _mm_srav_epi32(a, count); let e = _mm_set1_epi32(2); assert_eq_m128i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_srav_epi32() { let a = _mm256_set1_epi32(4); let count = _mm256_set1_epi32(1); let r = _mm256_srav_epi32(a, count); let e = _mm256_set1_epi32(2); assert_eq_m256i(r, e); } #[simd_test(enable = 
"avx2")] unsafe fn test_mm256_srli_si256() { #[rustfmt::skip] let a = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); let r = _mm256_srli_si256(a, 3); #[rustfmt::skip] let e = _mm256_setr_epi8( 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, 0, 0, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_srl_epi16() { let a = _mm256_set1_epi16(0xFF); let b = _mm_insert_epi16(_mm_set1_epi16(0), 4, 0); let r = _mm256_srl_epi16(a, b); assert_eq_m256i(r, _mm256_set1_epi16(0xF)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_srl_epi32() { let a = _mm256_set1_epi32(0xFFFF); let b = _mm_insert_epi32(_mm_set1_epi32(0), 4, 0); let r = _mm256_srl_epi32(a, b); assert_eq_m256i(r, _mm256_set1_epi32(0xFFF)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_srl_epi64() { let a = _mm256_set1_epi64x(0xFFFFFFFF); let b = _mm_setr_epi64x(4, 0); let r = _mm256_srl_epi64(a, b); assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFF)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_srli_epi16() { assert_eq_m256i( _mm256_srli_epi16(_mm256_set1_epi16(0xFF), 4), _mm256_set1_epi16(0xF), ); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_srli_epi32() { assert_eq_m256i( _mm256_srli_epi32(_mm256_set1_epi32(0xFFFF), 4), _mm256_set1_epi32(0xFFF), ); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_srli_epi64() { assert_eq_m256i( _mm256_srli_epi64(_mm256_set1_epi64x(0xFFFFFFFF), 4), _mm256_set1_epi64x(0xFFFFFFF), ); } #[simd_test(enable = "avx2")] unsafe fn test_mm_srlv_epi32() { let a = _mm_set1_epi32(2); let count = _mm_set1_epi32(1); let r = _mm_srlv_epi32(a, count); let e = _mm_set1_epi32(1); assert_eq_m128i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_srlv_epi32() { let a = _mm256_set1_epi32(2); let count = _mm256_set1_epi32(1); let r = _mm256_srlv_epi32(a, count); let e = _mm256_set1_epi32(1); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm_srlv_epi64() { let a = _mm_set1_epi64x(2); let count = _mm_set1_epi64x(1); let r = _mm_srlv_epi64(a, count); let e = _mm_set1_epi64x(1); assert_eq_m128i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_srlv_epi64() { let a = _mm256_set1_epi64x(2); let count = _mm256_set1_epi64x(1); let r = _mm256_srlv_epi64(a, count); let e = _mm256_set1_epi64x(1); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sub_epi16() { let a = _mm256_set1_epi16(4); let b = _mm256_set1_epi16(2); let r = _mm256_sub_epi16(a, b); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sub_epi32() { let a = _mm256_set1_epi32(4); let b = _mm256_set1_epi32(2); let r = _mm256_sub_epi32(a, b); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sub_epi64() { let a = _mm256_set1_epi64x(4); let b = _mm256_set1_epi64x(2); let r = _mm256_sub_epi64(a, b); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_sub_epi8() { let a = _mm256_set1_epi8(4); let b = _mm256_set1_epi8(2); let r = _mm256_sub_epi8(a, b); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_subs_epi16() { let a = _mm256_set1_epi16(4); let b = _mm256_set1_epi16(2); let r = _mm256_subs_epi16(a, b); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_subs_epi8() { let a = _mm256_set1_epi8(4); let b = _mm256_set1_epi8(2); let r = _mm256_subs_epi8(a, b); assert_eq_m256i(r, 
b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_subs_epu16() { let a = _mm256_set1_epi16(4); let b = _mm256_set1_epi16(2); let r = _mm256_subs_epu16(a, b); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_subs_epu8() { let a = _mm256_set1_epi8(4); let b = _mm256_set1_epi8(2); let r = _mm256_subs_epu8(a, b); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_xor_si256() { let a = _mm256_set1_epi8(5); let b = _mm256_set1_epi8(3); let r = _mm256_xor_si256(a, b); assert_eq_m256i(r, _mm256_set1_epi8(6)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_alignr_epi8() { #[rustfmt::skip] let a = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); #[rustfmt::skip] let b = _mm256_setr_epi8( -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, ); let r = _mm256_alignr_epi8(a, b, 33); assert_eq_m256i(r, _mm256_set1_epi8(0)); let r = _mm256_alignr_epi8(a, b, 17); #[rustfmt::skip] let expected = _mm256_setr_epi8( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 0, ); assert_eq_m256i(r, expected); let r = _mm256_alignr_epi8(a, b, 4); #[rustfmt::skip] let expected = _mm256_setr_epi8( -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, 1, 2, 3, 4, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, 17, 18, 19, 20, ); assert_eq_m256i(r, expected); #[rustfmt::skip] let expected = _mm256_setr_epi8( -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, ); let r = _mm256_alignr_epi8(a, b, 16); assert_eq_m256i(r, expected); let r = _mm256_alignr_epi8(a, b, 15); #[rustfmt::skip] let expected = _mm256_setr_epi8( -16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -32, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); assert_eq_m256i(r, expected); let r = _mm256_alignr_epi8(a, b, 0); assert_eq_m256i(r, b); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_shuffle_epi8() { #[rustfmt::skip] let a = _mm256_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ); #[rustfmt::skip] let b = _mm256_setr_epi8( 4, 128u8 as i8, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0, 4, 128u8 as i8, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0, ); #[rustfmt::skip] let expected = _mm256_setr_epi8( 5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1, 21, 0, 21, 20, 25, 29, 23, 20, 29, 22, 22, 27, 21, 18, 25, 17, ); let r = _mm256_shuffle_epi8(a, b); assert_eq_m256i(r, expected); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_permutevar8x32_epi32() { let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800); let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4); let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500); let r = _mm256_permutevar8x32_epi32(a, b); assert_eq_m256i(r, expected); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_permute4x64_epi64() { let a = _mm256_setr_epi64x(100, 200, 300, 400); let expected = _mm256_setr_epi64x(400, 100, 200, 100); let r = _mm256_permute4x64_epi64(a, 0b00010011); assert_eq_m256i(r, expected); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_permute2x128_si256() { let a = _mm256_setr_epi64x(100, 200, 500, 600); let b = _mm256_setr_epi64x(300, 400, 
700, 800); let r = _mm256_permute2x128_si256(a, b, 0b00_01_00_11); let e = _mm256_setr_epi64x(700, 800, 500, 600); assert_eq_m256i(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_permute4x64_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let r = _mm256_permute4x64_pd(a, 0b00_01_00_11); let e = _mm256_setr_pd(4., 1., 2., 1.); assert_eq_m256d(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_permutevar8x32_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4); let r = _mm256_permutevar8x32_ps(a, b); let e = _mm256_setr_ps(6., 1., 6., 2., 8., 7., 4., 5.); assert_eq_m256(r, e); } #[simd_test(enable = "avx2")] unsafe fn test_mm_i32gather_epi32() { let mut arr = [0i32; 128]; for i in 0..128i32 { arr[i as usize] = i; } // A multiplier of 4 is word-addressing let r = _mm_i32gather_epi32(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 4); assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_mask_i32gather_epi32() { let mut arr = [0i32; 128]; for i in 0..128i32 { arr[i as usize] = i; } // A multiplier of 4 is word-addressing let r = _mm_mask_i32gather_epi32( _mm_set1_epi32(256), arr.as_ptr(), _mm_setr_epi32(0, 16, 64, 96), _mm_setr_epi32(-1, -1, -1, 0), 4, ); assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_i32gather_epi32() { let mut arr = [0i32; 128]; for i in 0..128i32 { arr[i as usize] = i; } // A multiplier of 4 is word-addressing let r = _mm256_i32gather_epi32( arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4), 4, ); assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mask_i32gather_epi32() { let mut arr = [0i32; 128]; for i in 0..128i32 { arr[i as usize] = i; } // A multiplier of 4 is word-addressing let r = _mm256_mask_i32gather_epi32( _mm256_set1_epi32(256), arr.as_ptr(), _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0), _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0), 4, ); assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_i32gather_ps() { let mut arr = [0.0f32; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 4 is word-addressing for f32s let r = _mm_i32gather_ps(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 4); assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_mask_i32gather_ps() { let mut arr = [0.0f32; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 4 is word-addressing for f32s let r = _mm_mask_i32gather_ps( _mm_set1_ps(256.0), arr.as_ptr(), _mm_setr_epi32(0, 16, 64, 96), _mm_setr_ps(-1.0, -1.0, -1.0, 0.0), 4, ); assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_i32gather_ps() { let mut arr = [0.0f32; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 4 is word-addressing for f32s let r = _mm256_i32gather_ps( arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4), 4, ); assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mask_i32gather_ps() { let mut arr = [0.0f32; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 4 is word-addressing for f32s let r = _mm256_mask_i32gather_ps( _mm256_set1_ps(256.0), arr.as_ptr(), 
_mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0), _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0), 4, ); assert_eq_m256( r, _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0), ); } #[simd_test(enable = "avx2")] unsafe fn test_mm_i32gather_epi64() { let mut arr = [0i64; 128]; for i in 0..128i64 { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s let r = _mm_i32gather_epi64(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0), 8); assert_eq_m128i(r, _mm_setr_epi64x(0, 16)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_mask_i32gather_epi64() { let mut arr = [0i64; 128]; for i in 0..128i64 { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s let r = _mm_mask_i32gather_epi64( _mm_set1_epi64x(256), arr.as_ptr(), _mm_setr_epi32(16, 16, 16, 16), _mm_setr_epi64x(-1, 0), 8, ); assert_eq_m128i(r, _mm_setr_epi64x(16, 256)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_i32gather_epi64() { let mut arr = [0i64; 128]; for i in 0..128i64 { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s let r = _mm256_i32gather_epi64(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 8); assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mask_i32gather_epi64() { let mut arr = [0i64; 128]; for i in 0..128i64 { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s let r = _mm256_mask_i32gather_epi64( _mm256_set1_epi64x(256), arr.as_ptr(), _mm_setr_epi32(0, 16, 64, 96), _mm256_setr_epi64x(-1, -1, -1, 0), 8, ); assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_i32gather_pd() { let mut arr = [0.0f64; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 8 is word-addressing for f64s let r = _mm_i32gather_pd(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0), 8); assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_mask_i32gather_pd() { let mut arr = [0.0f64; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 8 is word-addressing for f64s let r = _mm_mask_i32gather_pd( _mm_set1_pd(256.0), arr.as_ptr(), _mm_setr_epi32(16, 16, 16, 16), _mm_setr_pd(-1.0, 0.0), 8, ); assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_i32gather_pd() { let mut arr = [0.0f64; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 8 is word-addressing for f64s let r = _mm256_i32gather_pd(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48), 8); assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mask_i32gather_pd() { let mut arr = [0.0f64; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 8 is word-addressing for f64s let r = _mm256_mask_i32gather_pd( _mm256_set1_pd(256.0), arr.as_ptr(), _mm_setr_epi32(0, 16, 64, 96), _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0), 8, ); assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_i64gather_epi32() { let mut arr = [0i32; 128]; for i in 0..128i32 { arr[i as usize] = i; } // A multiplier of 4 is word-addressing let r = _mm_i64gather_epi32(arr.as_ptr(), _mm_setr_epi64x(0, 16), 4); assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_mask_i64gather_epi32() { let mut arr = [0i32; 128]; for i in 0..128i32 { arr[i as usize] = i; } // A 
multiplier of 4 is word-addressing let r = _mm_mask_i64gather_epi32( _mm_set1_epi32(256), arr.as_ptr(), _mm_setr_epi64x(0, 16), _mm_setr_epi32(-1, 0, -1, 0), 4, ); assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_i64gather_epi32() { let mut arr = [0i32; 128]; for i in 0..128i32 { arr[i as usize] = i; } // A multiplier of 4 is word-addressing let r = _mm256_i64gather_epi32(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 4); assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mask_i64gather_epi32() { let mut arr = [0i32; 128]; for i in 0..128i32 { arr[i as usize] = i; } // A multiplier of 4 is word-addressing let r = _mm256_mask_i64gather_epi32( _mm_set1_epi32(256), arr.as_ptr(), _mm256_setr_epi64x(0, 16, 64, 96), _mm_setr_epi32(-1, -1, -1, 0), 4, ); assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_i64gather_ps() { let mut arr = [0.0f32; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 4 is word-addressing for f32s let r = _mm_i64gather_ps(arr.as_ptr(), _mm_setr_epi64x(0, 16), 4); assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_mask_i64gather_ps() { let mut arr = [0.0f32; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 4 is word-addressing for f32s let r = _mm_mask_i64gather_ps( _mm_set1_ps(256.0), arr.as_ptr(), _mm_setr_epi64x(0, 16), _mm_setr_ps(-1.0, 0.0, -1.0, 0.0), 4, ); assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_i64gather_ps() { let mut arr = [0.0f32; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 4 is word-addressing for f32s let r = _mm256_i64gather_ps(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 4); assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mask_i64gather_ps() { let mut arr = [0.0f32; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 4 is word-addressing for f32s let r = _mm256_mask_i64gather_ps( _mm_set1_ps(256.0), arr.as_ptr(), _mm256_setr_epi64x(0, 16, 64, 96), _mm_setr_ps(-1.0, -1.0, -1.0, 0.0), 4, ); assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_i64gather_epi64() { let mut arr = [0i64; 128]; for i in 0..128i64 { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s let r = _mm_i64gather_epi64(arr.as_ptr(), _mm_setr_epi64x(0, 16), 8); assert_eq_m128i(r, _mm_setr_epi64x(0, 16)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_mask_i64gather_epi64() { let mut arr = [0i64; 128]; for i in 0..128i64 { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s let r = _mm_mask_i64gather_epi64( _mm_set1_epi64x(256), arr.as_ptr(), _mm_setr_epi64x(16, 16), _mm_setr_epi64x(-1, 0), 8, ); assert_eq_m128i(r, _mm_setr_epi64x(16, 256)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_i64gather_epi64() { let mut arr = [0i64; 128]; for i in 0..128i64 { arr[i as usize] = i; } // A multiplier of 8 is word-addressing for i64s let r = _mm256_i64gather_epi64(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 8); assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mask_i64gather_epi64() { let mut arr = [0i64; 128]; for i in 0..128i64 { arr[i as usize] 
= i; } // A multiplier of 8 is word-addressing for i64s let r = _mm256_mask_i64gather_epi64( _mm256_set1_epi64x(256), arr.as_ptr(), _mm256_setr_epi64x(0, 16, 64, 96), _mm256_setr_epi64x(-1, -1, -1, 0), 8, ); assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_i64gather_pd() { let mut arr = [0.0f64; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 8 is word-addressing for f64s let r = _mm_i64gather_pd(arr.as_ptr(), _mm_setr_epi64x(0, 16), 8); assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm_mask_i64gather_pd() { let mut arr = [0.0f64; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 8 is word-addressing for f64s let r = _mm_mask_i64gather_pd( _mm_set1_pd(256.0), arr.as_ptr(), _mm_setr_epi64x(16, 16), _mm_setr_pd(-1.0, 0.0), 8, ); assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_i64gather_pd() { let mut arr = [0.0f64; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 8 is word-addressing for f64s let r = _mm256_i64gather_pd(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48), 8); assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0)); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_mask_i64gather_pd() { let mut arr = [0.0f64; 128]; let mut j = 0.0; for i in 0..128usize { arr[i] = j; j += 1.0; } // A multiplier of 8 is word-addressing for f64s let r = _mm256_mask_i64gather_pd( _mm256_set1_pd(256.0), arr.as_ptr(), _mm256_setr_epi64x(0, 16, 64, 96), _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0), 8, ); assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0)); } #[simd_test(enable = "avx")] unsafe fn test_mm256_extract_epi8() { #[rustfmt::skip] let a = _mm256_setr_epi8( -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ); let r1 = _mm256_extract_epi8(a, 0); let r2 = _mm256_extract_epi8(a, 35); assert_eq!(r1, -1); assert_eq!(r2, 3); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_extract_epi16() { #[rustfmt::skip] let a = _mm256_setr_epi16( -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); let r1 = _mm256_extract_epi16(a, 0); let r2 = _mm256_extract_epi16(a, 19); assert_eq!(r1, -1); assert_eq!(r2, 3); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_extract_epi32() { let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7); let r1 = _mm256_extract_epi32(a, 0); let r2 = _mm256_extract_epi32(a, 11); assert_eq!(r1, -1); assert_eq!(r2, 3); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtsd_f64() { let a = _mm256_setr_pd(1., 2., 3., 4.); let r = _mm256_cvtsd_f64(a); assert_eq!(r, 1.); } #[simd_test(enable = "avx2")] unsafe fn test_mm256_cvtsi256_si32() { let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); let r = _mm256_cvtsi256_si32(a); assert_eq!(r, 1); } } core_arch-0.1.5/src/x86/avx512f.rs010064400007650000024000000132251343447103600146470ustar0000000000000000use crate::{ core_arch::{simd::*, simd_llvm::*, x86::*}, mem::{self, transmute}, }; #[cfg(test)] use stdsimd_test::assert_instr; /// Computes the absolute values of packed 32-bit integers in `a`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpabsd))] pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i { let a = a.as_i32x16(); // all-0 is a properly initialized i32x16 let zero: i32x16 = mem::zeroed(); let sub = simd_sub(zero, a); let cmp: i32x16 = simd_gt(a, zero); transmute(simd_select(cmp, a, sub)) } /// Computes the absolute value of packed 32-bit integers in `a`, and store the /// unsigned results in `dst` using writemask `k` (elements are copied from /// `src` when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpabsd))] pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { let abs = _mm512_abs_epi32(a).as_i32x16(); transmute(simd_select_bitmask(k, abs, src.as_i32x16())) } /// Computes the absolute value of packed 32-bit integers in `a`, and store the /// unsigned results in `dst` using zeromask `k` (elements are zeroed out when /// the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33,34,35,35&text=_mm512_maskz_abs_epi32) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vpabsd))] pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i { let abs = _mm512_abs_epi32(a).as_i32x16(); let zero = _mm512_setzero_si512().as_i32x16(); transmute(simd_select_bitmask(k, abs, zero)) } /// Returns vector of type `__m512i` with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vxorps))] pub unsafe fn _mm512_setzero_si512() -> __m512i { // All-0 is a properly initialized __m512i mem::zeroed() } /// Sets packed 32-bit integers in `dst` with the supplied values in reverse /// order. #[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_setr_epi32( e15: i32, e14: i32, e13: i32, e12: i32, e11: i32, e10: i32, e9: i32, e8: i32, e7: i32, e6: i32, e5: i32, e4: i32, e3: i32, e2: i32, e1: i32, e0: i32, ) -> __m512i { let r = i32x16( e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0, ); transmute(r) } /// Broadcast 64-bit integer `a` to all elements of `dst`. 
#[inline] #[target_feature(enable = "avx512f")] pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i { transmute(i64x8::splat(a)) } #[cfg(test)] mod tests { use std; use stdsimd_test::simd_test; use crate::core_arch::x86::*; #[simd_test(enable = "avx512f")] unsafe fn test_mm512_abs_epi32() { #[rustfmt::skip] let a = _mm512_setr_epi32( 0, 1, -1, std::i32::MAX, std::i32::MIN, 100, -100, -32, 0, 1, -1, std::i32::MAX, std::i32::MIN, 100, -100, -32, ); let r = _mm512_abs_epi32(a); let e = _mm512_setr_epi32( 0, 1, 1, std::i32::MAX, std::i32::MAX.wrapping_add(1), 100, 100, 32, 0, 1, 1, std::i32::MAX, std::i32::MAX.wrapping_add(1), 100, 100, 32, ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_abs_epi32() { #[rustfmt::skip] let a = _mm512_setr_epi32( 0, 1, -1, std::i32::MAX, std::i32::MIN, 100, -100, -32, 0, 1, -1, std::i32::MAX, std::i32::MIN, 100, -100, -32, ); let r = _mm512_mask_abs_epi32(a, 0, a); assert_eq_m512i(r, a); let r = _mm512_mask_abs_epi32(a, 0b11111111, a); let e = _mm512_setr_epi32( 0, 1, 1, std::i32::MAX, std::i32::MAX.wrapping_add(1), 100, 100, 32, 0, 1, -1, std::i32::MAX, std::i32::MIN, 100, -100, -32, ); assert_eq_m512i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_abs_epi32() { #[rustfmt::skip] let a = _mm512_setr_epi32( 0, 1, -1, std::i32::MAX, std::i32::MIN, 100, -100, -32, 0, 1, -1, std::i32::MAX, std::i32::MIN, 100, -100, -32, ); let r = _mm512_maskz_abs_epi32(0, a); assert_eq_m512i(r, _mm512_setzero_si512()); let r = _mm512_maskz_abs_epi32(0b11111111, a); let e = _mm512_setr_epi32( 0, 1, 1, std::i32::MAX, std::i32::MAX.wrapping_add(1), 100, 100, 32, 0, 0, 0, 0, 0, 0, 0, 0, ); assert_eq_m512i(r, e); } } core_arch-0.1.5/src/x86/avx512ifma.rs010064400007650000024000000170021343447103600153330ustar0000000000000000use crate::core_arch::x86::*; #[cfg(test)] use stdsimd_test::assert_instr; /// Multiply packed unsigned 52-bit integers in each 64-bit element of /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit /// unsigned integer from the intermediate result with the /// corresponding unsigned 64-bit integer in `a`, and store the /// results in `dst`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512IFMA52&expand=3488) #[inline] #[target_feature(enable = "avx512ifma")] #[cfg_attr(test, assert_instr(vpmadd52huq))] pub unsafe fn _mm512_madd52hi_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { vpmadd52huq_512(a, b, c) } /// Multiply packed unsigned 52-bit integers in each 64-bit element of /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit /// unsigned integer from the intermediate result with the /// corresponding unsigned 64-bit integer in `a`, and store the /// results in `dst`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3497&avx512techs=AVX512IFMA52) #[inline] #[target_feature(enable = "avx512ifma")] #[cfg_attr(test, assert_instr(vpmadd52luq))] pub unsafe fn _mm512_madd52lo_epu64(a: __m512i, b: __m512i, c: __m512i) -> __m512i { vpmadd52luq_512(a, b, c) } /// Multiply packed unsigned 52-bit integers in each 64-bit element of /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit /// unsigned integer from the intermediate result with the /// corresponding unsigned 64-bit integer in `a`, and store the /// results in `dst`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3485) #[inline] #[target_feature(enable = "avx512ifma,avx512vl")] #[cfg_attr(test, assert_instr(vpmadd52huq))] pub unsafe fn _mm256_madd52hi_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { vpmadd52huq_256(a, b, c) } /// Multiply packed unsigned 52-bit integers in each 64-bit element of /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit /// unsigned integer from the intermediate result with the /// corresponding unsigned 64-bit integer in `a`, and store the /// results in `dst`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL&expand=3494) #[inline] #[target_feature(enable = "avx512ifma,avx512vl")] #[cfg_attr(test, assert_instr(vpmadd52luq))] pub unsafe fn _mm256_madd52lo_epu64(a: __m256i, b: __m256i, c: __m256i) -> __m256i { vpmadd52luq_256(a, b, c) } /// Multiply packed unsigned 52-bit integers in each 64-bit element of /// `b` and `c` to form a 104-bit intermediate result. Add the high 52-bit /// unsigned integer from the intermediate result with the /// corresponding unsigned 64-bit integer in `a`, and store the /// results in `dst`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3488,3482&text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL) #[inline] #[target_feature(enable = "avx512ifma,avx512vl")] #[cfg_attr(test, assert_instr(vpmadd52huq))] pub unsafe fn _mm_madd52hi_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { vpmadd52huq_128(a, b, c) } /// Multiply packed unsigned 52-bit integers in each 64-bit element of /// `b` and `c` to form a 104-bit intermediate result. Add the low 52-bit /// unsigned integer from the intermediate result with the /// corresponding unsigned 64-bit integer in `a`, and store the /// results in `dst`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=3488,3491&text=vpmadd52&avx512techs=AVX512IFMA52,AVX512VL) #[inline] #[target_feature(enable = "avx512ifma,avx512vl")] #[cfg_attr(test, assert_instr(vpmadd52luq))] pub unsafe fn _mm_madd52lo_epu64(a: __m128i, b: __m128i, c: __m128i) -> __m128i { vpmadd52luq_128(a, b, c) } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.avx512.vpmadd52l.uq.128"] fn vpmadd52luq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i; #[link_name = "llvm.x86.avx512.vpmadd52h.uq.128"] fn vpmadd52huq_128(z: __m128i, x: __m128i, y: __m128i) -> __m128i; #[link_name = "llvm.x86.avx512.vpmadd52l.uq.256"] fn vpmadd52luq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i; #[link_name = "llvm.x86.avx512.vpmadd52h.uq.256"] fn vpmadd52huq_256(z: __m256i, x: __m256i, y: __m256i) -> __m256i; #[link_name = "llvm.x86.avx512.vpmadd52l.uq.512"] fn vpmadd52luq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i; #[link_name = "llvm.x86.avx512.vpmadd52h.uq.512"] fn vpmadd52huq_512(z: __m512i, x: __m512i, y: __m512i) -> __m512i; } #[cfg(test)] mod tests { use std; use stdsimd_test::simd_test; use crate::core_arch::x86::*; #[simd_test(enable = "avx512ifma")] unsafe fn test_mm512_madd52hi_epu64() { let mut a = _mm512_set1_epi64(10 << 40); let b = _mm512_set1_epi64((11 << 40) + 4); let c = _mm512_set1_epi64((12 << 40) + 3); a = _mm512_madd52hi_epu64(a, b, c); // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) let expected = _mm512_set1_epi64(11030549757952); assert_eq_m512i(a, expected); } #[simd_test(enable = "avx512ifma")] unsafe fn test_mm512_madd52lo_epu64() { let mut a = _mm512_set1_epi64(10 << 40); let b = _mm512_set1_epi64((11 << 40) + 4); let c = _mm512_set1_epi64((12 << 40) + 3); a = _mm512_madd52lo_epu64(a, b, c); // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) let expected = _mm512_set1_epi64(100055558127628); assert_eq_m512i(a, expected); } #[simd_test(enable = "avx512ifma,avx512vl")] unsafe fn test_mm256_madd52hi_epu64() { let mut a = _mm256_set1_epi64x(10 << 40); let b = _mm256_set1_epi64x((11 << 40) + 4); let c = _mm256_set1_epi64x((12 << 40) + 3); a = _mm256_madd52hi_epu64(a, b, c); // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) let expected = _mm256_set1_epi64x(11030549757952); assert_eq_m256i(a, expected); } #[simd_test(enable = "avx512ifma,avx512vl")] unsafe fn test_mm256_madd52lo_epu64() { let mut a = _mm256_set1_epi64x(10 << 40); let b = _mm256_set1_epi64x((11 << 40) + 4); let c = _mm256_set1_epi64x((12 << 40) + 3); a = _mm256_madd52lo_epu64(a, b, c); // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) let expected = _mm256_set1_epi64x(100055558127628); assert_eq_m256i(a, expected); } #[simd_test(enable = "avx512ifma,avx512vl")] unsafe fn test_mm_madd52hi_epu64() { let mut a = _mm_set1_epi64x(10 << 40); let b = _mm_set1_epi64x((11 << 40) + 4); let c = _mm_set1_epi64x((12 << 40) + 3); a = _mm_madd52hi_epu64(a, b, c); // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) >> 52) let expected = _mm_set1_epi64x(11030549757952); assert_eq_m128i(a, expected); } #[simd_test(enable = "avx512ifma,avx512vl")] unsafe fn test_mm_madd52lo_epu64() { let mut a = _mm_set1_epi64x(10 << 40); let b = _mm_set1_epi64x((11 << 40) + 4); let c = _mm_set1_epi64x((12 << 40) + 3); a = _mm_madd52lo_epu64(a, b, c); // (10 << 40) + ((((11 << 40) + 4) * ((12 << 40) + 3)) % (1 << 52)) let expected = _mm_set1_epi64x(100055558127628); assert_eq_m128i(a, expected); } }
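// Illustration only, not part of `core_arch`: a scalar sketch of the 52-bit
// multiply-add semantics exercised by the tests above, so the expected
// constants (11030549757952 and 100055558127628) can be re-derived with plain
// u128 arithmetic. The name `madd52_reference` and the accompanying test are
// made up for this sketch; it assumes only the documented behavior (multiply
// the low 52 bits of `b` and `c` into a 104-bit product, then add either the
// high or the low 52 bits of that product to `a`).
#[cfg(test)]
fn madd52_reference(a: u64, b: u64, c: u64) -> (u64, u64) {
    const MASK52: u128 = (1 << 52) - 1;
    // 104-bit intermediate product of the low 52 bits of `b` and `c`.
    let prod = (u128::from(b) & MASK52) * (u128::from(c) & MASK52);
    let hi = a.wrapping_add((prod >> 52) as u64); // what the madd52hi intrinsics add per lane
    let lo = a.wrapping_add((prod & MASK52) as u64); // what the madd52lo intrinsics add per lane
    (hi, lo)
}

#[test]
fn madd52_reference_values() {
    let (hi, lo) = madd52_reference(10 << 40, (11 << 40) + 4, (12 << 40) + 3);
    assert_eq!(hi, 11030549757952);
    assert_eq!(lo, 100055558127628);
}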
core_arch-0.1.5/src/x86/bmi1.rs010064400007650000024000000134311343447103600143020ustar0000000000000000//! Bit Manipulation Instruction (BMI) Set 1.0. //! //! The reference is [Intel 64 and IA-32 Architectures Software Developer's //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. //! //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions //! available. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf //! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] use stdsimd_test::assert_instr; /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(bextr))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { _bextr2_u32(a, (start & 0xff_u32) | ((len & 0xff_u32) << 8_u32)) } /// Extracts bits of `a` specified by `control` into /// the least significant bits of the result. /// /// Bits `[7,0]` of `control` specify the index to the first bit in the range /// to be extracted, and bits `[15,8]` specify the length of the range. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(bextr))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bextr2_u32(a: u32, control: u32) -> u32 { x86_bmi_bextr_32(a, control) } /// Bitwise logical `AND` of inverted `a` with `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_andn_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(andn))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _andn_u32(a: u32, b: u32) -> u32 { !a & b } /// Extracts lowest set isolated bit. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsi))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsi_u32(x: u32) -> u32 { x & x.wrapping_neg() } /// Gets mask up to lowest set bit. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsmsk))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsmsk_u32(x: u32) -> u32 { x ^ (x.wrapping_sub(1_u32)) } /// Resets the lowest set bit of `x`. /// /// If `x` is sets CF. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsr_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsr))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsr_u32(x: u32) -> u32 { x & (x.wrapping_sub(1)) } /// Counts the number of trailing least significant zero bits. /// /// When the source operand is `0`, it returns its size in bits. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(tzcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _tzcnt_u32(x: u32) -> u32 { x.trailing_zeros() } /// Counts the number of trailing least significant zero bits. /// /// When the source operand is `0`, it returns its size in bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_32) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(tzcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_tzcnt_32(x: u32) -> i32 { x.trailing_zeros() as i32 } extern "C" { #[link_name = "llvm.x86.bmi.bextr.32"] fn x86_bmi_bextr_32(x: u32, y: u32) -> u32; } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use crate::core_arch::x86::*; #[simd_test(enable = "bmi1")] unsafe fn test_bextr_u32() { let r = _bextr_u32(0b0101_0000u32, 4, 4); assert_eq!(r, 0b0000_0101u32); } #[simd_test(enable = "bmi1")] unsafe fn test_andn_u32() { assert_eq!(_andn_u32(0, 0), 0); assert_eq!(_andn_u32(0, 1), 1); assert_eq!(_andn_u32(1, 0), 0); assert_eq!(_andn_u32(1, 1), 0); let r = _andn_u32(0b0000_0000u32, 0b0000_0000u32); assert_eq!(r, 0b0000_0000u32); let r = _andn_u32(0b0000_0000u32, 0b1111_1111u32); assert_eq!(r, 0b1111_1111u32); let r = _andn_u32(0b1111_1111u32, 0b0000_0000u32); assert_eq!(r, 0b0000_0000u32); let r = _andn_u32(0b1111_1111u32, 0b1111_1111u32); assert_eq!(r, 0b0000_0000u32); let r = _andn_u32(0b0100_0000u32, 0b0101_1101u32); assert_eq!(r, 0b0001_1101u32); } #[simd_test(enable = "bmi1")] unsafe fn test_blsi_u32() { assert_eq!(_blsi_u32(0b1101_0000u32), 0b0001_0000u32); } #[simd_test(enable = "bmi1")] unsafe fn test_blsmsk_u32() { let r = _blsmsk_u32(0b0011_0000u32); assert_eq!(r, 0b0001_1111u32); } #[simd_test(enable = "bmi1")] unsafe fn test_blsr_u32() { // TODO: test the behavior when the input is `0`. let r = _blsr_u32(0b0011_0000u32); assert_eq!(r, 0b0010_0000u32); } #[simd_test(enable = "bmi1")] unsafe fn test_tzcnt_u32() { assert_eq!(_tzcnt_u32(0b0000_0001u32), 0u32); assert_eq!(_tzcnt_u32(0b0000_0000u32), 32u32); assert_eq!(_tzcnt_u32(0b1001_0000u32), 4u32); } } core_arch-0.1.5/src/x86/bmi2.rs010064400007650000024000000105271343447103600143060ustar0000000000000000//! Bit Manipulation Instruction (BMI) Set 2.0. //! //! The reference is [Intel 64 and IA-32 Architectures Software Developer's //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. //! //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions //! available. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf //! [wikipedia_bmi]: //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] use stdsimd_test::assert_instr; /// Unsigned multiply without affecting flags. /// /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with /// the low half and the high half of the result. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u32) #[inline] // LLVM BUG (should be mulxl): https://bugs.llvm.org/show_bug.cgi?id=34232 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(imul))] #[cfg_attr(all(test, target_arch = "x86"), assert_instr(mul))] #[target_feature(enable = "bmi2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 { let result: u64 = (a as u64) * (b as u64); *hi = (result >> 32) as u32; result as u32 } /// Zeroes higher bits of `a` >= `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u32) #[inline] #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(bzhi))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bzhi_u32(a: u32, index: u32) -> u32 { x86_bmi2_bzhi_32(a, index) } /// Scatter contiguous low order bits of `a` to the result at the positions /// specified by the `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u32) #[inline] #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(pdep))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _pdep_u32(a: u32, mask: u32) -> u32 { x86_bmi2_pdep_32(a, mask) } /// Gathers the bits of `x` specified by the `mask` into the contiguous low /// order bit positions of the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u32) #[inline] #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(pext))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _pext_u32(a: u32, mask: u32) -> u32 { x86_bmi2_pext_32(a, mask) } extern "C" { #[link_name = "llvm.x86.bmi.bzhi.32"] fn x86_bmi2_bzhi_32(x: u32, y: u32) -> u32; #[link_name = "llvm.x86.bmi.pdep.32"] fn x86_bmi2_pdep_32(x: u32, y: u32) -> u32; #[link_name = "llvm.x86.bmi.pext.32"] fn x86_bmi2_pext_32(x: u32, y: u32) -> u32; } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use crate::core_arch::x86::*; #[simd_test(enable = "bmi2")] unsafe fn test_pext_u32() { let n = 0b1011_1110_1001_0011u32; let m0 = 0b0110_0011_1000_0101u32; let s0 = 0b0000_0000_0011_0101u32; let m1 = 0b1110_1011_1110_1111u32; let s1 = 0b0001_0111_0100_0011u32; assert_eq!(_pext_u32(n, m0), s0); assert_eq!(_pext_u32(n, m1), s1); } #[simd_test(enable = "bmi2")] unsafe fn test_pdep_u32() { let n = 0b1011_1110_1001_0011u32; let m0 = 0b0110_0011_1000_0101u32; let s0 = 0b0000_0010_0000_0101u32; let m1 = 0b1110_1011_1110_1111u32; let s1 = 0b1110_1001_0010_0011u32; assert_eq!(_pdep_u32(n, m0), s0); assert_eq!(_pdep_u32(n, m1), s1); } #[simd_test(enable = "bmi2")] unsafe fn test_bzhi_u32() { let n = 0b1111_0010u32; let s = 0b0001_0010u32; assert_eq!(_bzhi_u32(n, 5), s); } #[simd_test(enable = "bmi2")] unsafe fn test_mulx_u32() { let a: u32 = 4_294_967_200; let b: u32 = 2; let mut hi = 0; let lo = _mulx_u32(a, b, &mut hi); /* result = 8589934400 = 0b0001_1111_1111_1111_1111_1111_1111_0100_0000u64 ^~hi ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ assert_eq!(lo, 0b1111_1111_1111_1111_1111_1111_0100_0000u32); assert_eq!(hi, 0b0001u32); } } core_arch-0.1.5/src/x86/bswap.rs010064400007650000024000000014051345561510300145620ustar0000000000000000//! Byte swap intrinsics. 
#![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; /// Returns an integer with the reversed byte order of x /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap) #[inline] #[cfg_attr(test, assert_instr(bswap))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bswap(x: i32) -> i32 { bswap_i32(x) } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.bswap.i32"] fn bswap_i32(x: i32) -> i32; } #[cfg(test)] mod tests { use super::*; #[test] fn test_bswap() { unsafe { assert_eq!(_bswap(0x0EADBE0F), 0x0FBEAD0E); assert_eq!(_bswap(0x00000000), 0x00000000); } } } core_arch-0.1.5/src/x86/bt.rs010064400007650000024000000057321344736322300140660ustar0000000000000000#[cfg(test)] use stdsimd_test::assert_instr; /// Returns the bit in position `b` of the memory addressed by `p`. #[inline] #[cfg_attr(test, assert_instr(bt))] #[unstable(feature = "simd_x86_bittest", issue = "59414")] pub unsafe fn _bittest(p: *const i32, b: i32) -> u8 { let r: u8; asm!("btl $2, $1\n\tsetc ${0:b}" : "=r"(r) : "*m"(p), "r"(b) : "cc", "memory"); r } /// Returns the bit in position `b` of the memory addressed by `p`, then sets the bit to `1`. #[inline] #[cfg_attr(test, assert_instr(bts))] #[unstable(feature = "simd_x86_bittest", issue = "59414")] pub unsafe fn _bittestandset(p: *mut i32, b: i32) -> u8 { let r: u8; asm!("btsl $2, $1\n\tsetc ${0:b}" : "=r"(r), "+*m"(p) : "r"(b) : "cc", "memory"); r } /// Returns the bit in position `b` of the memory addressed by `p`, then resets that bit to `0`. #[inline] #[cfg_attr(test, assert_instr(btr))] #[unstable(feature = "simd_x86_bittest", issue = "59414")] pub unsafe fn _bittestandreset(p: *mut i32, b: i32) -> u8 { let r: u8; asm!("btrl $2, $1\n\tsetc ${0:b}" : "=r"(r), "+*m"(p) : "r"(b) : "cc", "memory"); r } /// Returns the bit in position `b` of the memory addressed by `p`, then inverts that bit. #[inline] #[cfg_attr(test, assert_instr(btc))] #[unstable(feature = "simd_x86_bittest", issue = "59414")] pub unsafe fn _bittestandcomplement(p: *mut i32, b: i32) -> u8 { let r: u8; asm!("btcl $2, $1\n\tsetc ${0:b}" : "=r"(r), "+*m"(p) : "r"(b) : "cc", "memory"); r } #[cfg(test)] mod tests { use crate::core_arch::x86::*; #[test] fn test_bittest() { unsafe { let a = 0b0101_0000i32; assert_eq!(_bittest(&a as _, 4), 1); assert_eq!(_bittest(&a as _, 5), 0); } } #[test] fn test_bittestandset() { unsafe { let mut a = 0b0101_0000i32; assert_eq!(_bittestandset(&mut a as _, 4), 1); assert_eq!(_bittestandset(&mut a as _, 4), 1); assert_eq!(_bittestandset(&mut a as _, 5), 0); assert_eq!(_bittestandset(&mut a as _, 5), 1); } } #[test] fn test_bittestandreset() { unsafe { let mut a = 0b0101_0000i32; assert_eq!(_bittestandreset(&mut a as _, 4), 1); assert_eq!(_bittestandreset(&mut a as _, 4), 0); assert_eq!(_bittestandreset(&mut a as _, 5), 0); assert_eq!(_bittestandreset(&mut a as _, 5), 0); } } #[test] fn test_bittestandcomplement() { unsafe { let mut a = 0b0101_0000i32; assert_eq!(_bittestandcomplement(&mut a as _, 4), 1); assert_eq!(_bittestandcomplement(&mut a as _, 4), 0); assert_eq!(_bittestandcomplement(&mut a as _, 4), 1); assert_eq!(_bittestandcomplement(&mut a as _, 5), 0); assert_eq!(_bittestandcomplement(&mut a as _, 5), 1); } } } core_arch-0.1.5/src/x86/cpuid.rs010064400007650000024000000147131345561510300145600ustar0000000000000000//! `cpuid` intrinsics #![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; /// Result of the `cpuid` instruction. 
#[allow(clippy::missing_inline_in_public_items)] // ^^ the derived impl of Debug for CpuidResult is not #[inline] and that's OK. #[derive(Copy, Clone, Debug, Eq, Ord, PartialEq, PartialOrd)] #[stable(feature = "simd_x86", since = "1.27.0")] pub struct CpuidResult { /// EAX register. #[stable(feature = "simd_x86", since = "1.27.0")] pub eax: u32, /// EBX register. #[stable(feature = "simd_x86", since = "1.27.0")] pub ebx: u32, /// ECX register. #[stable(feature = "simd_x86", since = "1.27.0")] pub ecx: u32, /// EDX register. #[stable(feature = "simd_x86", since = "1.27.0")] pub edx: u32, } /// Returns the result of the `cpuid` instruction for a given `leaf` (`EAX`) /// and /// `sub_leaf` (`ECX`). /// /// The highest-supported leaf value is returned by the first tuple argument of /// [`__get_cpuid_max(0)`](fn.__get_cpuid_max.html). For leaves containung /// sub-leaves, the second tuple argument returns the highest-supported /// sub-leaf /// value. /// /// The [CPUID Wikipedia page][wiki_cpuid] contains how to query which /// information using the `EAX` and `ECX` registers, and the interpretation of /// the results returned in `EAX`, `EBX`, `ECX`, and `EDX`. /// /// The references are: /// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: /// Instruction Set Reference, A-Z][intel64_ref]. /// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and /// System Instructions][amd64_ref]. /// /// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID /// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf /// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf #[inline] #[cfg_attr(test, assert_instr(cpuid))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult { let eax; let ebx; let ecx; let edx; #[cfg(target_arch = "x86")] { asm!("cpuid" : "={eax}"(eax), "={ebx}"(ebx), "={ecx}"(ecx), "={edx}"(edx) : "{eax}"(leaf), "{ecx}"(sub_leaf) : :); } #[cfg(target_arch = "x86_64")] { // x86-64 uses %rbx as the base register, so preserve it. asm!("cpuid\n" : "={eax}"(eax), "={ebx}"(ebx), "={ecx}"(ecx), "={edx}"(edx) : "{eax}"(leaf), "{ecx}"(sub_leaf) : "rbx" :); } CpuidResult { eax, ebx, ecx, edx } } /// See [`__cpuid_count`](fn.__cpuid_count.html). #[inline] #[cfg_attr(test, assert_instr(cpuid))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn __cpuid(leaf: u32) -> CpuidResult { __cpuid_count(leaf, 0) } /// Does the host support the `cpuid` instruction? #[inline] pub fn has_cpuid() -> bool { #[cfg(target_env = "sgx")] { false } #[cfg(all(not(target_env = "sgx"), target_arch = "x86_64"))] { true } #[cfg(all(not(target_env = "sgx"), target_arch = "x86"))] { // Optimization for i586 and i686 Rust targets which SSE enabled // and support cpuid: #[cfg(target_feature = "sse")] { true } // If SSE is not enabled, detect whether cpuid is available: #[cfg(not(target_feature = "sse"))] unsafe { // On `x86` the `cpuid` instruction is not always available. // This follows the approach indicated in: // http://wiki.osdev.org/CPUID#Checking_CPUID_availability // https://software.intel.com/en-us/articles/using-cpuid-to-detect-the-presence-of-sse-41-and-sse-42-instruction-sets/ // which detects whether `cpuid` is available by checking whether // the 21st bit of the EFLAGS register is modifiable or not. // If it is, then `cpuid` is available. 
let result: u32; let _temp: u32; asm!(r#" # Read eflags into $0 and copy it into $1: pushfd pop $0 mov $1, $0 # Flip 21st bit of $0. xor $0, 0x200000 # Set eflags to the value of $0 # # Bit 21st can only be modified if cpuid is available push $0 popfd # A # Read eflags into $0: pushfd # B pop $0 # xor with the original eflags sets the bits that # have been modified: xor $0, $1 "# : "=r"(result), "=r"(_temp) : : "cc", "memory" : "intel"); // There is a race between popfd (A) and pushfd (B) // where other bits beyond 21st may have been modified due to // interrupts, a debugger stepping through the asm, etc. // // Therefore, explicitly check whether the 21st bit // was modified or not. // // If the result is zero, the cpuid bit was not modified. // If the result is `0x200000` (non-zero), then the cpuid // was correctly modified and the CPU supports the cpuid // instruction: (result & 0x200000) != 0 } } } /// Returns the highest-supported `leaf` (`EAX`) and sub-leaf (`ECX`) `cpuid` /// values. /// /// If `cpuid` is supported, and `leaf` is zero, then the first tuple argument /// contains the highest `leaf` value that `cpuid` supports. For `leaf`s /// containing sub-leafs, the second tuple argument contains the /// highest-supported sub-leaf value. /// /// See also [`__cpuid`](fn.__cpuid.html) and /// [`__cpuid_count`](fn.__cpuid_count.html). #[inline] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) { let CpuidResult { eax, ebx, .. } = __cpuid(leaf); (eax, ebx) } #[cfg(test)] mod tests { use crate::core_arch::x86::*; #[test] fn test_always_has_cpuid() { // all currently-tested targets have the instruction // FIXME: add targets without `cpuid` to CI assert!(cpuid::has_cpuid()); } #[test] fn test_has_cpuid_idempotent() { assert_eq!(cpuid::has_cpuid(), cpuid::has_cpuid()); } } core_arch-0.1.5/src/x86/eflags.rs010064400007650000024000000044301343447103600147120ustar0000000000000000//! `i386` intrinsics /// Reads EFLAGS. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__readeflags) #[cfg(target_arch = "x86")] #[inline(always)] #[stable(feature = "simd_x86", since = "1.27.0")] #[rustc_deprecated( since = "1.29.0", reason = "See issue #51810 - use inline assembly instead" )] #[doc(hidden)] pub unsafe fn __readeflags() -> u32 { let eflags: u32; asm!("pushfd; popl $0" : "=r"(eflags) : : : "volatile"); eflags } /// Reads EFLAGS. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__readeflags) #[cfg(target_arch = "x86_64")] #[inline(always)] #[stable(feature = "simd_x86", since = "1.27.0")] #[rustc_deprecated( since = "1.29.0", reason = "See issue #51810 - use inline assembly instead" )] #[doc(hidden)] pub unsafe fn __readeflags() -> u64 { let eflags: u64; asm!("pushfq; popq $0" : "=r"(eflags) : : : "volatile"); eflags } /// Write EFLAGS. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__writeeflags) #[cfg(target_arch = "x86")] #[inline(always)] #[stable(feature = "simd_x86", since = "1.27.0")] #[rustc_deprecated( since = "1.29.0", reason = "See issue #51810 - use inline assembly instead" )] #[doc(hidden)] pub unsafe fn __writeeflags(eflags: u32) { asm!("pushl $0; popfd" : : "r"(eflags) : "cc", "flags" : "volatile"); } /// Write EFLAGS. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__writeeflags) #[cfg(target_arch = "x86_64")] #[inline(always)] #[stable(feature = "simd_x86", since = "1.27.0")] #[rustc_deprecated( since = "1.29.0", reason = "See issue #51810 - use inline assembly instead" )] #[doc(hidden)] pub unsafe fn __writeeflags(eflags: u64) { asm!("pushq $0; popfq" : : "r"(eflags) : "cc", "flags" : "volatile"); } #[cfg(test)] mod tests { use crate::core_arch::x86::*; #[test] #[allow(deprecated)] fn test_eflags() { unsafe { // reads eflags, writes them back, reads them again, // and compare for equality: let v = __readeflags(); __writeeflags(v); let u = __readeflags(); assert_eq!(v, u); } } } core_arch-0.1.5/src/x86/fma.rs010064400007650000024000001000701343447103600142110ustar0000000000000000//! Fused Multiply-Add instruction set (FMA) //! //! The FMA instruction set is an extension to the 128 and 256-bit SSE //! instructions in the x86 microprocessor instruction set to perform fused //! multiply–add (FMA) operations. //! //! The references are: //! //! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: //! Instruction Set Reference, A-Z][intel64_ref]. //! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and //! System Instructions][amd64_ref]. //! //! Wikipedia's [FMA][wiki_fma] page provides a quick overview of the //! instructions available. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf //! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate use crate::core_arch::x86::*; #[cfg(test)] use stdsimd_test::assert_instr; /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmaddpd(a, b, c) } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_pd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfmaddpd256(a, b, c) } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ps) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfmaddps(a, b, c) } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and add the intermediate result to packed elements in `c`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmadd_ps) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfmaddps256(a, b, c) } /// Multiplies the lower double-precision (64-bit) floating-point elements in /// `a` and `b`, and add the intermediate result to the lower element in `c`. /// Stores the result in the lower element of the returned value, and copy the /// upper element from `a` to the upper elements of the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_sd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmaddsd(a, b, c) } /// Multiplies the lower single-precision (32-bit) floating-point elements in /// `a` and `b`, and add the intermediate result to the lower element in `c`. /// Stores the result in the lower element of the returned value, and copy the /// 3 upper elements from `a` to the upper elements of the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_ss) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { vfmaddss(a, b, c) } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and alternatively add and subtract packed elements in `c` to/from /// the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_pd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmaddsubpd(a, b, c) } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and alternatively add and subtract packed elements in `c` to/from /// the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_pd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfmaddsubpd256(a, b, c) } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and alternatively add and subtract packed elements in `c` to/from /// the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmaddsub_ps) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfmaddsubps(a, b, c) } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and alternatively add and subtract packed elements in `c` to/from /// the intermediate result. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmaddsub_ps) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmaddsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfmaddsubps256(a, b, c) } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_pd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmsubpd(a, b, c) } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_pd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfmsubpd256(a, b, c) } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ps) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub213ps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfmsubps(a, b, c) } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsub_ps) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub213ps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfmsubps256(a, b, c) } /// Multiplies the lower double-precision (64-bit) floating-point elements in /// `a` and `b`, and subtract the lower element in `c` from the intermediate /// result. Store the result in the lower element of the returned value, and /// copy the upper element from `a` to the upper elements of the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_sd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmsubsd(a, b, c) } /// Multiplies the lower single-precision (32-bit) floating-point elements in /// `a` and `b`, and subtract the lower element in `c` from the intermediate /// result. Store the result in the lower element of the returned value, and /// copy the 3 upper elements from `a` to the upper elements of the result. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsub_ss) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { vfmsubss(a, b, c) } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and alternatively subtract and add packed elements in `c` from/to /// the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_pd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfmsubaddpd(a, b, c) } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and alternatively subtract and add packed elements in `c` from/to /// the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_pd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfmsubaddpd256(a, b, c) } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and alternatively subtract and add packed elements in `c` from/to /// the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmsubadd_ps) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfmsubaddps(a, b, c) } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and alternatively subtract and add packed elements in `c` from/to /// the intermediate result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fmsubadd_ps) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfmsubadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfmsubaddps256(a, b, c) } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the negated intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_pd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfnmaddpd(a, b, c) } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and add the negated intermediate result to packed elements in `c`. 
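// Illustrative sketch (not part of this crate): `fmsubadd` is the mirror image
// of `fmaddsub` above. Even-indexed lanes add `c` and odd-indexed lanes
// subtract it (again fused into one rounding step by the real instruction).
fn fmsubadd_pd_model(a: [f64; 2], b: [f64; 2], c: [f64; 2]) -> [f64; 2] {
    [
        a[0] * b[0] + c[0], // even lane: add `c`
        a[1] * b[1] - c[1], // odd lane: subtract `c`
    ]
}
// With a = [1., 2.], b = [5., 3.] and c = [4., 9.] this yields [9., -3.],
// matching the `_mm_fmsubadd_pd` test below.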
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_pd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfnmaddpd256(a, b, c) } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and add the negated intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ps) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfnmaddps(a, b, c) } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and add the negated intermediate result to packed elements in `c`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmadd_ps) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfnmaddps256(a, b, c) } /// Multiplies the lower double-precision (64-bit) floating-point elements in /// `a` and `b`, and add the negated intermediate result to the lower element /// in `c`. Store the result in the lower element of the returned value, and /// copy the upper element from `a` to the upper elements of the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_sd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfnmaddsd(a, b, c) } /// Multiplies the lower single-precision (32-bit) floating-point elements in /// `a` and `b`, and add the negated intermediate result to the lower element /// in `c`. Store the result in the lower element of the returned value, and /// copy the 3 upper elements from `a` to the upper elements of the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmadd_ss) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmadd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 { vfnmaddss(a, b, c) } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the negated intermediate /// result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_pd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfnmsubpd(a, b, c) } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the negated intermediate /// result. 
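// Illustrative sketch (not part of this crate): per-lane formulas for the
// negated variants described above. `fnmadd` computes `-(a * b) + c` while
// `fnmsub` computes `-(a * b) - c`; only the product is negated, not `c`.
fn fnmadd_lane(a: f64, b: f64, c: f64) -> f64 {
    -(a * b) + c // e.g. fnmadd_lane(1., 5., 4.) == -1.
}
fn fnmsub_lane(a: f64, b: f64, c: f64) -> f64 {
    -(a * b) - c // e.g. fnmsub_lane(1., 5., 4.) == -9.
}
// These match the first lanes of the `_mm_fnmadd_pd` and `_mm_fnmsub_pd`
// tests below.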
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_pd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d { vfnmsubpd256(a, b, c) } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the negated intermediate /// result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ps) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 { vfnmsubps(a, b, c) } /// Multiplies packed single-precision (32-bit) floating-point elements in `a` /// and `b`, and subtract packed elements in `c` from the negated intermediate /// result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_fnmsub_ps) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 { vfnmsubps256(a, b, c) } /// Multiplies the lower double-precision (64-bit) floating-point elements in /// `a` and `b`, and subtract packed elements in `c` from the negated /// intermediate result. Store the result in the lower element of the returned /// value, and copy the upper element from `a` to the upper elements of the /// result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_sd) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d { vfnmsubsd(a, b, c) } /// Multiplies the lower single-precision (32-bit) floating-point elements in /// `a` and `b`, and subtract packed elements in `c` from the negated /// intermediate result. Store the result in the lower element of the /// returned value, and copy the 3 upper elements from `a` to the upper /// elements of the result. 
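// Illustrative sketch (not part of this crate): the scalar `_sd`/`_ss`
// variants described above compute only the lowest lane; every upper lane of
// the result is copied from `a`. A plain-Rust model of `_mm_fnmsub_sd`
// viewed as a two-lane `f64` array:
fn fnmsub_sd_model(a: [f64; 2], b: [f64; 2], c: [f64; 2]) -> [f64; 2] {
    [
        -(a[0] * b[0]) - c[0], // lower lane: computed
        a[1],                  // upper lane: passed through from `a`
    ]
}
// With a = [1., 2.], b = [5., 3.] and c = [4., 9.] this yields [-9., 2.],
// matching the `_mm_fnmsub_sd` test below.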
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fnmsub_ss) #[inline] #[target_feature(enable = "fma")] #[cfg_attr(test, assert_instr(vfnmsub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 { vfnmsubss(a, b, c) } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.fma.vfmadd.pd"] fn vfmaddpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; #[link_name = "llvm.x86.fma.vfmadd.pd.256"] fn vfmaddpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d; #[link_name = "llvm.x86.fma.vfmadd.ps"] fn vfmaddps(a: __m128, b: __m128, c: __m128) -> __m128; #[link_name = "llvm.x86.fma.vfmadd.ps.256"] fn vfmaddps256(a: __m256, b: __m256, c: __m256) -> __m256; #[link_name = "llvm.x86.fma.vfmadd.sd"] fn vfmaddsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; #[link_name = "llvm.x86.fma.vfmadd.ss"] fn vfmaddss(a: __m128, b: __m128, c: __m128) -> __m128; #[link_name = "llvm.x86.fma.vfmaddsub.pd"] fn vfmaddsubpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; #[link_name = "llvm.x86.fma.vfmaddsub.pd.256"] fn vfmaddsubpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d; #[link_name = "llvm.x86.fma.vfmaddsub.ps"] fn vfmaddsubps(a: __m128, b: __m128, c: __m128) -> __m128; #[link_name = "llvm.x86.fma.vfmaddsub.ps.256"] fn vfmaddsubps256(a: __m256, b: __m256, c: __m256) -> __m256; #[link_name = "llvm.x86.fma.vfmsub.pd"] fn vfmsubpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; #[link_name = "llvm.x86.fma.vfmsub.pd.256"] fn vfmsubpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d; #[link_name = "llvm.x86.fma.vfmsub.ps"] fn vfmsubps(a: __m128, b: __m128, c: __m128) -> __m128; #[link_name = "llvm.x86.fma.vfmsub.ps.256"] fn vfmsubps256(a: __m256, b: __m256, c: __m256) -> __m256; #[link_name = "llvm.x86.fma.vfmsub.sd"] fn vfmsubsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; #[link_name = "llvm.x86.fma.vfmsub.ss"] fn vfmsubss(a: __m128, b: __m128, c: __m128) -> __m128; #[link_name = "llvm.x86.fma.vfmsubadd.pd"] fn vfmsubaddpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; #[link_name = "llvm.x86.fma.vfmsubadd.pd.256"] fn vfmsubaddpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d; #[link_name = "llvm.x86.fma.vfmsubadd.ps"] fn vfmsubaddps(a: __m128, b: __m128, c: __m128) -> __m128; #[link_name = "llvm.x86.fma.vfmsubadd.ps.256"] fn vfmsubaddps256(a: __m256, b: __m256, c: __m256) -> __m256; #[link_name = "llvm.x86.fma.vfnmadd.pd"] fn vfnmaddpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; #[link_name = "llvm.x86.fma.vfnmadd.pd.256"] fn vfnmaddpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d; #[link_name = "llvm.x86.fma.vfnmadd.ps"] fn vfnmaddps(a: __m128, b: __m128, c: __m128) -> __m128; #[link_name = "llvm.x86.fma.vfnmadd.ps.256"] fn vfnmaddps256(a: __m256, b: __m256, c: __m256) -> __m256; #[link_name = "llvm.x86.fma.vfnmadd.sd"] fn vfnmaddsd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; #[link_name = "llvm.x86.fma.vfnmadd.ss"] fn vfnmaddss(a: __m128, b: __m128, c: __m128) -> __m128; #[link_name = "llvm.x86.fma.vfnmsub.pd"] fn vfnmsubpd(a: __m128d, b: __m128d, c: __m128d) -> __m128d; #[link_name = "llvm.x86.fma.vfnmsub.pd.256"] fn vfnmsubpd256(a: __m256d, b: __m256d, c: __m256d) -> __m256d; #[link_name = "llvm.x86.fma.vfnmsub.ps"] fn vfnmsubps(a: __m128, b: __m128, c: __m128) -> __m128; #[link_name = "llvm.x86.fma.vfnmsub.ps.256"] fn vfnmsubps256(a: __m256, b: __m256, c: __m256) -> __m256; #[link_name = "llvm.x86.fma.vfnmsub.sd"] fn vfnmsubsd(a: __m128d, b: __m128d, c: 
__m128d) -> __m128d; #[link_name = "llvm.x86.fma.vfnmsub.ss"] fn vfnmsubss(a: __m128, b: __m128, c: __m128) -> __m128; } #[cfg(test)] mod tests { use std; use stdsimd_test::simd_test; use crate::core_arch::x86::*; #[simd_test(enable = "fma")] unsafe fn test_mm_fmadd_pd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(5., 3.); let c = _mm_setr_pd(4., 9.); let r = _mm_setr_pd(9., 15.); assert_eq_m128d(_mm_fmadd_pd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm256_fmadd_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 3., 7., 2.); let c = _mm256_setr_pd(4., 9., 1., 7.); let r = _mm256_setr_pd(9., 15., 22., 15.); assert_eq_m256d(_mm256_fmadd_pd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fmadd_ps() { let a = _mm_setr_ps(1., 2., 3., 4.); let b = _mm_setr_ps(5., 3., 7., 2.); let c = _mm_setr_ps(4., 9., 1., 7.); let r = _mm_setr_ps(9., 15., 22., 15.); assert_eq_m128(_mm_fmadd_ps(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm256_fmadd_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.); let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.); let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.); let r = _mm256_setr_ps(9., 15., 22., 15., -5., -49., -2., -31.); assert_eq_m256(_mm256_fmadd_ps(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fmadd_sd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(5., 3.); let c = _mm_setr_pd(4., 9.); let r = _mm_setr_pd(9., 2.); assert_eq_m128d(_mm_fmadd_sd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fmadd_ss() { let a = _mm_setr_ps(1., 2., 3., 4.); let b = _mm_setr_ps(5., 3., 7., 2.); let c = _mm_setr_ps(4., 9., 1., 7.); let r = _mm_setr_ps(9., 2., 3., 4.); assert_eq_m128(_mm_fmadd_ss(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fmaddsub_pd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(5., 3.); let c = _mm_setr_pd(4., 9.); let r = _mm_setr_pd(1., 15.); assert_eq_m128d(_mm_fmaddsub_pd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm256_fmaddsub_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 3., 7., 2.); let c = _mm256_setr_pd(4., 9., 1., 7.); let r = _mm256_setr_pd(1., 15., 20., 15.); assert_eq_m256d(_mm256_fmaddsub_pd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fmaddsub_ps() { let a = _mm_setr_ps(1., 2., 3., 4.); let b = _mm_setr_ps(5., 3., 7., 2.); let c = _mm_setr_ps(4., 9., 1., 7.); let r = _mm_setr_ps(1., 15., 20., 15.); assert_eq_m128(_mm_fmaddsub_ps(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm256_fmaddsub_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.); let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.); let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.); let r = _mm256_setr_ps(1., 15., 20., 15., 5., -49., 2., -31.); assert_eq_m256(_mm256_fmaddsub_ps(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fmsub_pd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(5., 3.); let c = _mm_setr_pd(4., 9.); let r = _mm_setr_pd(1., -3.); assert_eq_m128d(_mm_fmsub_pd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm256_fmsub_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 3., 7., 2.); let c = _mm256_setr_pd(4., 9., 1., 7.); let r = _mm256_setr_pd(1., -3., 20., 1.); assert_eq_m256d(_mm256_fmsub_pd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fmsub_ps() { let a = _mm_setr_ps(1., 2., 3., 4.); let b = _mm_setr_ps(5., 3., 7., 2.); let c = 
_mm_setr_ps(4., 9., 1., 7.); let r = _mm_setr_ps(1., -3., 20., 1.); assert_eq_m128(_mm_fmsub_ps(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm256_fmsub_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.); let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.); let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.); let r = _mm256_setr_ps(1., -3., 20., 1., 5., -71., 2., -25.); assert_eq_m256(_mm256_fmsub_ps(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fmsub_sd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(5., 3.); let c = _mm_setr_pd(4., 9.); let r = _mm_setr_pd(1., 2.); assert_eq_m128d(_mm_fmsub_sd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fmsub_ss() { let a = _mm_setr_ps(1., 2., 3., 4.); let b = _mm_setr_ps(5., 3., 7., 2.); let c = _mm_setr_ps(4., 9., 1., 7.); let r = _mm_setr_ps(1., 2., 3., 4.); assert_eq_m128(_mm_fmsub_ss(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fmsubadd_pd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(5., 3.); let c = _mm_setr_pd(4., 9.); let r = _mm_setr_pd(9., -3.); assert_eq_m128d(_mm_fmsubadd_pd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm256_fmsubadd_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 3., 7., 2.); let c = _mm256_setr_pd(4., 9., 1., 7.); let r = _mm256_setr_pd(9., -3., 22., 1.); assert_eq_m256d(_mm256_fmsubadd_pd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fmsubadd_ps() { let a = _mm_setr_ps(1., 2., 3., 4.); let b = _mm_setr_ps(5., 3., 7., 2.); let c = _mm_setr_ps(4., 9., 1., 7.); let r = _mm_setr_ps(9., -3., 22., 1.); assert_eq_m128(_mm_fmsubadd_ps(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm256_fmsubadd_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.); let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.); let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.); let r = _mm256_setr_ps(9., -3., 22., 1., -5., -71., -2., -25.); assert_eq_m256(_mm256_fmsubadd_ps(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fnmadd_pd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(5., 3.); let c = _mm_setr_pd(4., 9.); let r = _mm_setr_pd(-1., 3.); assert_eq_m128d(_mm_fnmadd_pd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm256_fnmadd_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 3., 7., 2.); let c = _mm256_setr_pd(4., 9., 1., 7.); let r = _mm256_setr_pd(-1., 3., -20., -1.); assert_eq_m256d(_mm256_fnmadd_pd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fnmadd_ps() { let a = _mm_setr_ps(1., 2., 3., 4.); let b = _mm_setr_ps(5., 3., 7., 2.); let c = _mm_setr_ps(4., 9., 1., 7.); let r = _mm_setr_ps(-1., 3., -20., -1.); assert_eq_m128(_mm_fnmadd_ps(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm256_fnmadd_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.); let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.); let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.); let r = _mm256_setr_ps(-1., 3., -20., -1., -5., 71., -2., 25.); assert_eq_m256(_mm256_fnmadd_ps(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fnmadd_sd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(5., 3.); let c = _mm_setr_pd(4., 9.); let r = _mm_setr_pd(-1., 2.); assert_eq_m128d(_mm_fnmadd_sd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fnmadd_ss() { let a = _mm_setr_ps(1., 2., 3., 4.); let b = _mm_setr_ps(5., 3., 7., 2.); let c = _mm_setr_ps(4., 
9., 1., 7.); let r = _mm_setr_ps(-1., 2., 3., 4.); assert_eq_m128(_mm_fnmadd_ss(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fnmsub_pd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(5., 3.); let c = _mm_setr_pd(4., 9.); let r = _mm_setr_pd(-9., -15.); assert_eq_m128d(_mm_fnmsub_pd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm256_fnmsub_pd() { let a = _mm256_setr_pd(1., 2., 3., 4.); let b = _mm256_setr_pd(5., 3., 7., 2.); let c = _mm256_setr_pd(4., 9., 1., 7.); let r = _mm256_setr_pd(-9., -15., -22., -15.); assert_eq_m256d(_mm256_fnmsub_pd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fnmsub_ps() { let a = _mm_setr_ps(1., 2., 3., 4.); let b = _mm_setr_ps(5., 3., 7., 2.); let c = _mm_setr_ps(4., 9., 1., 7.); let r = _mm_setr_ps(-9., -15., -22., -15.); assert_eq_m128(_mm_fnmsub_ps(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm256_fnmsub_ps() { let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.); let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.); let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.); let r = _mm256_setr_ps(-9., -15., -22., -15., 5., 49., 2., 31.); assert_eq_m256(_mm256_fnmsub_ps(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fnmsub_sd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(5., 3.); let c = _mm_setr_pd(4., 9.); let r = _mm_setr_pd(-9., 2.); assert_eq_m128d(_mm_fnmsub_sd(a, b, c), r); } #[simd_test(enable = "fma")] unsafe fn test_mm_fnmsub_ss() { let a = _mm_setr_ps(1., 2., 3., 4.); let b = _mm_setr_ps(5., 3., 7., 2.); let c = _mm_setr_ps(4., 9., 1., 7.); let r = _mm_setr_ps(-9., 2., 3., 4.); assert_eq_m128(_mm_fnmsub_ss(a, b, c), r); } } core_arch-0.1.5/src/x86/fxsr.rs010064400007650000024000000063271343447103600144420ustar0000000000000000//! FXSR floating-point context fast save and restore. #[cfg(test)] use stdsimd_test::assert_instr; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.fxsave"] fn fxsave(p: *mut u8) -> (); #[link_name = "llvm.x86.fxrstor"] fn fxrstor(p: *const u8) -> (); } /// Saves the `x87` FPU, `MMX` technology, `XMM`, and `MXCSR` registers to the /// 512-byte-long 16-byte-aligned memory region `mem_addr`. /// /// A misaligned destination operand raises a general-protection (#GP) or an /// alignment check exception (#AC). /// /// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor]. /// /// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html /// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxsave) #[inline] #[target_feature(enable = "fxsr")] #[cfg_attr(test, assert_instr(fxsave))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _fxsave(mem_addr: *mut u8) { fxsave(mem_addr) } /// Restores the `XMM`, `MMX`, `MXCSR`, and `x87` FPU registers from the /// 512-byte-long 16-byte-aligned memory region `mem_addr`. /// /// The contents of this memory region should have been written to by a /// previous /// `_fxsave` or `_fxsave64` intrinsic. /// /// A misaligned destination operand raises a general-protection (#GP) or an /// alignment check exception (#AC). /// /// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor]. 
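// Illustrative sketch (not part of this crate's API): `_fxsave`/`_fxrstor`
// need a 512-byte region with 16-byte alignment, which `#[repr(align(16))]`
// provides. The `FxArea` and `fxsave_roundtrip` names are hypothetical; the
// crate's own test further down uses the same layout.
#[cfg(target_arch = "x86")]
use std::arch::x86::{_fxrstor, _fxsave};
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::{_fxrstor, _fxsave};

#[repr(align(16))]
struct FxArea {
    data: [u8; 512], // fixed-size FXSAVE image
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn fxsave_roundtrip() {
    if !is_x86_feature_detected!("fxsr") {
        return;
    }
    let mut area = FxArea { data: [0; 512] };
    unsafe {
        _fxsave(area.data.as_mut_ptr()); // save x87/MMX/XMM/MXCSR state
        _fxrstor(area.data.as_ptr()); // restore the state that was just saved
    }
}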
/// /// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html /// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxrstor) #[inline] #[target_feature(enable = "fxsr")] #[cfg_attr(test, assert_instr(fxrstor))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _fxrstor(mem_addr: *const u8) { fxrstor(mem_addr) } #[cfg(test)] mod tests { use crate::core_arch::x86::*; use std::{cmp::PartialEq, fmt}; use stdsimd_test::simd_test; #[repr(align(16))] struct FxsaveArea { data: [u8; 512], // 512 bytes } impl FxsaveArea { fn new() -> FxsaveArea { FxsaveArea { data: [0; 512] } } fn ptr(&mut self) -> *mut u8 { &mut self.data[0] as *mut _ as *mut u8 } } impl PartialEq for FxsaveArea { fn eq(&self, other: &FxsaveArea) -> bool { for i in 0..self.data.len() { if self.data[i] != other.data[i] { return false; } } true } } impl fmt::Debug for FxsaveArea { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "[")?; for i in 0..self.data.len() { write!(f, "{}", self.data[i])?; if i != self.data.len() - 1 { write!(f, ", ")?; } } write!(f, "]") } } #[simd_test(enable = "fxsr")] unsafe fn fxsave() { let mut a = FxsaveArea::new(); let mut b = FxsaveArea::new(); fxsr::_fxsave(a.ptr()); fxsr::_fxrstor(a.ptr()); fxsr::_fxsave(b.ptr()); assert_eq!(a, b); } } core_arch-0.1.5/src/x86/macros.rs010064400007650000024000000055411342163752400147430ustar0000000000000000//! Utility macros. macro_rules! constify_imm6 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm8) & 0b1_1111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), 3 => $expand!(3), 4 => $expand!(4), 5 => $expand!(5), 6 => $expand!(6), 7 => $expand!(7), 8 => $expand!(8), 9 => $expand!(9), 10 => $expand!(10), 11 => $expand!(11), 12 => $expand!(12), 13 => $expand!(13), 14 => $expand!(14), 15 => $expand!(15), 16 => $expand!(16), 17 => $expand!(17), 18 => $expand!(18), 19 => $expand!(19), 20 => $expand!(20), 21 => $expand!(21), 22 => $expand!(22), 23 => $expand!(23), 24 => $expand!(24), 25 => $expand!(25), 26 => $expand!(26), 27 => $expand!(27), 28 => $expand!(28), 29 => $expand!(29), 30 => $expand!(30), _ => $expand!(31), } }; } macro_rules! constify_imm4 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm8) & 0b1111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), 3 => $expand!(3), 4 => $expand!(4), 5 => $expand!(5), 6 => $expand!(6), 7 => $expand!(7), 8 => $expand!(8), 9 => $expand!(9), 10 => $expand!(10), 11 => $expand!(11), 12 => $expand!(12), 13 => $expand!(13), 14 => $expand!(14), _ => $expand!(15), } }; } macro_rules! constify_imm3 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm8) & 0b111 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), 3 => $expand!(3), 4 => $expand!(4), 5 => $expand!(5), 6 => $expand!(6), _ => $expand!(7), } }; } macro_rules! constify_imm2 { ($imm8:expr, $expand:ident) => { #[allow(overflowing_literals)] match ($imm8) & 0b11 { 0 => $expand!(0), 1 => $expand!(1), 2 => $expand!(2), _ => $expand!(3), } }; } #[cfg(test)] macro_rules! assert_approx_eq { ($a:expr, $b:expr, $eps:expr) => {{ let (a, b) = (&$a, &$b); assert!( (*a - *b).abs() < $eps, "assertion failed: `(left !== right)` \ (left: `{:?}`, right: `{:?}`, expect diff: `{:?}`, real diff: `{:?}`)", *a, *b, $eps, (*a - *b).abs() ); }}; } core_arch-0.1.5/src/x86/mmx.rs010064400007650000024000000615321343447103600142600ustar0000000000000000//! 
`i586` MMX instruction set. //! //! The intrinsics here roughly correspond to those in the `mmintrin.h` C //! header. //! //! The reference is [Intel 64 and IA-32 Architectures Software Developer's //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf use crate::{ core_arch::{simd::*, x86::*}, mem::transmute, }; #[cfg(test)] use stdsimd_test::assert_instr; /// Constructs a 64-bit integer vector initialized to zero. #[inline] #[target_feature(enable = "mmx")] // FIXME: this produces a movl instead of xorps on x86 // FIXME: this produces a xor intrinsic instead of xorps on x86_64 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))] pub unsafe fn _mm_setzero_si64() -> __m64 { transmute(0_i64) } /// Adds packed 8-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddb))] pub unsafe fn _mm_add_pi8(a: __m64, b: __m64) -> __m64 { paddb(a, b) } /// Adds packed 8-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddb))] pub unsafe fn _m_paddb(a: __m64, b: __m64) -> __m64 { _mm_add_pi8(a, b) } /// Adds packed 16-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddw))] pub unsafe fn _mm_add_pi16(a: __m64, b: __m64) -> __m64 { paddw(a, b) } /// Adds packed 16-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddw))] pub unsafe fn _m_paddw(a: __m64, b: __m64) -> __m64 { _mm_add_pi16(a, b) } /// Adds packed 32-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddd))] pub unsafe fn _mm_add_pi32(a: __m64, b: __m64) -> __m64 { paddd(a, b) } /// Adds packed 32-bit integers in `a` and `b`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddd))] pub unsafe fn _m_paddd(a: __m64, b: __m64) -> __m64 { _mm_add_pi32(a, b) } /// Adds packed 8-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddsb))] pub unsafe fn _mm_adds_pi8(a: __m64, b: __m64) -> __m64 { paddsb(a, b) } /// Adds packed 8-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddsb))] pub unsafe fn _m_paddsb(a: __m64, b: __m64) -> __m64 { _mm_adds_pi8(a, b) } /// Adds packed 16-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddsw))] pub unsafe fn _mm_adds_pi16(a: __m64, b: __m64) -> __m64 { paddsw(a, b) } /// Adds packed 16-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddsw))] pub unsafe fn _m_paddsw(a: __m64, b: __m64) -> __m64 { _mm_adds_pi16(a, b) } /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddusb))] pub unsafe fn _mm_adds_pu8(a: __m64, b: __m64) -> __m64 { paddusb(a, b) } /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. 
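// Illustrative sketch (not part of this crate): "saturation" means that a
// result outside the lane type's range is clamped to the nearest bound
// instead of wrapping. Rust's built-in `saturating_add` models a single lane
// of `_mm_adds_pi8` / `_mm_adds_pu8`.
fn adds_pi8_lane(a: i8, b: i8) -> i8 {
    a.saturating_add(b) // e.g. (-100) + (-100) clamps to i8::MIN (-128)
}
fn adds_pu8_lane(a: u8, b: u8) -> u8 {
    a.saturating_add(b) // e.g. 200 + 200 clamps to u8::MAX (255)
}
// These match the `_mm_adds_pi8` and `_mm_adds_pu8` tests later in this file.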
#[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddusb))] pub unsafe fn _m_paddusb(a: __m64, b: __m64) -> __m64 { _mm_adds_pu8(a, b) } /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddusw))] pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 { paddusw(a, b) } /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(paddusw))] pub unsafe fn _m_paddusw(a: __m64, b: __m64) -> __m64 { _mm_adds_pu16(a, b) } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubb))] pub unsafe fn _mm_sub_pi8(a: __m64, b: __m64) -> __m64 { psubb(a, b) } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubb))] pub unsafe fn _m_psubb(a: __m64, b: __m64) -> __m64 { _mm_sub_pi8(a, b) } /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubw))] pub unsafe fn _mm_sub_pi16(a: __m64, b: __m64) -> __m64 { psubw(a, b) } /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubw))] pub unsafe fn _m_psubw(a: __m64, b: __m64) -> __m64 { _mm_sub_pi16(a, b) } /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubd))] pub unsafe fn _mm_sub_pi32(a: __m64, b: __m64) -> __m64 { psubd(a, b) } /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubd))] pub unsafe fn _m_psubd(a: __m64, b: __m64) -> __m64 { _mm_sub_pi32(a, b) } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` /// using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubsb))] pub unsafe fn _mm_subs_pi8(a: __m64, b: __m64) -> __m64 { psubsb(a, b) } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` /// using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubsb))] pub unsafe fn _m_psubsb(a: __m64, b: __m64) -> __m64 { _mm_subs_pi8(a, b) } /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` /// using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubsw))] pub unsafe fn _mm_subs_pi16(a: __m64, b: __m64) -> __m64 { psubsw(a, b) } /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` /// using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubsw))] pub unsafe fn _m_psubsw(a: __m64, b: __m64) -> __m64 { _mm_subs_pi16(a, b) } /// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit /// integers in `a` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubusb))] pub unsafe fn _mm_subs_pu8(a: __m64, b: __m64) -> __m64 { psubusb(a, b) } /// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit /// integers in `a` using saturation. 
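// Illustrative sketch (not part of this crate): for the unsigned saturating
// subtract, any difference that would fall below zero is clamped to 0, which
// Rust's `saturating_sub` models for a single lane of `_mm_subs_pu8`.
fn subs_pu8_lane(a: u8, b: u8) -> u8 {
    a.saturating_sub(b) // e.g. 50 - 60 clamps to 0 instead of wrapping
}
// This matches the first lane of the `_mm_subs_pu8` test later in this file.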
#[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubusb))] pub unsafe fn _m_psubusb(a: __m64, b: __m64) -> __m64 { _mm_subs_pu8(a, b) } /// Subtract packed unsigned 16-bit integers in `b` from packed unsigned /// 16-bit integers in `a` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubusw))] pub unsafe fn _mm_subs_pu16(a: __m64, b: __m64) -> __m64 { psubusw(a, b) } /// Subtract packed unsigned 16-bit integers in `b` from packed unsigned /// 16-bit integers in `a` using saturation. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(psubusw))] pub unsafe fn _m_psubusw(a: __m64, b: __m64) -> __m64 { _mm_subs_pu16(a, b) } /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation. /// /// Positive values greater than 0x7F are saturated to 0x7F. Negative values /// less than 0x80 are saturated to 0x80. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(packsswb))] pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 { packsswb(a, b) } /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using signed saturation. /// /// Positive values greater than 0x7F are saturated to 0x7F. Negative values /// less than 0x80 are saturated to 0x80. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(packssdw))] pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 { packssdw(a, b) } /// Compares whether each element of `a` is greater than the corresponding /// element of `b` returning `0` for `false` and `-1` for `true`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(pcmpgtb))] pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 { pcmpgtb(a, b) } /// Compares whether each element of `a` is greater than the corresponding /// element of `b` returning `0` for `false` and `-1` for `true`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(pcmpgtw))] pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 { pcmpgtw(a, b) } /// Compares whether each element of `a` is greater than the corresponding /// element of `b` returning `0` for `false` and `-1` for `true`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(pcmpgtd))] pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 { pcmpgtd(a, b) } /// Unpacks the upper two elements from two `i16x4` vectors and interleaves /// them into the result: `[a.2, b.2, a.3, b.3]`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 { punpckhwd(a, b) } /// Unpacks the upper four elements from two `i8x8` vectors and interleaves /// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(punpckhbw))] pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 { punpckhbw(a, b) } /// Unpacks the lower four elements from two `i8x8` vectors and interleaves /// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(punpcklbw))] pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 { punpcklbw(a, b) } /// Unpacks the lower two elements from two `i16x4` vectors and interleaves /// them into the result: `[a.0 b.0 a.1 b.1]`. 
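// Illustrative sketch (not part of this crate): the comparisons above return
// `0` (all bits clear) for false and `-1` (all bits set) for true, so each
// result lane can be used directly as a bit mask, for example to select
// between two values without a branch. A single-lane model:
fn cmpgt_i16_lane(a: i16, b: i16) -> i16 {
    if a > b { -1 } else { 0 }
}
fn select_i16_lane(mask: i16, if_true: i16, if_false: i16) -> i16 {
    (mask & if_true) | (!mask & if_false) // all-ones mask keeps `if_true`
}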
#[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(punpcklwd))] pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 { punpcklwd(a, b) } /// Unpacks the upper element from two `i32x2` vectors and interleaves them /// into the result: `[a.1, b.1]`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(punpckhdq))] pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 { punpckhdq(a, b) } /// Unpacks the lower element from two `i32x2` vectors and interleaves them /// into the result: `[a.0, b.0]`. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(punpckldq))] pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 { punpckldq(a, b) } /// Sets packed 16-bit integers in dst with the supplied values. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set_pi16(e3: i16, e2: i16, e1: i16, e0: i16) -> __m64 { _mm_setr_pi16(e0, e1, e2, e3) } /// Sets packed 32-bit integers in dst with the supplied values. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 { _mm_setr_pi32(e0, e1) } /// Sets packed 8-bit integers in dst with the supplied values. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set_pi8(e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m64 { _mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7) } /// Broadcasts 16-bit integer a to all elements of dst. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set1_pi16(a: i16) -> __m64 { _mm_setr_pi16(a, a, a, a) } /// Broadcasts 32-bit integer a to all elements of dst. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set1_pi32(a: i32) -> __m64 { _mm_setr_pi32(a, a) } /// Broadcasts 8-bit integer a to all elements of dst. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 { _mm_setr_pi8(a, a, a, a, a, a, a, a) } /// Sets packed 16-bit integers in dst with the supplied values in reverse /// order. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 { transmute(i16x4::new(e0, e1, e2, e3)) } /// Sets packed 32-bit integers in dst with the supplied values in reverse /// order. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 { transmute(i32x2::new(e0, e1)) } /// Sets packed 8-bit integers in dst with the supplied values in reverse order. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_setr_pi8( e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8, ) -> __m64 { transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } /// Empty the MMX state, which marks the x87 FPU registers as available for use /// by x87 instructions. This instruction must be used at the end of all MMX /// technology procedures. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(emms))] pub unsafe fn _mm_empty() { emms() } /// Empty the MMX state, which marks the x87 FPU registers as available for use /// by x87 instructions. This instruction must be used at the end of all MMX /// technology procedures. #[inline] #[target_feature(enable = "mmx")] #[cfg_attr(test, assert_instr(emms))] pub unsafe fn _m_empty() { emms() } /// Copies 32-bit integer `a` to the lower elements of the return value, and zero /// the upper element of the return value. 
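// Illustrative usage sketch (not part of this crate): `_mm_set_*` takes its
// arguments highest element first, while `_mm_setr_*` takes them in element
// order, so the two vectors below are identical. As documented above,
// `_mm_empty()` should run once the MMX work is done, before any x87
// floating-point code executes. The function name `set_vs_setr` is
// hypothetical, and the `mmx` target feature is unstable (see the `__m64`
// docs later in this crate).
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "mmx")]
unsafe fn set_vs_setr() -> (__m64, __m64) {
    let high_first = _mm_set_pi16(3, 2, 1, 0); // elements 3, 2, 1, 0
    let low_first = _mm_setr_pi16(0, 1, 2, 3); // elements 0, 1, 2, 3 -- same value
    _mm_empty(); // mark the x87 registers as usable by x87 code again
    (high_first, low_first)
}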
#[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_cvtsi32_si64(a: i32) -> __m64 { transmute(i32x2::new(a, 0)) } /// Return the lower 32-bit integer in `a`. #[inline] #[target_feature(enable = "mmx")] pub unsafe fn _mm_cvtsi64_si32(a: __m64) -> i32 { let r: i32x2 = transmute(a); r.0 } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.mmx.padd.b"] fn paddb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.padd.w"] fn paddw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.padd.d"] fn paddd(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.padds.b"] fn paddsb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.padds.w"] fn paddsw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.paddus.b"] fn paddusb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.paddus.w"] fn paddusw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.psub.b"] fn psubb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.psub.w"] fn psubw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.psub.d"] fn psubd(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.psubs.b"] fn psubsb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.psubs.w"] fn psubsw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.psubus.b"] fn psubusb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.psubus.w"] fn psubusw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.packsswb"] fn packsswb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.packssdw"] fn packssdw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.pcmpgt.b"] fn pcmpgtb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.pcmpgt.w"] fn pcmpgtw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.pcmpgt.d"] fn pcmpgtd(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.punpckhwd"] fn punpckhwd(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.punpcklwd"] fn punpcklwd(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.punpckhbw"] fn punpckhbw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.punpcklbw"] fn punpcklbw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.punpckhdq"] fn punpckhdq(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.punpckldq"] fn punpckldq(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.emms"] fn emms(); } #[cfg(test)] mod tests { use crate::core_arch::x86::*; use stdsimd_test::simd_test; #[simd_test(enable = "mmx")] unsafe fn test_mm_setzero_si64() { let r: __m64 = transmute(0_i64); assert_eq_m64(r, _mm_setzero_si64()); } #[simd_test(enable = "mmx")] unsafe fn test_mm_add_pi8() { let a = _mm_setr_pi8(-1, -1, 1, 1, -1, 0, 1, 0); let b = _mm_setr_pi8(-127, 101, 99, 126, 0, -1, 0, 1); let e = _mm_setr_pi8(-128, 100, 100, 127, -1, -1, 1, 1); assert_eq_m64(e, _mm_add_pi8(a, b)); assert_eq_m64(e, _m_paddb(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_add_pi16() { let a = _mm_setr_pi16(-1, -1, 1, 1); let b = _mm_setr_pi16(i16::min_value() + 1, 30001, -30001, i16::max_value() - 1); let e = _mm_setr_pi16(i16::min_value(), 30000, -30000, i16::max_value()); assert_eq_m64(e, _mm_add_pi16(a, b)); assert_eq_m64(e, _m_paddw(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_add_pi32() { let a = _mm_setr_pi32(1, -1); let b = _mm_setr_pi32(i32::max_value() - 1, i32::min_value() + 1); let e = _mm_setr_pi32(i32::max_value(), i32::min_value()); assert_eq_m64(e, _mm_add_pi32(a, b)); assert_eq_m64(e, _m_paddd(a, b)); } #[simd_test(enable = "mmx")] unsafe fn 
test_mm_adds_pi8() { let a = _mm_setr_pi8(-100, -1, 1, 100, -1, 0, 1, 0); let b = _mm_setr_pi8(-100, 1, -1, 100, 0, -1, 0, 1); let e = _mm_setr_pi8(i8::min_value(), 0, 0, i8::max_value(), -1, -1, 1, 1); assert_eq_m64(e, _mm_adds_pi8(a, b)); assert_eq_m64(e, _m_paddsb(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_adds_pi16() { let a = _mm_setr_pi16(-32000, 32000, 4, 0); let b = _mm_setr_pi16(-32000, 32000, -5, 1); let e = _mm_setr_pi16(i16::min_value(), i16::max_value(), -1, 1); assert_eq_m64(e, _mm_adds_pi16(a, b)); assert_eq_m64(e, _m_paddsw(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_adds_pu8() { let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 200u8 as i8); let b = _mm_setr_pi8(0, 10, 20, 30, 40, 50, 60, 200u8 as i8); let e = _mm_setr_pi8(0, 11, 22, 33, 44, 55, 66, u8::max_value() as i8); assert_eq_m64(e, _mm_adds_pu8(a, b)); assert_eq_m64(e, _m_paddusb(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_adds_pu16() { let a = _mm_setr_pi16(0, 1, 2, 60000u16 as i16); let b = _mm_setr_pi16(0, 10, 20, 60000u16 as i16); let e = _mm_setr_pi16(0, 11, 22, u16::max_value() as i16); assert_eq_m64(e, _mm_adds_pu16(a, b)); assert_eq_m64(e, _m_paddusw(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_sub_pi8() { let a = _mm_setr_pi8(0, 0, 1, 1, -1, -1, 0, 0); let b = _mm_setr_pi8(-1, 1, -2, 2, 100, -100, -127, 127); let e = _mm_setr_pi8(1, -1, 3, -1, -101, 99, 127, -127); assert_eq_m64(e, _mm_sub_pi8(a, b)); assert_eq_m64(e, _m_psubb(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_sub_pi16() { let a = _mm_setr_pi16(-20000, -20000, 20000, 30000); let b = _mm_setr_pi16(-10000, 10000, -10000, 30000); let e = _mm_setr_pi16(-10000, -30000, 30000, 0); assert_eq_m64(e, _mm_sub_pi16(a, b)); assert_eq_m64(e, _m_psubw(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_sub_pi32() { let a = _mm_setr_pi32(500_000, -500_000); let b = _mm_setr_pi32(500_000, 500_000); let e = _mm_setr_pi32(0, -1_000_000); assert_eq_m64(e, _mm_sub_pi32(a, b)); assert_eq_m64(e, _m_psubd(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_subs_pi8() { let a = _mm_setr_pi8(-100, 100, 0, 0, 0, 0, -5, 5); let b = _mm_setr_pi8(100, -100, i8::min_value(), 127, -1, 1, 3, -3); let e = _mm_setr_pi8( i8::min_value(), i8::max_value(), i8::max_value(), -127, 1, -1, -8, 8, ); assert_eq_m64(e, _mm_subs_pi8(a, b)); assert_eq_m64(e, _m_psubsb(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_subs_pi16() { let a = _mm_setr_pi16(-20000, 20000, 0, 0); let b = _mm_setr_pi16(20000, -20000, -1, 1); let e = _mm_setr_pi16(i16::min_value(), i16::max_value(), 1, -1); assert_eq_m64(e, _mm_subs_pi16(a, b)); assert_eq_m64(e, _m_psubsw(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_subs_pu8() { let a = _mm_setr_pi8(50, 10, 20, 30, 40, 60, 70, 80); let b = _mm_setr_pi8(60, 20, 30, 40, 30, 20, 10, 0); let e = _mm_setr_pi8(0, 0, 0, 0, 10, 40, 60, 80); assert_eq_m64(e, _mm_subs_pu8(a, b)); assert_eq_m64(e, _m_psubusb(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_subs_pu16() { let a = _mm_setr_pi16(10000, 200, 0, 44444u16 as i16); let b = _mm_setr_pi16(20000, 300, 1, 11111); let e = _mm_setr_pi16(0, 0, 0, 33333u16 as i16); assert_eq_m64(e, _mm_subs_pu16(a, b)); assert_eq_m64(e, _m_psubusw(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_packs_pi16() { let a = _mm_setr_pi16(-1, 2, -3, 4); let b = _mm_setr_pi16(-5, 6, -7, 8); let r = _mm_setr_pi8(-1, 2, -3, 4, -5, 6, -7, 8); assert_eq_m64(r, _mm_packs_pi16(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_packs_pi32() { let a = 
_mm_setr_pi32(-1, 2); let b = _mm_setr_pi32(-5, 6); let r = _mm_setr_pi16(-1, 2, -5, 6); assert_eq_m64(r, _mm_packs_pi32(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_cmpgt_pi8() { let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7); let b = _mm_setr_pi8(8, 7, 6, 5, 4, 3, 2, 1); let r = _mm_setr_pi8(0, 0, 0, 0, 0, -1, -1, -1); assert_eq_m64(r, _mm_cmpgt_pi8(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_cmpgt_pi16() { let a = _mm_setr_pi16(0, 1, 2, 3); let b = _mm_setr_pi16(4, 3, 2, 1); let r = _mm_setr_pi16(0, 0, 0, -1); assert_eq_m64(r, _mm_cmpgt_pi16(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_cmpgt_pi32() { let a = _mm_setr_pi32(0, 3); let b = _mm_setr_pi32(1, 2); let r0 = _mm_setr_pi32(0, -1); let r1 = _mm_setr_pi32(-1, 0); assert_eq_m64(r0, _mm_cmpgt_pi32(a, b)); assert_eq_m64(r1, _mm_cmpgt_pi32(b, a)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_unpackhi_pi8() { let a = _mm_setr_pi8(0, 3, 4, 7, 8, 11, 12, 15); let b = _mm_setr_pi8(1, 2, 5, 6, 9, 10, 13, 14); let r = _mm_setr_pi8(8, 9, 11, 10, 12, 13, 15, 14); assert_eq_m64(r, _mm_unpackhi_pi8(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_unpacklo_pi8() { let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7); let b = _mm_setr_pi8(8, 9, 10, 11, 12, 13, 14, 15); let r = _mm_setr_pi8(0, 8, 1, 9, 2, 10, 3, 11); assert_eq_m64(r, _mm_unpacklo_pi8(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_unpackhi_pi16() { let a = _mm_setr_pi16(0, 1, 2, 3); let b = _mm_setr_pi16(4, 5, 6, 7); let r = _mm_setr_pi16(2, 6, 3, 7); assert_eq_m64(r, _mm_unpackhi_pi16(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_unpacklo_pi16() { let a = _mm_setr_pi16(0, 1, 2, 3); let b = _mm_setr_pi16(4, 5, 6, 7); let r = _mm_setr_pi16(0, 4, 1, 5); assert_eq_m64(r, _mm_unpacklo_pi16(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_unpackhi_pi32() { let a = _mm_setr_pi32(0, 3); let b = _mm_setr_pi32(1, 2); let r = _mm_setr_pi32(3, 2); assert_eq_m64(r, _mm_unpackhi_pi32(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_unpacklo_pi32() { let a = _mm_setr_pi32(0, 3); let b = _mm_setr_pi32(1, 2); let r = _mm_setr_pi32(0, 1); assert_eq_m64(r, _mm_unpacklo_pi32(a, b)); } #[simd_test(enable = "mmx")] unsafe fn test_mm_empty() { _mm_empty(); } #[simd_test(enable = "mmx")] unsafe fn test_m_empty() { _m_empty(); } #[simd_test(enable = "mmx")] unsafe fn test_mm_cvtsi32_si64() { let a = _mm_cvtsi32_si64(42); let b = _mm_setr_pi32(42, 0); assert_eq_m64(a, b); } #[simd_test(enable = "mmx")] unsafe fn test_mm_cvtsi64_si32() { let a = _mm_setr_pi32(42, 666); let b = _mm_cvtsi64_si32(a); assert_eq!(b, 42); } } core_arch-0.1.5/src/x86/mod.rs010064400007650000024000000433461344736322300142430ustar0000000000000000//! `x86` and `x86_64` intrinsics. use crate::{intrinsics, marker::Sized, mem::transmute}; #[macro_use] mod macros; types! { /// 64-bit wide integer vector type, x86-specific /// /// This type is the same as the `__m64` type defined by Intel, /// representing a 64-bit SIMD register. Usage of this type typically /// corresponds to the `mmx` target feature. /// /// Internally this type may be viewed as: /// /// * `i8x8` - eight `i8` variables packed together /// * `i16x4` - four `i16` variables packed together /// * `i32x2` - two `i32` variables packed together /// /// (as well as unsigned versions). Each intrinsic may interpret the /// internal bits differently, check the documentation of the intrinsic /// to see how it's being used. 
/// /// Note that this means that an instance of `__m64` typically just means /// a "bag of bits" which is left up to interpretation at the point of use. /// /// Most intrinsics using `__m64` are prefixed with `_mm_` and the /// integer types tend to correspond to suffixes like "pi8" or "pi32" (not /// to be confused with "epiXX", used for `__m128i`). /// /// # Examples /// /// ``` /// # #![feature(stdsimd, mmx_target_feature)] /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # #[target_feature(enable = "mmx")] /// # unsafe fn foo() { /// let all_bytes_zero = _mm_setzero_si64(); /// let all_bytes_one = _mm_set1_pi8(1); /// let two_i32 = _mm_set_pi32(1, 2); /// # } /// # if is_x86_feature_detected!("mmx") { unsafe { foo() } } /// # } /// ``` pub struct __m64(i64); /// 128-bit wide integer vector type, x86-specific /// /// This type is the same as the `__m128i` type defined by Intel, /// representing a 128-bit SIMD register. Usage of this type typically /// corresponds to the `sse` and up target features for x86/x86_64. /// /// Internally this type may be viewed as: /// /// * `i8x16` - sixteen `i8` variables packed together /// * `i16x8` - eight `i16` variables packed together /// * `i32x4` - four `i32` variables packed together /// * `i64x2` - two `i64` variables packed together /// /// (as well as unsigned versions). Each intrinsic may interpret the /// internal bits differently, check the documentation of the intrinsic /// to see how it's being used. /// /// Note that this means that an instance of `__m128i` typically just means /// a "bag of bits" which is left up to interpretation at the point of use. /// /// Most intrinsics using `__m128i` are prefixed with `_mm_` and the /// integer types tend to correspond to suffixes like "epi8" or "epi32". /// /// # Examples /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # #[target_feature(enable = "sse2")] /// # unsafe fn foo() { /// let all_bytes_zero = _mm_setzero_si128(); /// let all_bytes_one = _mm_set1_epi8(1); /// let four_i32 = _mm_set_epi32(1, 2, 3, 4); /// # } /// # if is_x86_feature_detected!("sse2") { unsafe { foo() } } /// # } /// ``` #[stable(feature = "simd_x86", since = "1.27.0")] pub struct __m128i(i64, i64); /// 128-bit wide set of four `f32` types, x86-specific /// /// This type is the same as the `__m128` type defined by Intel, /// representing a 128-bit SIMD register which internally is consisted of /// four packed `f32` instances. Usage of this type typically corresponds /// to the `sse` and up target features for x86/x86_64. /// /// Note that unlike `__m128i`, the integer version of the 128-bit /// registers, this `__m128` type has *one* interpretation. Each instance /// of `__m128` always corresponds to `f32x4`, or four `f32` types packed /// together. /// /// Most intrinsics using `__m128` are prefixed with `_mm_` and are /// suffixed with "ps" (or otherwise contain "ps"). Not to be confused with /// "pd" which is used for `__m128d`. 
/// /// # Examples /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # #[target_feature(enable = "sse")] /// # unsafe fn foo() { /// let four_zeros = _mm_setzero_ps(); /// let four_ones = _mm_set1_ps(1.0); /// let four_floats = _mm_set_ps(1.0, 2.0, 3.0, 4.0); /// # } /// # if is_x86_feature_detected!("sse") { unsafe { foo() } } /// # } /// ``` #[stable(feature = "simd_x86", since = "1.27.0")] pub struct __m128(f32, f32, f32, f32); /// 128-bit wide set of two `f64` types, x86-specific /// /// This type is the same as the `__m128d` type defined by Intel, /// representing a 128-bit SIMD register which internally is consisted of /// two packed `f64` instances. Usage of this type typically corresponds /// to the `sse` and up target features for x86/x86_64. /// /// Note that unlike `__m128i`, the integer version of the 128-bit /// registers, this `__m128d` type has *one* interpretation. Each instance /// of `__m128d` always corresponds to `f64x2`, or two `f64` types packed /// together. /// /// Most intrinsics using `__m128d` are prefixed with `_mm_` and are /// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with /// "ps" which is used for `__m128`. /// /// # Examples /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # #[target_feature(enable = "sse")] /// # unsafe fn foo() { /// let two_zeros = _mm_setzero_pd(); /// let two_ones = _mm_set1_pd(1.0); /// let two_floats = _mm_set_pd(1.0, 2.0); /// # } /// # if is_x86_feature_detected!("sse") { unsafe { foo() } } /// # } /// ``` #[stable(feature = "simd_x86", since = "1.27.0")] pub struct __m128d(f64, f64); /// 256-bit wide integer vector type, x86-specific /// /// This type is the same as the `__m256i` type defined by Intel, /// representing a 256-bit SIMD register. Usage of this type typically /// corresponds to the `avx` and up target features for x86/x86_64. /// /// Internally this type may be viewed as: /// /// * `i8x32` - thirty two `i8` variables packed together /// * `i16x16` - sixteen `i16` variables packed together /// * `i32x8` - eight `i32` variables packed together /// * `i64x4` - four `i64` variables packed together /// /// (as well as unsigned versions). Each intrinsic may interpret the /// internal bits differently, check the documentation of the intrinsic /// to see how it's being used. /// /// Note that this means that an instance of `__m256i` typically just means /// a "bag of bits" which is left up to interpretation at the point of use. /// /// # Examples /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # #[target_feature(enable = "avx")] /// # unsafe fn foo() { /// let all_bytes_zero = _mm256_setzero_si256(); /// let all_bytes_one = _mm256_set1_epi8(1); /// let eight_i32 = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); /// # } /// # if is_x86_feature_detected!("avx") { unsafe { foo() } } /// # } /// ``` #[stable(feature = "simd_x86", since = "1.27.0")] pub struct __m256i(i64, i64, i64, i64); /// 256-bit wide set of eight `f32` types, x86-specific /// /// This type is the same as the `__m256` type defined by Intel, /// representing a 256-bit SIMD register which internally is consisted of /// eight packed `f32` instances. 
Usage of this type typically corresponds /// to the `avx` and up target features for x86/x86_64. /// /// Note that unlike `__m256i`, the integer version of the 256-bit /// registers, this `__m256` type has *one* interpretation. Each instance /// of `__m256` always corresponds to `f32x8`, or eight `f32` types packed /// together. /// /// Most intrinsics using `__m256` are prefixed with `_mm256_` and are /// suffixed with "ps" (or otherwise contain "ps"). Not to be confused with /// "pd" which is used for `__m256d`. /// /// # Examples /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # #[target_feature(enable = "avx")] /// # unsafe fn foo() { /// let eight_zeros = _mm256_setzero_ps(); /// let eight_ones = _mm256_set1_ps(1.0); /// let eight_floats = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0); /// # } /// # if is_x86_feature_detected!("avx") { unsafe { foo() } } /// # } /// ``` #[stable(feature = "simd_x86", since = "1.27.0")] pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32); /// 256-bit wide set of four `f64` types, x86-specific /// /// This type is the same as the `__m256d` type defined by Intel, /// representing a 256-bit SIMD register which internally is consisted of /// four packed `f64` instances. Usage of this type typically corresponds /// to the `avx` and up target features for x86/x86_64. /// /// Note that unlike `__m256i`, the integer version of the 256-bit /// registers, this `__m256d` type has *one* interpretation. Each instance /// of `__m256d` always corresponds to `f64x4`, or four `f64` types packed /// together. /// /// Most intrinsics using `__m256d` are prefixed with `_mm256_` and are /// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with /// "ps" which is used for `__m256`. /// /// # Examples /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # #[target_feature(enable = "avx")] /// # unsafe fn foo() { /// let four_zeros = _mm256_setzero_pd(); /// let four_ones = _mm256_set1_pd(1.0); /// let four_floats = _mm256_set_pd(1.0, 2.0, 3.0, 4.0); /// # } /// # if is_x86_feature_detected!("avx") { unsafe { foo() } } /// # } /// ``` #[stable(feature = "simd_x86", since = "1.27.0")] pub struct __m256d(f64, f64, f64, f64); /// 512-bit wide integer vector type, x86-specific /// /// This type is the same as the `__m512i` type defined by Intel, /// representing a 512-bit SIMD register. Usage of this type typically /// corresponds to the `avx512*` and up target features for x86/x86_64. /// /// Internally this type may be viewed as: /// /// * `i8x64` - sixty-four `i8` variables packed together /// * `i16x32` - thirty-two `i16` variables packed together /// * `i32x16` - sixteen `i32` variables packed together /// * `i64x8` - eight `i64` variables packed together /// /// (as well as unsigned versions). Each intrinsic may interpret the /// internal bits differently, check the documentation of the intrinsic /// to see how it's being used. /// /// Note that this means that an instance of `__m512i` typically just means /// a "bag of bits" which is left up to interpretation at the point of use. 
pub struct __m512i(i64, i64, i64, i64, i64, i64, i64, i64); /// 512-bit wide set of sixteen `f32` types, x86-specific /// /// This type is the same as the `__m512` type defined by Intel, /// representing a 512-bit SIMD register which internally consists of /// sixteen packed `f32` instances. Usage of this type typically corresponds /// to the `avx512*` and up target features for x86/x86_64. /// /// Note that unlike `__m512i`, the integer version of the 512-bit /// registers, this `__m512` type has *one* interpretation. Each instance /// of `__m512` always corresponds to `f32x16`, or sixteen `f32` types /// packed together. /// /// Most intrinsics using `__m512` are prefixed with `_mm512_` and are /// suffixed with "ps" (or otherwise contain "ps"). Not to be confused with /// "pd" which is used for `__m512d`. pub struct __m512( f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, ); /// 512-bit wide set of eight `f64` types, x86-specific /// /// This type is the same as the `__m512d` type defined by Intel, /// representing a 512-bit SIMD register which internally consists of /// eight packed `f64` instances. Usage of this type typically corresponds /// to the `avx512*` and up target features for x86/x86_64. /// /// Note that unlike `__m512i`, the integer version of the 512-bit /// registers, this `__m512d` type has *one* interpretation. Each instance /// of `__m512d` always corresponds to `f64x8`, or eight `f64` types packed /// together. /// /// Most intrinsics using `__m512d` are prefixed with `_mm512_` and are /// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with /// "ps" which is used for `__m512`. pub struct __m512d(f64, f64, f64, f64, f64, f64, f64, f64); } /// The `__mmask16` type used in AVX-512 intrinsics, a 16-bit integer #[allow(non_camel_case_types)] pub type __mmask16 = i16; #[cfg(test)] mod test; #[cfg(test)] pub use self::test::*; #[allow(non_camel_case_types)] #[unstable(feature = "stdimd_internal", issue = "0")] pub(crate) trait m128iExt: Sized { fn as_m128i(self) -> __m128i; #[inline] fn as_u8x16(self) -> crate::core_arch::simd::u8x16 { unsafe { transmute(self.as_m128i()) } } #[inline] fn as_u16x8(self) -> crate::core_arch::simd::u16x8 { unsafe { transmute(self.as_m128i()) } } #[inline] fn as_u32x4(self) -> crate::core_arch::simd::u32x4 { unsafe { transmute(self.as_m128i()) } } #[inline] fn as_u64x2(self) -> crate::core_arch::simd::u64x2 { unsafe { transmute(self.as_m128i()) } } #[inline] fn as_i8x16(self) -> crate::core_arch::simd::i8x16 { unsafe { transmute(self.as_m128i()) } } #[inline] fn as_i16x8(self) -> crate::core_arch::simd::i16x8 { unsafe { transmute(self.as_m128i()) } } #[inline] fn as_i32x4(self) -> crate::core_arch::simd::i32x4 { unsafe { transmute(self.as_m128i()) } } #[inline] fn as_i64x2(self) -> crate::core_arch::simd::i64x2 { unsafe { transmute(self.as_m128i()) } } } impl m128iExt for __m128i { #[inline] fn as_m128i(self) -> Self { self } } #[allow(non_camel_case_types)] #[unstable(feature = "stdimd_internal", issue = "0")] pub(crate) trait m256iExt: Sized { fn as_m256i(self) -> __m256i; #[inline] fn as_u8x32(self) -> crate::core_arch::simd::u8x32 { unsafe { transmute(self.as_m256i()) } } #[inline] fn as_u16x16(self) -> crate::core_arch::simd::u16x16 { unsafe { transmute(self.as_m256i()) } } #[inline] fn as_u32x8(self) -> crate::core_arch::simd::u32x8 { unsafe { transmute(self.as_m256i()) } } #[inline] fn as_u64x4(self) -> crate::core_arch::simd::u64x4 { unsafe { transmute(self.as_m256i()) } } #[inline]
fn as_i8x32(self) -> crate::core_arch::simd::i8x32 { unsafe { transmute(self.as_m256i()) } } #[inline] fn as_i16x16(self) -> crate::core_arch::simd::i16x16 { unsafe { transmute(self.as_m256i()) } } #[inline] fn as_i32x8(self) -> crate::core_arch::simd::i32x8 { unsafe { transmute(self.as_m256i()) } } #[inline] fn as_i64x4(self) -> crate::core_arch::simd::i64x4 { unsafe { transmute(self.as_m256i()) } } } impl m256iExt for __m256i { #[inline] fn as_m256i(self) -> Self { self } } #[allow(non_camel_case_types)] #[unstable(feature = "stdimd_internal", issue = "0")] pub(crate) trait m512iExt: Sized { fn as_m512i(self) -> __m512i; #[inline] fn as_i32x16(self) -> crate::core_arch::simd::i32x16 { unsafe { transmute(self.as_m512i()) } } } impl m512iExt for __m512i { #[inline] fn as_m512i(self) -> Self { self } } mod eflags; pub use self::eflags::*; mod fxsr; pub use self::fxsr::*; mod bswap; pub use self::bswap::*; mod rdtsc; pub use self::rdtsc::*; mod cpuid; pub use self::cpuid::*; mod xsave; pub use self::xsave::*; mod sse; pub use self::sse::*; mod sse2; pub use self::sse2::*; mod sse3; pub use self::sse3::*; mod ssse3; pub use self::ssse3::*; mod sse41; pub use self::sse41::*; mod sse42; pub use self::sse42::*; mod avx; pub use self::avx::*; mod avx2; pub use self::avx2::*; mod fma; pub use self::fma::*; mod abm; pub use self::abm::*; mod bmi1; pub use self::bmi1::*; mod bmi2; pub use self::bmi2::*; #[cfg(not(stdsimd_intel_sde))] mod sse4a; #[cfg(not(stdsimd_intel_sde))] pub use self::sse4a::*; #[cfg(not(stdsimd_intel_sde))] mod tbm; #[cfg(not(stdsimd_intel_sde))] pub use self::tbm::*; mod mmx; pub use self::mmx::*; mod pclmulqdq; pub use self::pclmulqdq::*; mod aes; pub use self::aes::*; mod rdrand; pub use self::rdrand::*; mod sha; pub use self::sha::*; mod adx; pub use self::adx::*; #[cfg(test)] use stdsimd_test::assert_instr; /// Generates the trap instruction `UD2` #[cfg_attr(test, assert_instr(ud2))] #[inline] pub unsafe fn ud2() -> ! { intrinsics::abort() } mod avx512f; pub use self::avx512f::*; mod avx512ifma; pub use self::avx512ifma::*; mod bt; pub use self::bt::*; core_arch-0.1.5/src/x86/pclmulqdq.rs010064400007650000024000000062361343447103600154610ustar0000000000000000//! Carry-less Multiplication (CLMUL) //! //! The reference is [Intel 64 and IA-32 Architectures Software Developer's //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref] (p. 4-241). //! //! [intel64_ref]: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf use crate::core_arch::x86::__m128i; #[cfg(test)] use crate::stdsimd_test::assert_instr; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.pclmulqdq"] fn pclmulqdq(a: __m128i, round_key: __m128i, imm8: u8) -> __m128i; } /// Performs a carry-less multiplication of two 64-bit polynomials over the /// finite field GF(2^k). /// /// The immediate byte is used for determining which halves of `a` and `b` /// should be used. Immediate bits other than 0 and 4 are ignored. 
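/// Bit 0 of `imm8` selects which 64-bit half of `a` is used (0 = low,
/// 1 = high) and bit 4 selects the half of `b`. A small illustrative sketch
/// in the style of the crate's other doc examples (carry-less, so
/// `0b10 ⊗ 0b11 = 0b110`):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # #[target_feature(enable = "pclmulqdq")]
/// # unsafe fn foo() {
/// let a = _mm_set_epi64x(0, 0b10); // low half holds the polynomial x
/// let b = _mm_set_epi64x(0, 0b11); // low half holds the polynomial x + 1
/// // Multiply the low halves of `a` and `b` (imm8 = 0x00); the low 128 bits
/// // of the result hold x^2 + x, i.e. 0b110.
/// let product = _mm_clmulepi64_si128(a, b, 0x00);
/// # let _ = product;
/// # }
/// # if is_x86_feature_detected!("pclmulqdq") { unsafe { foo() } }
/// # }
/// ```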
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clmulepi64_si128) #[inline] #[target_feature(enable = "pclmulqdq")] #[cfg_attr(all(test, not(target_os = "linux")), assert_instr(pclmulqdq, imm8 = 0))] #[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmullqlqdq, imm8 = 0))] #[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmulhqlqdq, imm8 = 1))] #[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmullqhqdq, imm8 = 16))] #[cfg_attr(all(test, target_os = "linux"), assert_instr(pclmulhqhqdq, imm8 = 17))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_clmulepi64_si128(a: __m128i, b: __m128i, imm8: i32) -> __m128i { macro_rules! call { ($imm8:expr) => { pclmulqdq(a, b, $imm8) }; } constify_imm8!(imm8, call) } #[cfg(test)] mod tests { // The constants in the tests below are just bit patterns. They should not // be interpreted as integers; signedness does not make sense for them, but // __m128i happens to be defined in terms of signed integers. #![allow(overflowing_literals)] use stdsimd_test::simd_test; use crate::core_arch::x86::*; #[simd_test(enable = "pclmulqdq")] unsafe fn test_mm_clmulepi64_si128() { // Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf let a = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d); let b = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d); let r00 = _mm_set_epi64x(0x1d4d84c85c3440c0, 0x929633d5d36f0451); let r01 = _mm_set_epi64x(0x1bd17c8d556ab5a1, 0x7fa540ac2a281315); let r10 = _mm_set_epi64x(0x1a2bf6db3a30862f, 0xbabf262df4b7d5c9); let r11 = _mm_set_epi64x(0x1d1e1f2c592e7c45, 0xd66ee03e410fd4ed); assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x00), r00); assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x10), r01); assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x01), r10); assert_eq_m128i(_mm_clmulepi64_si128(a, b, 0x11), r11); let a0 = _mm_set_epi64x(0x0000000000000000, 0x8000000000000000); let r = _mm_set_epi64x(0x4000000000000000, 0x0000000000000000); assert_eq_m128i(_mm_clmulepi64_si128(a0, a0, 0x00), r); } } core_arch-0.1.5/src/x86/rdrand.rs010064400007650000024000000052431345561510300147240ustar0000000000000000//! RDRAND and RDSEED instructions for returning random numbers from an Intel //! on-chip hardware random number generator which has been seeded by an //! on-chip entropy source. #![allow(clippy::module_name_repetitions)] #[allow(improper_ctypes)] extern "unadjusted" { #[link_name = "llvm.x86.rdrand.16"] fn x86_rdrand16_step() -> (u16, i32); #[link_name = "llvm.x86.rdrand.32"] fn x86_rdrand32_step() -> (u32, i32); #[link_name = "llvm.x86.rdseed.16"] fn x86_rdseed16_step() -> (u16, i32); #[link_name = "llvm.x86.rdseed.32"] fn x86_rdseed32_step() -> (u32, i32); } #[cfg(test)] use stdsimd_test::assert_instr; /// Read a hardware generated 16-bit random value and store the result in val. /// Returns 1 if a random value was generated, and 0 otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand16_step) #[inline] #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 { let (v, flag) = x86_rdrand16_step(); *val = v; flag } /// Read a hardware generated 32-bit random value and store the result in val. /// Returns 1 if a random value was generated, and 0 otherwise. 
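///
/// A bounded retry loop is the usual calling pattern, since the hardware can
/// transiently report failure. A minimal illustrative sketch (the hidden
/// `random_u32` wrapper is just scaffolding):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # #[target_feature(enable = "rdrand")]
/// # unsafe fn random_u32() -> Option<u32> {
/// let mut value = 0u32;
/// // Retry a bounded number of times instead of looping forever.
/// for _ in 0..10 {
///     if _rdrand32_step(&mut value) == 1 {
///         return Some(value);
///     }
/// }
/// None
/// # }
/// # if is_x86_feature_detected!("rdrand") { let _ = unsafe { random_u32() }; }
/// # }
/// ```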
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand32_step) #[inline] #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdrand32_step(val: &mut u32) -> i32 { let (v, flag) = x86_rdrand32_step(); *val = v; flag } /// Read a 16-bit NIST SP800-90B and SP800-90C compliant random value and store /// in val. Return 1 if a random value was generated, and 0 otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed16_step) #[inline] #[target_feature(enable = "rdseed")] #[cfg_attr(test, assert_instr(rdseed))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdseed16_step(val: &mut u16) -> i32 { let (v, flag) = x86_rdseed16_step(); *val = v; flag } /// Read a 32-bit NIST SP800-90B and SP800-90C compliant random value and store /// in val. Return 1 if a random value was generated, and 0 otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed32_step) #[inline] #[target_feature(enable = "rdseed")] #[cfg_attr(test, assert_instr(rdseed))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdseed32_step(val: &mut u32) -> i32 { let (v, flag) = x86_rdseed32_step(); *val = v; flag } core_arch-0.1.5/src/x86/rdtsc.rs010064400007650000024000000046761343447103600146040ustar0000000000000000//! RDTSC instructions. #[cfg(test)] use stdsimd_test::assert_instr; /// Reads the current value of the processor’s time-stamp counter. /// /// The processor monotonically increments the time-stamp counter MSR /// every clock cycle and resets it to 0 whenever the processor is /// reset. /// /// The RDTSC instruction is not a serializing instruction. It does /// not necessarily wait until all previous instructions have been /// executed before reading the counter. Similarly, subsequent /// instructions may begin execution before the read operation is /// performed. /// /// On processors that support the Intel 64 architecture, the /// high-order 32 bits of each of RAX and RDX are cleared. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdtsc) #[inline] #[cfg_attr(test, assert_instr(rdtsc))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdtsc() -> i64 { rdtsc() } /// Reads the current value of the processor’s time-stamp counter and /// the `IA32_TSC_AUX MSR`. /// /// The processor monotonically increments the time-stamp counter MSR /// every clock cycle and resets it to 0 whenever the processor is /// reset. /// /// The RDTSCP instruction waits until all previous instructions have /// been executed before reading the counter. However, subsequent /// instructions may begin execution before the read operation is /// performed. /// /// On processors that support the Intel 64 architecture, the /// high-order 32 bits of each of RAX, RDX, and RCX are cleared. 
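///
/// A minimal illustrative sketch (the `aux` destination receives the contents
/// of `IA32_TSC_AUX`, which operating systems commonly initialize with a
/// processor identifier):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # unsafe {
/// let mut aux: u32 = 0;
/// let tsc = __rdtscp(&mut aux);
/// // `tsc` holds the time-stamp counter; `aux` now holds IA32_TSC_AUX.
/// # let _ = tsc;
/// # }
/// # }
/// ```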
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__rdtscp) #[inline] #[cfg_attr(test, assert_instr(rdtscp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn __rdtscp(aux: *mut u32) -> u64 { rdtscp(aux as *mut _) } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.rdtsc"] fn rdtsc() -> i64; #[link_name = "llvm.x86.rdtscp"] fn rdtscp(aux: *mut u8) -> u64; } #[cfg(test)] mod tests { use crate::core_arch::x86::*; use stdsimd_test::simd_test; #[simd_test(enable = "sse2")] unsafe fn _rdtsc() { let r = rdtsc::_rdtsc(); assert_ne!(r, 0); // The chances of this being 0 are infinitesimal } #[simd_test(enable = "sse2")] unsafe fn _rdtscp() { let mut aux = 0; let r = rdtsc::__rdtscp(&mut aux); assert_ne!(r, 0); // The chances of this being 0 are infinitesimal } } core_arch-0.1.5/src/x86/sha.rs010064400007650000024000000216741343447103600142350ustar0000000000000000use crate::{ core_arch::{simd::*, x86::*}, mem::transmute, }; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sha1msg1"] fn sha1msg1(a: i32x4, b: i32x4) -> i32x4; #[link_name = "llvm.x86.sha1msg2"] fn sha1msg2(a: i32x4, b: i32x4) -> i32x4; #[link_name = "llvm.x86.sha1nexte"] fn sha1nexte(a: i32x4, b: i32x4) -> i32x4; #[link_name = "llvm.x86.sha1rnds4"] fn sha1rnds4(a: i32x4, b: i32x4, c: i8) -> i32x4; #[link_name = "llvm.x86.sha256msg1"] fn sha256msg1(a: i32x4, b: i32x4) -> i32x4; #[link_name = "llvm.x86.sha256msg2"] fn sha256msg2(a: i32x4, b: i32x4) -> i32x4; #[link_name = "llvm.x86.sha256rnds2"] fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4; } #[cfg(test)] use stdsimd_test::assert_instr; /// Performs an intermediate calculation for the next four SHA1 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, /// and returning the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1msg1_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha1msg1))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i { transmute(sha1msg1(a.as_i32x4(), b.as_i32x4())) } /// Performs the final calculation for the next four SHA1 message values /// (unsigned 32-bit integers) using the intermediate result in `a` and the /// previous message values in `b`, and returns the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1msg2_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha1msg2))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i { transmute(sha1msg2(a.as_i32x4(), b.as_i32x4())) } /// Calculate SHA1 state variable E after four rounds of operation from the /// current SHA1 state variable `a`, add that value to the scheduled values /// (unsigned 32-bit integers) in `b`, and returns the result. 
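///
/// In the typical four-rounds-at-a-time SHA-1 update sequence, the value
/// produced here is then passed as the second operand of
/// [`_mm_sha1rnds4_epu32`](fn._mm_sha1rnds4_epu32.html).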
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1nexte_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha1nexte))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i { transmute(sha1nexte(a.as_i32x4(), b.as_i32x4())) } /// Performs four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) /// from `a` and some pre-computed sum of the next 4 round message values /// (unsigned 32-bit integers), and state variable E from `b`, and return the /// updated SHA1 state (A,B,C,D). `func` contains the logic functions and round /// constants. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1rnds4_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha1rnds4, func = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i, func: i32) -> __m128i { let a = a.as_i32x4(); let b = b.as_i32x4(); macro_rules! call { ($imm2:expr) => { sha1rnds4(a, b, $imm2) }; } let ret = constify_imm2!(func, call); transmute(ret) } /// Performs an intermediate calculation for the next four SHA256 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, /// and return the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256msg1_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha256msg1))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i { transmute(sha256msg1(a.as_i32x4(), b.as_i32x4())) } /// Performs the final calculation for the next four SHA256 message values /// (unsigned 32-bit integers) using previous message values from `a` and `b`, /// and return the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256msg2_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha256msg2))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i { transmute(sha256msg2(a.as_i32x4(), b.as_i32x4())) } /// Performs 2 rounds of SHA256 operation using an initial SHA256 state /// (C,D,G,H) from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a /// pre-computed sum of the next 2 round message values (unsigned 32-bit /// integers) and the corresponding round constants from `k`, and store the /// updated SHA256 state (A,B,E,F) in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256rnds2_epu32) #[inline] #[target_feature(enable = "sha")] #[cfg_attr(test, assert_instr(sha256rnds2))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m128i { transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4())) } #[cfg(test)] mod tests { use std::f32; use std::f64::{self, NAN}; use std::i32; use std::mem::{self, transmute}; use crate::core_arch::{simd::*, x86::*}; use stdsimd_test::simd_test; use test::black_box; // Used to inhibit constant-folding. 
#[simd_test(enable = "sha")] #[allow(overflowing_literals)] unsafe fn test_mm_sha1msg1_epu32() { let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); let expected = _mm_set_epi64x(0x98829f34f74ad457, 0xda2b1a44d0b5ad3c); let r = _mm_sha1msg1_epu32(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "sha")] #[allow(overflowing_literals)] unsafe fn test_mm_sha1msg2_epu32() { let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); let expected = _mm_set_epi64x(0xf714b202d863d47d, 0x90c30d946b3d3b35); let r = _mm_sha1msg2_epu32(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "sha")] #[allow(overflowing_literals)] unsafe fn test_mm_sha1nexte_epu32() { let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); let expected = _mm_set_epi64x(0x2589d5be923f82a4, 0x59f111f13956c25b); let r = _mm_sha1nexte_epu32(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "sha")] #[allow(overflowing_literals)] unsafe fn test_mm_sha1rnds4_epu32() { let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); let expected = _mm_set_epi64x(0x32b13cd8322f5268, 0xc54420862bd9246f); let r = _mm_sha1rnds4_epu32(a, b, 0); assert_eq_m128i(r, expected); let expected = _mm_set_epi64x(0x6d4c43e56a3c25d9, 0xa7e00fb775cbd3fe); let r = _mm_sha1rnds4_epu32(a, b, 1); assert_eq_m128i(r, expected); let expected = _mm_set_epi64x(0xb304e383c01222f4, 0x66f6b3b1f89d8001); let r = _mm_sha1rnds4_epu32(a, b, 2); assert_eq_m128i(r, expected); let expected = _mm_set_epi64x(0x8189b758bfabfa79, 0xdb08f6e78cae098b); let r = _mm_sha1rnds4_epu32(a, b, 3); assert_eq_m128i(r, expected); } #[simd_test(enable = "sha")] #[allow(overflowing_literals)] unsafe fn test_mm_sha256msg1_epu32() { let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); let expected = _mm_set_epi64x(0xeb84973fd5cda67d, 0x2857b88f406b09ee); let r = _mm_sha256msg1_epu32(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "sha")] #[allow(overflowing_literals)] unsafe fn test_mm_sha256msg2_epu32() { let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); let expected = _mm_set_epi64x(0xb58777ce887fd851, 0x15d1ec8b73ac8450); let r = _mm_sha256msg2_epu32(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "sha")] #[allow(overflowing_literals)] unsafe fn test_mm_sha256rnds2_epu32() { let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98); let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b); let k = _mm_set_epi64x(0, 0x12835b01d807aa98); let expected = _mm_set_epi64x(0xd3063037effb15ea, 0x187ee3db0d6d1d19); let r = _mm_sha256rnds2_epu32(a, b, k); assert_eq_m128i(r, expected); } } core_arch-0.1.5/src/x86/sse.rs010064400007650000024000004351071345561510300142520ustar0000000000000000//! Streaming SIMD Extensions (SSE) use crate::{ core_arch::{simd::*, simd_llvm::*, x86::*}, intrinsics, mem, ptr, }; #[cfg(test)] use stdsimd_test::assert_instr; /// Adds the first component of `a` and `b`, the other components are copied /// from `a`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(addss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_ss(a: __m128, b: __m128) -> __m128 { addss(a, b) } /// Adds __m128 vectors. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(addps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_ps(a: __m128, b: __m128) -> __m128 { simd_add(a, b) } /// Subtracts the first component of `b` from `a`, the other components are /// copied from `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(subss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 { subss(a, b) } /// Subtracts __m128 vectors. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(subps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 { simd_sub(a, b) } /// Multiplies the first component of `a` and `b`, the other components are /// copied from `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(mulss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 { mulss(a, b) } /// Multiplies __m128 vectors. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(mulps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 { simd_mul(a, b) } /// Divides the first component of `b` by `a`, the other components are /// copied from `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(divss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_div_ss(a: __m128, b: __m128) -> __m128 { divss(a, b) } /// Divides __m128 vectors. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(divps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_div_ps(a: __m128, b: __m128) -> __m128 { simd_div(a, b) } /// Returns the square root of the first single-precision (32-bit) /// floating-point element in `a`, the other elements are unchanged. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(sqrtss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sqrt_ss(a: __m128) -> __m128 { sqrtss(a) } /// Returns the square root of packed single-precision (32-bit) floating-point /// elements in `a`. 
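///
/// For example (an illustrative sketch):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # #[target_feature(enable = "sse")]
/// # unsafe fn foo() {
/// let a = _mm_set_ps(16.0, 9.0, 4.0, 1.0);
/// // Every lane is replaced by its square root: (1.0, 2.0, 3.0, 4.0) from
/// // lowest to highest.
/// let r = _mm_sqrt_ps(a);
/// assert_eq!(_mm_cvtss_f32(r), 1.0);
/// # }
/// # if is_x86_feature_detected!("sse") { unsafe { foo() } }
/// # }
/// ```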
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(sqrtps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sqrt_ps(a: __m128) -> __m128 { sqrtps(a) } /// Returns the approximate reciprocal of the first single-precision /// (32-bit) floating-point element in `a`, the other elements are unchanged. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(rcpss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_rcp_ss(a: __m128) -> __m128 { rcpss(a) } /// Returns the approximate reciprocal of packed single-precision (32-bit) /// floating-point elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rcp_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(rcpps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_rcp_ps(a: __m128) -> __m128 { rcpps(a) } /// Returns the approximate reciprocal square root of the fist single-precision /// (32-bit) floating-point elements in `a`, the other elements are unchanged. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(rsqrtss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_rsqrt_ss(a: __m128) -> __m128 { rsqrtss(a) } /// Returns the approximate reciprocal square root of packed single-precision /// (32-bit) floating-point elements in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_rsqrt_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(rsqrtps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_rsqrt_ps(a: __m128) -> __m128 { rsqrtps(a) } /// Compares the first single-precision (32-bit) floating-point element of `a` /// and `b`, and return the minimum value in the first element of the return /// value, the other elements are copied from `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(minss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_ss(a: __m128, b: __m128) -> __m128 { minss(a, b) } /// Compares packed single-precision (32-bit) floating-point elements in `a` and /// `b`, and return the corresponding minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(minps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_ps(a: __m128, b: __m128) -> __m128 { minps(a, b) } /// Compares the first single-precision (32-bit) floating-point element of `a` /// and `b`, and return the maximum value in the first element of the return /// value, the other elements are copied from `a`. 
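///
/// For example (an illustrative sketch):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # #[target_feature(enable = "sse")]
/// # unsafe fn foo() {
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lowest element is 1.0
/// let b = _mm_set_ps(0.0, 0.0, 0.0, 5.0); // lowest element is 5.0
/// // The lowest element becomes max(1.0, 5.0) = 5.0; the rest come from `a`.
/// let r = _mm_max_ss(a, b);
/// assert_eq!(_mm_cvtss_f32(r), 5.0);
/// # }
/// # if is_x86_feature_detected!("sse") { unsafe { foo() } }
/// # }
/// ```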
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(maxss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_ss(a: __m128, b: __m128) -> __m128 { maxss(a, b) } /// Compares packed single-precision (32-bit) floating-point elements in `a` and /// `b`, and return the corresponding maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(maxps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_ps(a: __m128, b: __m128) -> __m128 { maxps(a, b) } /// Bitwise AND of packed single-precision (32-bit) floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_ps) #[inline] #[target_feature(enable = "sse")] // i586 only seems to generate plain `and` instructions, so ignore it. #[cfg_attr( all(test, any(target_arch = "x86_64", target_feature = "sse2")), assert_instr(andps) )] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_and_ps(a: __m128, b: __m128) -> __m128 { let a: __m128i = mem::transmute(a); let b: __m128i = mem::transmute(b); mem::transmute(simd_and(a, b)) } /// Bitwise AND-NOT of packed single-precision (32-bit) floating-point /// elements. /// /// Computes `!a & b` for each bit in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_ps) #[inline] #[target_feature(enable = "sse")] // i586 only seems to generate plain `not` and `and` instructions, so ignore // it. #[cfg_attr( all(test, any(target_arch = "x86_64", target_feature = "sse2")), assert_instr(andnps) )] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 { let a: __m128i = mem::transmute(a); let b: __m128i = mem::transmute(b); let mask: __m128i = mem::transmute(i32x4::splat(-1)); mem::transmute(simd_and(simd_xor(mask, a), b)) } /// Bitwise OR of packed single-precision (32-bit) floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_ps) #[inline] #[target_feature(enable = "sse")] // i586 only seems to generate plain `or` instructions, so we ignore it. #[cfg_attr( all(test, any(target_arch = "x86_64", target_feature = "sse2")), assert_instr(orps) )] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_or_ps(a: __m128, b: __m128) -> __m128 { let a: __m128i = mem::transmute(a); let b: __m128i = mem::transmute(b); mem::transmute(simd_or(a, b)) } /// Bitwise exclusive OR of packed single-precision (32-bit) floating-point /// elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_ps) #[inline] #[target_feature(enable = "sse")] // i586 only seems to generate plain `xor` instructions, so we ignore it. #[cfg_attr( all(test, any(target_arch = "x86_64", target_feature = "sse2")), assert_instr(xorps) )] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 { let a: __m128i = mem::transmute(a); let b: __m128i = mem::transmute(b); mem::transmute(simd_xor(a, b)) } /// Compares the lowest `f32` of both inputs for equality. 
The lowest 32 bits of /// the result will be `0xffffffff` if the two inputs are equal, or `0` /// otherwise. The upper 96 bits of the result are the upper 96 bits of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpeqss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 0) } /// Compares the lowest `f32` of both inputs for less than. The lowest 32 bits /// of the result will be `0xffffffff` if `a.extract(0)` is less than /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the /// upper 96 bits of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpltss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 1) } /// Compares the lowest `f32` of both inputs for less than or equal. The lowest /// 32 bits of the result will be `0xffffffff` if `a.extract(0)` is less than /// or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpless))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmple_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 2) } /// Compares the lowest `f32` of both inputs for greater than. The lowest 32 /// bits of the result will be `0xffffffff` if `a.extract(0)` is greater /// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpltss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 1), [4, 1, 2, 3]) } /// Compares the lowest `f32` of both inputs for greater than or equal. The /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is /// greater than or equal `b.extract(0)`, or `0` otherwise. The upper 96 bits /// of the result are the upper 96 bits of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpless))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpge_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 2), [4, 1, 2, 3]) } /// Compares the lowest `f32` of both inputs for inequality. The lowest 32 bits /// of the result will be `0xffffffff` if `a.extract(0)` is not equal to /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the /// upper 96 bits of `a`. 
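///
/// For example (an illustrative sketch; `_mm_movemask_ps` is used only to
/// make the resulting mask easy to inspect):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # #[target_feature(enable = "sse")]
/// # unsafe fn foo() {
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lowest element is 1.0
/// let b = _mm_set_ps(4.0, 3.0, 2.0, 9.0); // lowest element is 9.0
/// // The lowest lane compares not-equal, so its 32 bits are all ones; the
/// // upper lanes are copied from `a` (positive values, so sign bits clear).
/// let mask = _mm_movemask_ps(_mm_cmpneq_ss(a, b));
/// assert_eq!(mask, 0b0001);
/// # }
/// # if is_x86_feature_detected!("sse") { unsafe { foo() } }
/// # }
/// ```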
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpneqss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpneq_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 4) } /// Compares the lowest `f32` of both inputs for not-less-than. The lowest 32 /// bits of the result will be `0xffffffff` if `a.extract(0)` is not less than /// `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are the /// upper 96 bits of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnltss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnlt_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 5) } /// Compares the lowest `f32` of both inputs for not-less-than-or-equal. The /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is not /// less than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 bits /// of the result are the upper 96 bits of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnless))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnle_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 6) } /// Compares the lowest `f32` of both inputs for not-greater-than. The lowest 32 /// bits of the result will be `0xffffffff` if `a.extract(0)` is not greater /// than `b.extract(0)`, or `0` otherwise. The upper 96 bits of the result are /// the upper 96 bits of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnltss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpngt_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 5), [4, 1, 2, 3]) } /// Compares the lowest `f32` of both inputs for not-greater-than-or-equal. The /// lowest 32 bits of the result will be `0xffffffff` if `a.extract(0)` is not /// greater than or equal to `b.extract(0)`, or `0` otherwise. The upper 96 /// bits of the result are the upper 96 bits of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnless))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnge_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, cmpss(b, a, 6), [4, 1, 2, 3]) } /// Checks if the lowest `f32` of both inputs are ordered. The lowest 32 bits of /// the result will be `0xffffffff` if neither of `a.extract(0)` or /// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpordss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpord_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 7) } /// Checks if the lowest `f32` of both inputs are unordered. 
The lowest 32 bits /// of the result will be `0xffffffff` if any of `a.extract(0)` or /// `b.extract(0)` is a NaN, or `0` otherwise. The upper 96 bits of the result /// are the upper 96 bits of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpunordss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpunord_ss(a: __m128, b: __m128) -> __m128 { cmpss(a, b, 3) } /// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input elements /// were equal, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpeqps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 0) } /// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is less than the corresponding element in `b`, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpltps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 1) } /// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is less than or equal to the corresponding element in `b`, or `0` /// otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpleps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmple_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 2) } /// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is greater than the corresponding element in `b`, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpltps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 1) } /// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is greater than or equal to the corresponding element in `b`, or `0` /// otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpleps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpge_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 2) } /// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input elements /// are **not** equal, or `0` otherwise. 
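///
/// For example (an illustrative sketch; `_mm_movemask_ps` collects the sign
/// bit of each lane of the mask):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # #[target_feature(enable = "sse")]
/// # unsafe fn foo() {
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lanes, low to high: 1, 2, 3, 4
/// let b = _mm_set_ps(4.0, 9.0, 2.0, 9.0); // lanes, low to high: 9, 2, 9, 4
/// // Lanes 0 and 2 differ, so bits 0 and 2 of the movemask are set.
/// let mask = _mm_movemask_ps(_mm_cmpneq_ps(a, b));
/// assert_eq!(mask, 0b0101);
/// # }
/// # if is_x86_feature_detected!("sse") { unsafe { foo() } }
/// # }
/// ```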
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpneqps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpneq_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 4) } /// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is **not** less than the corresponding element in `b`, or `0` /// otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnltps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnlt_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 5) } /// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is **not** less than or equal to the corresponding element in `b`, or /// `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnleps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnle_ps(a: __m128, b: __m128) -> __m128 { cmpps(a, b, 6) } /// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is **not** greater than the corresponding element in `b`, or `0` /// otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnltps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpngt_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 5) } /// Compares each of the four floats in `a` to the corresponding element in `b`. /// The result in the output vector will be `0xffffffff` if the input element /// in `a` is **not** greater than or equal to the corresponding element in `b`, /// or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpnleps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnge_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 6) } /// Compares each of the four floats in `a` to the corresponding element in `b`. /// Returns four floats that have one of two possible bit patterns. The element /// in the output vector will be `0xffffffff` if the input elements in `a` and /// `b` are ordered (i.e., neither of them is a NaN), or 0 otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpordps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpord_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 7) } /// Compares each of the four floats in `a` to the corresponding element in `b`. /// Returns four floats that have one of two possible bit patterns. 
The element /// in the output vector will be `0xffffffff` if the input elements in `a` and /// `b` are unordered (i.e., at least on of them is a NaN), or 0 otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cmpunordps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpunord_ps(a: __m128, b: __m128) -> __m128 { cmpps(b, a, 3) } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if they are equal, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comieq_ss(a: __m128, b: __m128) -> i32 { comieq_ss(a, b) } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is less than the one from `b`, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comilt_ss(a: __m128, b: __m128) -> i32 { comilt_ss(a, b) } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is less than or equal to the one from `b`, or `0` /// otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comile_ss(a: __m128, b: __m128) -> i32 { comile_ss(a, b) } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than the one from `b`, or `0` /// otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comigt_ss(a: __m128, b: __m128) -> i32 { comigt_ss(a, b) } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than or equal to the one from `b`, or /// `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comige_ss(a: __m128, b: __m128) -> i32 { comige_ss(a, b) } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if they are **not** equal, or `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(comiss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comineq_ss(a: __m128, b: __m128) -> i32 { comineq_ss(a, b) } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if they are equal, or `0` otherwise. 
This instruction will not signal /// an exception if either argument is a quiet NaN. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomieq_ss(a: __m128, b: __m128) -> i32 { ucomieq_ss(a, b) } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is less than the one from `b`, or `0` otherwise. /// This instruction will not signal an exception if either argument is a quiet /// NaN. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomilt_ss(a: __m128, b: __m128) -> i32 { ucomilt_ss(a, b) } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is less than or equal to the one from `b`, or `0` /// otherwise. This instruction will not signal an exception if either argument /// is a quiet NaN. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomile_ss(a: __m128, b: __m128) -> i32 { ucomile_ss(a, b) } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than the one from `b`, or `0` /// otherwise. This instruction will not signal an exception if either argument /// is a quiet NaN. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomigt_ss(a: __m128, b: __m128) -> i32 { ucomigt_ss(a, b) } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if the value from `a` is greater than or equal to the one from `b`, or /// `0` otherwise. This instruction will not signal an exception if either /// argument is a quiet NaN. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomige_ss(a: __m128, b: __m128) -> i32 { ucomige_ss(a, b) } /// Compares two 32-bit floats from the low-order bits of `a` and `b`. Returns /// `1` if they are **not** equal, or `0` otherwise. This instruction will not /// signal an exception if either argument is a quiet NaN. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ucomiss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomineq_ss(a: __m128, b: __m128) -> i32 { ucomineq_ss(a, b) } /// Converts the lowest 32 bit float in the input vector to a 32 bit integer. /// /// The result is rounded according to the current rounding mode. 
If the result /// cannot be represented as a 32 bit integer the result will be `0x8000_0000` /// (`std::i32::MIN`) or an invalid operation floating point exception if /// unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)). /// /// This corresponds to the `CVTSS2SI` instruction (with 32 bit output). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si32) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtss2si))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtss_si32(a: __m128) -> i32 { cvtss2si(a) } /// Alias for [`_mm_cvtss_si32`](fn._mm_cvtss_si32.html). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_ss2si) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtss2si))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvt_ss2si(a: __m128) -> i32 { _mm_cvtss_si32(a) } /// Converts the lowest 32 bit float in the input vector to a 32 bit integer /// with /// truncation. /// /// The result is rounded always using truncation (round towards zero). If the /// result cannot be represented as a 32 bit integer the result will be /// `0x8000_0000` (`std::i32::MIN`) or an invalid operation floating point /// exception if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)). /// /// This corresponds to the `CVTTSS2SI` instruction (with 32 bit output). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si32) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvttss2si))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttss_si32(a: __m128) -> i32 { cvttss2si(a) } /// Alias for [`_mm_cvttss_si32`](fn._mm_cvttss_si32.html). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_ss2si) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvttss2si))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtt_ss2si(a: __m128) -> i32 { _mm_cvttss_si32(a) } /// Extracts the lowest 32 bit float from the input vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_f32) #[inline] #[target_feature(enable = "sse")] // No point in using assert_instrs. In Unix x86_64 calling convention this is a // no-op, and on Windows it's just a `mov`. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtss_f32(a: __m128) -> f32 { simd_extract(a, 0) } /// Converts a 32 bit integer to a 32 bit float. The result vector is the input /// vector `a` with the lowest 32 bit float replaced by the converted integer. /// /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 32 bit /// input). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtsi2ss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 { cvtsi2ss(a, b) } /// Alias for [`_mm_cvtsi32_ss`](fn._mm_cvtsi32_ss.html). 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_si2ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtsi2ss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 { _mm_cvtsi32_ss(a, b) } /// Construct a `__m128` with the lowest element set to `a` and the rest set to /// zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_ss(a: f32) -> __m128 { __m128(a, 0.0, 0.0, 0.0) } /// Construct a `__m128` with all elements set to `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(shufps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set1_ps(a: f32) -> __m128 { __m128(a, a, a, a) } /// Alias for [`_mm_set1_ps`](fn._mm_set1_ps.html) /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ps1) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(shufps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_ps1(a: f32) -> __m128 { _mm_set1_ps(a) } /// Construct a `__m128` from four floating point values highest to lowest. /// /// Note that `a` will be the highest 32 bits of the result, and `d` the /// lowest. This matches the standard way of writing bit patterns on x86: /// /// ```text /// bit 127 .. 96 95 .. 64 63 .. 32 31 .. 0 /// +---------+---------+---------+---------+ /// | a | b | c | d | result /// +---------+---------+---------+---------+ /// ``` /// /// Alternatively: /// /// ```text /// let v = _mm_set_ps(d, c, b, a); /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(unpcklps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { __m128(d, c, b, a) } /// Construct a `__m128` from four floating point values lowest to highest. /// /// This matches the memory order of `__m128`, i.e., `a` will be the lowest 32 /// bits of the result, and `d` the highest. /// /// ```text /// assert_eq!(__m128::new(a, b, c, d), _mm_setr_ps(a, b, c, d)); /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(unpcklps))] // On a 32-bit architecture it just copies the operands from the stack. #[cfg_attr(all(test, target_arch = "x86"), assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 { __m128(a, b, c, d) } /// Construct a `__m128` with all elements initialized to zero.
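/// /// Functionally this is equivalent to the following sketch (written in the /// notation used by the other examples in this module): /// /// ```text /// _mm_setzero_ps() == _mm_setr_ps(0.0, 0.0, 0.0, 0.0) /// ```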
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(xorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setzero_ps() -> __m128 { __m128(0.0, 0.0, 0.0, 0.0) } /// A utility function for creating masks to use with Intel shuffle and /// permute intrinsics. #[inline] #[allow(non_snake_case)] #[unstable(feature = "stdsimd", issue = "27731")] pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 { ((z << 6) | (y << 4) | (x << 2) | w) as i32 } /// Shuffles packed single-precision (32-bit) floating-point elements in `a` and /// `b` using `mask`. /// /// The lower half of the result takes values from `a` and the higher half from /// `b`. The mask is split into four 2-bit fields, each of which indexes an /// element from one of the inputs. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(shufps, mask = 3))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_ps(a: __m128, b: __m128, mask: u32) -> __m128 { let mask = (mask & 0xFF) as u8; macro_rules! shuffle_done { ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { simd_shuffle4(a, b, [$x01, $x23, $x45, $x67]) }; } macro_rules! shuffle_x67 { ($x01:expr, $x23:expr, $x45:expr) => { match (mask >> 6) & 0b11 { 0b00 => shuffle_done!($x01, $x23, $x45, 4), 0b01 => shuffle_done!($x01, $x23, $x45, 5), 0b10 => shuffle_done!($x01, $x23, $x45, 6), _ => shuffle_done!($x01, $x23, $x45, 7), } }; } macro_rules! shuffle_x45 { ($x01:expr, $x23:expr) => { match (mask >> 4) & 0b11 { 0b00 => shuffle_x67!($x01, $x23, 4), 0b01 => shuffle_x67!($x01, $x23, 5), 0b10 => shuffle_x67!($x01, $x23, 6), _ => shuffle_x67!($x01, $x23, 7), } }; } macro_rules! shuffle_x23 { ($x01:expr) => { match (mask >> 2) & 0b11 { 0b00 => shuffle_x45!($x01, 0), 0b01 => shuffle_x45!($x01, 1), 0b10 => shuffle_x45!($x01, 2), _ => shuffle_x45!($x01, 3), } }; } match mask & 0b11 { 0b00 => shuffle_x23!(0), 0b01 => shuffle_x23!(1), 0b10 => shuffle_x23!(2), _ => shuffle_x23!(3), } } /// Unpacks and interleaves single-precision (32-bit) floating-point elements /// from the higher half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(unpckhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [2, 6, 3, 7]) } /// Unpacks and interleaves single-precision (32-bit) floating-point elements /// from the lower half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(unpcklps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [0, 4, 1, 5]) } /// Combine higher half of `a` and `b`. The higher half of `b` occupies the /// lower half of result.
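/// /// A sketch of the resulting element layout, in the `extract`/`new` pseudocode /// notation used by the other examples in this module (not real APIs): /// /// ```text /// _mm_movehl_ps(a, b) == __m128::new(b.extract(2), b.extract(3), /// a.extract(2), a.extract(3)) /// ```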
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movehl_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhlps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 { // TODO: figure out why this is a different instruction on Windows. simd_shuffle4(a, b, [6, 7, 2, 3]) } /// Combine lower half of `a` and `b`. The lower half of `b` occupies the /// higher half of result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movelh_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [0, 1, 4, 5]) } /// Returns a mask of the most significant bit of each element in `a`. /// /// The mask is stored in the 4 least significant bits of the return value. /// All other bits are set to `0`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movmskps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { movmskps(a) } /// Sets the upper two single-precision floating-point values with 64 bits of /// data loaded from the address `p`; the lower two values are passed through /// from `a`. #[inline] #[target_feature(enable = "sse")] #[cfg_attr( all( test, any( target_arch = "x86_64", all(target_arch = "x86", target_feature = "sse2") ) ), assert_instr(movhpd) )] // FIXME: 32-bit codegen without SSE2 generates two `shufps` instead of `movhps` #[cfg_attr( all(test, target_arch = "x86", not(target_feature = "sse2")), assert_instr(shufps) )] // TODO: this function is actually not limited to floats, but that's what // matches the C type most closely: `(__m128, *const __m64) -> __m128`. pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 { let q = p as *const f32x2; let b: f32x2 = *q; let bb = simd_shuffle4(b, b, [0, 1, 0, 1]); simd_shuffle4(a, bb, [0, 1, 4, 5]) } /// Loads two floats from `p` into the lower half of a `__m128`. The upper half /// is copied from the upper half of `a`. #[inline] #[target_feature(enable = "sse")] #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movlpd))] #[cfg_attr( all(test, target_arch = "x86", target_feature = "sse2"), assert_instr(movlpd) )] // FIXME: On 32-bit targets without SSE2, it just generates two `movss`... #[cfg_attr( all(test, target_arch = "x86", not(target_feature = "sse2")), assert_instr(movss) )] pub unsafe fn _mm_loadl_pi(a: __m128, p: *const __m64) -> __m128 { let q = p as *const f32x2; let b: f32x2 = *q; let bb = simd_shuffle4(b, b, [0, 1, 0, 1]); simd_shuffle4(a, bb, [4, 5, 2, 3]) } /// Construct a `__m128` with the lowest element read from `p` and the other /// elements set to zero. /// /// This corresponds to instructions `VMOVSS` / `MOVSS`.
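/// /// Functionally equivalent to the following sketch, assuming `p` is valid for /// a four-byte read: /// /// ```text /// _mm_load_ss(p) == _mm_set_ss(*p) /// ```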
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load_ss(p: *const f32) -> __m128 { __m128(*p, 0.0, 0.0, 0.0) } /// Construct a `__m128` by duplicating the value read from `p` into all /// elements. /// /// This corresponds to instructions `VMOVSS` / `MOVSS` followed by some /// shuffling. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load1_ps(p: *const f32) -> __m128 { let a = *p; __m128(a, a, a, a) } /// Alias for [`_mm_load1_ps`](fn._mm_load1_ps.html) /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ps1) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 { _mm_load1_ps(p) } /// Loads four `f32` values from *aligned* memory into a `__m128`. If the /// pointer is not aligned to a 128-bit boundary (16 bytes) a general /// protection fault will be triggered (fatal program crash). /// /// Use [`_mm_loadu_ps`](fn._mm_loadu_ps.html) for potentially unaligned /// memory. /// /// This corresponds to instructions `VMOVAPS` / `MOVAPS`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 { *(p as *const __m128) } /// Loads four `f32` values from memory into a `__m128`. There are no /// restrictions /// on memory alignment. For aligned memory /// [`_mm_load_ps`](fn._mm_load_ps.html) /// may be faster. /// /// This corresponds to instructions `VMOVUPS` / `MOVUPS`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 { // Note: Using `*p` would require `f32` alignment, but `movups` has no // alignment restrictions. let mut dst = _mm_undefined_ps(); ptr::copy_nonoverlapping( p as *const u8, &mut dst as *mut __m128 as *mut u8, mem::size_of::<__m128>(), ); dst } /// Loads four `f32` values from aligned memory into a `__m128` in reverse /// order. /// /// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general /// protection fault will be triggered (fatal program crash). /// /// Functionally equivalent to the following code sequence (assuming `p` /// satisfies the alignment restrictions): /// /// ```text /// let a0 = *p; /// let a1 = *p.offset(1); /// let a2 = *p.offset(2); /// let a3 = *p.offset(3); /// __m128::new(a3, a2, a1, a0) /// ``` /// /// This corresponds to instructions `VMOVAPS` / `MOVAPS` followed by some /// shuffling. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 { let a = _mm_load_ps(p); simd_shuffle4(a, a, [3, 2, 1, 0]) } /// Stores the upper half of `a` (64 bits) into memory. /// /// This intrinsic corresponds to the `MOVHPS` instruction. The compiler may /// choose to generate an equivalent sequence of other instructions. #[inline] #[target_feature(enable = "sse")] // On i686 and up LLVM actually generates MOVHPD instead of MOVHPS, that's // fine. // On i586 (no SSE2) it just generates plain MOV instructions. #[cfg_attr( all(test, any(target_arch = "x86_64", target_feature = "sse2"), not(target_os = "windows")), // assert_instr(movhpd) assert_instr(movhps) // LLVM7 prefers single-precision instructions )] pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) { #[cfg(target_arch = "x86")] { // If this is a `f64x2` then on i586, LLVM generates fldl & fstpl which // is just silly let a64: u64x2 = mem::transmute(a); let a_hi = a64.extract(1); *(p as *mut u64) = a_hi; } #[cfg(target_arch = "x86_64")] { // If this is a `u64x2` LLVM generates a pshufd + movq, but we really // want a MOVHPD or MOVHPS here. let a64: f64x2 = mem::transmute(a); let a_hi = a64.extract(1); *p = mem::transmute(a_hi); } } /// Stores the lower half of `a` (64 bits) into memory. /// /// This intrinsic corresponds to the `MOVQ` instruction. The compiler may /// choose to generate an equivalent sequence of other instructions. #[inline] #[target_feature(enable = "sse")] // On i586 the codegen just generates plain MOVs. No need to test for that. #[cfg_attr( all( test, any(target_arch = "x86_64", target_feature = "sse2"), not(target_os = "windows") ), assert_instr(movlps) )] pub unsafe fn _mm_storel_pi(p: *mut __m64, a: __m128) { #[cfg(target_arch = "x86")] { // Same as for _mm_storeh_pi: i586 code gen would use floating point // stack. let a64: u64x2 = mem::transmute(a); let a_hi = a64.extract(0); *(p as *mut u64) = a_hi; } #[cfg(target_arch = "x86_64")] { let a64: f64x2 = mem::transmute(a); let a_hi = a64.extract(0); *p = mem::transmute(a_hi); } } /// Stores the lowest 32 bit float of `a` into memory. /// /// This intrinsic corresponds to the `MOVSS` instruction. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) { *p = simd_extract(a, 0); } /// Stores the lowest 32 bit float of `a` repeated four times into *aligned* /// memory. /// /// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general /// protection fault will be triggered (fatal program crash).
/// /// Functionally equivalent to the following code sequence (assuming `p` /// satisfies the alignment restrictions): /// /// ```text /// let x = a.extract(0); /// *p = x; /// *p.offset(1) = x; /// *p.offset(2) = x; /// *p.offset(3) = x; /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) { let b: __m128 = simd_shuffle4(a, a, [0, 0, 0, 0]); *(p as *mut __m128) = b; } /// Alias for [`_mm_store1_ps`](fn._mm_store1_ps.html) /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ps1) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) { _mm_store1_ps(p, a); } /// Stores four 32-bit floats into *aligned* memory. /// /// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general /// protection fault will be triggered (fatal program crash). /// /// Use [`_mm_storeu_ps`](fn._mm_storeu_ps.html) for potentially unaligned /// memory. /// /// This corresponds to instructions `VMOVAPS` / `MOVAPS`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) { *(p as *mut __m128) = a; } /// Stores four 32-bit floats into memory. There are no restrictions on memory /// alignment. For aligned memory [`_mm_store_ps`](fn._mm_store_ps.html) may be /// faster. /// /// This corresponds to instructions `VMOVUPS` / `MOVUPS`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) { ptr::copy_nonoverlapping( &a as *const __m128 as *const u8, p as *mut u8, mem::size_of::<__m128>(), ); } /// Stores four 32-bit floats into *aligned* memory in reverse order. /// /// If the pointer is not aligned to a 128-bit boundary (16 bytes) a general /// protection fault will be triggered (fatal program crash). /// /// Functionally equivalent to the following code sequence (assuming `p` /// satisfies the alignment restrictions): /// /// ```text /// *p = a.extract(3); /// *p.offset(1) = a.extract(2); /// *p.offset(2) = a.extract(1); /// *p.offset(3) = a.extract(0); /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) { let b: __m128 = simd_shuffle4(a, a, [3, 2, 1, 0]); *(p as *mut __m128) = b; } /// Returns a `__m128` with the first component from `b` and the remaining /// components from `a`. 
/// /// In other words for any `a` and `b`: /// ```text /// _mm_move_ss(a, b) == a.replace(0, b.extract(0)) /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_move_ss(a: __m128, b: __m128) -> __m128 { simd_shuffle4(a, b, [4, 1, 2, 3]) } /// Performs a serializing operation on all store-to-memory instructions that /// were issued prior to this instruction. /// /// Guarantees that every store instruction that precedes the fence in program /// order is globally visible before any store instruction that follows the /// fence in program order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sfence) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(sfence))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sfence() { sfence() } /// Gets the unsigned 32-bit value of the MXCSR control and status register. /// /// For more info see [`_mm_setcsr`](fn._mm_setcsr.html). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_getcsr) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(stmxcsr))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_getcsr() -> u32 { let mut result = 0_i32; stmxcsr((&mut result) as *mut _ as *mut i8); result as u32 } /// Sets the MXCSR register with the 32-bit unsigned integer value. /// /// This register controls how SIMD instructions handle floating point /// operations. Modifying this register only affects the current thread. /// /// It contains several groups of flags: /// /// * *Exception flags* report which exceptions occurred since they were last /// reset. /// /// * *Masking flags* can be used to mask (ignore) certain exceptions. By /// default /// these flags are all set to 1, so all exceptions are masked. When an /// exception is masked, the processor simply sets the exception flag and /// continues the operation. If the exception is unmasked, the flag is also set /// but additionally an exception handler is invoked. /// /// * *Rounding mode flags* control the rounding mode of floating point /// instructions. /// /// * The *denormals-are-zero mode flag* turns all numbers which would be /// denormalized (exponent bits are all zeros) into zeros. /// /// ## Exception Flags /// /// * `_MM_EXCEPT_INVALID`: An invalid operation was performed (e.g., dividing /// Infinity by Infinity). /// /// * `_MM_EXCEPT_DENORM`: An operation attempted to operate on a denormalized /// number. Mainly this can cause loss of precision. /// /// * `_MM_EXCEPT_DIV_ZERO`: Division by zero occurred. /// /// * `_MM_EXCEPT_OVERFLOW`: A numeric overflow exception occurred, i.e., a /// result was too large to be represented (e.g., an `f32` with absolute /// value /// greater than `2^128`). /// /// * `_MM_EXCEPT_UNDERFLOW`: A numeric underflow exception occurred, i.e., a /// result was too small to be represented in a normalized way (e.g., an /// `f32` /// with absolute value smaller than `2^-126`). /// /// * `_MM_EXCEPT_INEXACT`: An inexact-result exception occurred (a.k.a. /// precision exception). This means some precision was lost due to rounding. /// For example, the fraction `1/3` cannot be represented accurately in a /// 32 or 64 bit float and computing it would cause this exception to be /// raised.
Precision exceptions are very common, so they are usually masked. /// /// Exception flags can be read and set using the convenience functions /// `_MM_GET_EXCEPTION_STATE` and `_MM_SET_EXCEPTION_STATE`. For example, to /// check if an operation caused some overflow: /// /// ```rust,ignore /// _MM_SET_EXCEPTION_STATE(0); // clear all exception flags /// // perform calculations /// if _MM_GET_EXCEPTION_STATE() & _MM_EXCEPT_OVERFLOW != 0 { /// // handle overflow /// } /// ``` /// /// ## Masking Flags /// /// There is one masking flag for each exception flag: `_MM_MASK_INVALID`, /// `_MM_MASK_DENORM`, `_MM_MASK_DIV_ZERO`, `_MM_MASK_OVERFLOW`, /// `_MM_MASK_UNDERFLOW`, `_MM_MASK_INEXACT`. /// /// A single masking bit can be set via /// /// ```rust,ignore /// _MM_SET_EXCEPTION_MASK(_MM_MASK_UNDERFLOW); /// ``` /// /// However, since mask bits are by default all set to 1, it is more common to /// want to *disable* certain bits. For example, to unmask the underflow /// exception, use: /// /// ```rust,ignore /// _mm_setcsr(_mm_getcsr() & !_MM_MASK_UNDERFLOW); // unmask underflow exception /// ``` /// /// Warning: an unmasked exception will cause an exception handler to be /// called. /// The standard handler will simply terminate the process. So, in this case /// any underflow exception would terminate the current process with something /// like `signal: 8, SIGFPE: erroneous arithmetic operation`. /// /// ## Rounding Mode /// /// The rounding mode is described using two bits. It can be read and set using /// the convenience wrappers `_MM_GET_ROUNDING_MODE()` and /// `_MM_SET_ROUNDING_MODE(mode)`. /// /// The rounding modes are: /// /// * `_MM_ROUND_NEAREST`: (default) Round to the value closest to the infinite /// precision result. If two values are equally close, round to even (i.e., /// least significant bit will be zero). /// /// * `_MM_ROUND_DOWN`: Round toward negative Infinity. /// /// * `_MM_ROUND_UP`: Round toward positive Infinity. /// /// * `_MM_ROUND_TOWARD_ZERO`: Round towards zero (truncate). /// /// Example: /// /// ```rust,ignore /// _MM_SET_ROUNDING_MODE(_MM_ROUND_DOWN) /// ``` /// /// ## Denormals-are-zero/Flush-to-zero Mode /// /// If this bit is set, values that would be denormalized will be set to zero /// instead. This is turned off by default.
/// /// You can read and enable/disable this mode via the helper functions /// `_MM_GET_FLUSH_ZERO_MODE()` and `_MM_SET_FLUSH_ZERO_MODE()`: /// /// ```rust,ignore /// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF); // turn off (default) /// _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // turn on /// ``` /// /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setcsr) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(ldmxcsr))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setcsr(val: u32) { ldmxcsr(&val as *const _ as *const i8); } /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_INVALID: u32 = 0x0001; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_DENORM: u32 = 0x0002; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_DIV_ZERO: u32 = 0x0004; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_OVERFLOW: u32 = 0x0008; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_UNDERFLOW: u32 = 0x0010; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_INEXACT: u32 = 0x0020; /// See [`_MM_GET_EXCEPTION_STATE`](fn._MM_GET_EXCEPTION_STATE.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_EXCEPT_MASK: u32 = 0x003f; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_INVALID: u32 = 0x0080; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_DENORM: u32 = 0x0100; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_DIV_ZERO: u32 = 0x0200; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_OVERFLOW: u32 = 0x0400; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_UNDERFLOW: u32 = 0x0800; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_INEXACT: u32 = 0x1000; /// See [`_MM_GET_EXCEPTION_MASK`](fn._MM_GET_EXCEPTION_MASK.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_MASK_MASK: u32 = 0x1f80; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_ROUND_NEAREST: u32 = 0x0000; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_ROUND_DOWN: u32 = 0x2000; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_ROUND_UP: u32 = 0x4000; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_ROUND_TOWARD_ZERO: u32 = 0x6000; /// See [`_MM_GET_ROUNDING_MODE`](fn._MM_GET_ROUNDING_MODE.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_ROUND_MASK: u32 = 0x6000; /// See [`_MM_GET_FLUSH_ZERO_MODE`](fn._MM_GET_FLUSH_ZERO_MODE.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FLUSH_ZERO_MASK: u32 = 0x8000; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const 
_MM_FLUSH_ZERO_ON: u32 = 0x8000; /// See [`_mm_setcsr`](fn._mm_setcsr.html) #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_FLUSH_ZERO_OFF: u32 = 0x0000; /// See [`_mm_setcsr`](fn._mm_setcsr.html) /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_EXCEPTION_MASK) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_GET_EXCEPTION_MASK() -> u32 { _mm_getcsr() & _MM_MASK_MASK } /// See [`_mm_setcsr`](fn._mm_setcsr.html) /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_EXCEPTION_STATE) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_GET_EXCEPTION_STATE() -> u32 { _mm_getcsr() & _MM_EXCEPT_MASK } /// See [`_mm_setcsr`](fn._mm_setcsr.html) /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_FLUSH_ZERO_MODE) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_GET_FLUSH_ZERO_MODE() -> u32 { _mm_getcsr() & _MM_FLUSH_ZERO_MASK } /// See [`_mm_setcsr`](fn._mm_setcsr.html) /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_GET_ROUNDING_MODE) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_GET_ROUNDING_MODE() -> u32 { _mm_getcsr() & _MM_ROUND_MASK } /// See [`_mm_setcsr`](fn._mm_setcsr.html) /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_EXCEPTION_MASK) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_SET_EXCEPTION_MASK(x: u32) { _mm_setcsr((_mm_getcsr() & !_MM_MASK_MASK) | x) } /// See [`_mm_setcsr`](fn._mm_setcsr.html) /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_EXCEPTION_STATE) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_SET_EXCEPTION_STATE(x: u32) { _mm_setcsr((_mm_getcsr() & !_MM_EXCEPT_MASK) | x) } /// See [`_mm_setcsr`](fn._mm_setcsr.html) /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_FLUSH_ZERO_MODE) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_SET_FLUSH_ZERO_MODE(x: u32) { let val = (_mm_getcsr() & !_MM_FLUSH_ZERO_MASK) | x; // println!("setting csr={:x}", val); _mm_setcsr(val) } /// See [`_mm_setcsr`](fn._mm_setcsr.html) /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_SET_ROUNDING_MODE) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_SET_ROUNDING_MODE(x: u32) { _mm_setcsr((_mm_getcsr() & !_MM_ROUND_MASK) | x) } /// See [`_mm_prefetch`](fn._mm_prefetch.html). #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_HINT_T0: i32 = 3; /// See [`_mm_prefetch`](fn._mm_prefetch.html). #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_HINT_T1: i32 = 2; /// See [`_mm_prefetch`](fn._mm_prefetch.html). 
#[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_HINT_T2: i32 = 1; /// See [`_mm_prefetch`](fn._mm_prefetch.html). #[stable(feature = "simd_x86", since = "1.27.0")] pub const _MM_HINT_NTA: i32 = 0; /// Fetch the cache line that contains address `p` using the given `strategy`. /// /// The `strategy` must be one of: /// /// * [`_MM_HINT_T0`](constant._MM_HINT_T0.html): Fetch into all levels of the /// cache hierarchy. /// /// * [`_MM_HINT_T1`](constant._MM_HINT_T1.html): Fetch into L2 and higher. /// /// * [`_MM_HINT_T2`](constant._MM_HINT_T2.html): Fetch into L3 and higher or /// an implementation-specific choice (e.g., L2 if there is no L3). /// /// * [`_MM_HINT_NTA`](constant._MM_HINT_NTA.html): Fetch data using the /// non-temporal access (NTA) hint. It may be a place closer than main memory /// but outside of the cache hierarchy. This is used to reduce access latency /// without polluting the cache. /// /// The actual implementation depends on the particular CPU. This instruction /// is considered a hint, so the CPU is also free to simply ignore the request. /// /// The amount of prefetched data depends on the cache line size of the /// specific CPU, but it will be at least 32 bytes. /// /// Common caveats: /// /// * Most modern CPUs already automatically prefetch data based on predicted /// access patterns. /// /// * Data is usually not fetched if this would cause a TLB miss or a page /// fault. /// /// * Too much prefetching can cause unnecessary cache evictions. /// /// * Prefetching may also fail if there are not enough memory-subsystem /// resources (e.g., request buffers). /// /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_prefetch) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(prefetcht0, strategy = _MM_HINT_T0))] #[cfg_attr(test, assert_instr(prefetcht1, strategy = _MM_HINT_T1))] #[cfg_attr(test, assert_instr(prefetcht2, strategy = _MM_HINT_T2))] #[cfg_attr(test, assert_instr(prefetchnta, strategy = _MM_HINT_NTA))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) { // The `strategy` must be a compile-time constant, so we use a short form // of `constify_imm8!` for now. // We use the `llvm.prefetch` intrinsic with `rw` = 0 (read), and // `cache type` = 1 (data cache). `locality` is based on our `strategy`. macro_rules! pref { ($imm8:expr) => { match $imm8 { 0 => prefetch(p, 0, 0, 1), 1 => prefetch(p, 0, 1, 1), 2 => prefetch(p, 0, 2, 1), _ => prefetch(p, 0, 3, 1), } }; } pref!(strategy) } /// Returns a vector of type `__m128` with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ps) #[inline] #[target_feature(enable = "sse")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_undefined_ps() -> __m128 { // FIXME: this function should return MaybeUninit<__m128> mem::MaybeUninit::<__m128>::uninit().assume_init() } /// Transposes the 4x4 matrix formed by 4 rows of `__m128` in place.
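/// /// A sketch of the effect, with each row written in memory order (lowest /// element first); the `unsafe` and target-feature scaffolding needed to /// actually run this is omitted: /// /// ```rust,ignore /// let mut row0 = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); /// let mut row1 = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); /// let mut row2 = _mm_setr_ps(9.0, 10.0, 11.0, 12.0); /// let mut row3 = _mm_setr_ps(13.0, 14.0, 15.0, 16.0); /// _MM_TRANSPOSE4_PS(&mut row0, &mut row1, &mut row2, &mut row3); /// // row0 == [1.0, 5.0, 9.0, 13.0] /// // row1 == [2.0, 6.0, 10.0, 14.0] /// // row2 == [3.0, 7.0, 11.0, 15.0] /// // row3 == [4.0, 8.0, 12.0, 16.0] /// ```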
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_MM_TRANSPOSE4_PS) #[inline] #[allow(non_snake_case)] #[target_feature(enable = "sse")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _MM_TRANSPOSE4_PS( row0: &mut __m128, row1: &mut __m128, row2: &mut __m128, row3: &mut __m128, ) { let tmp0 = _mm_unpacklo_ps(*row0, *row1); let tmp2 = _mm_unpacklo_ps(*row2, *row3); let tmp1 = _mm_unpackhi_ps(*row0, *row1); let tmp3 = _mm_unpackhi_ps(*row2, *row3); *row0 = _mm_movelh_ps(tmp0, tmp2); *row1 = _mm_movehl_ps(tmp2, tmp0); *row2 = _mm_movelh_ps(tmp1, tmp3); *row3 = _mm_movehl_ps(tmp3, tmp1); } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sse.add.ss"] fn addss(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse.sub.ss"] fn subss(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse.mul.ss"] fn mulss(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse.div.ss"] fn divss(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse.sqrt.ss"] fn sqrtss(a: __m128) -> __m128; #[link_name = "llvm.x86.sse.sqrt.ps"] fn sqrtps(a: __m128) -> __m128; #[link_name = "llvm.x86.sse.rcp.ss"] fn rcpss(a: __m128) -> __m128; #[link_name = "llvm.x86.sse.rcp.ps"] fn rcpps(a: __m128) -> __m128; #[link_name = "llvm.x86.sse.rsqrt.ss"] fn rsqrtss(a: __m128) -> __m128; #[link_name = "llvm.x86.sse.rsqrt.ps"] fn rsqrtps(a: __m128) -> __m128; #[link_name = "llvm.x86.sse.min.ss"] fn minss(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse.min.ps"] fn minps(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse.max.ss"] fn maxss(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse.max.ps"] fn maxps(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse.movmsk.ps"] fn movmskps(a: __m128) -> i32; #[link_name = "llvm.x86.sse.cmp.ps"] fn cmpps(a: __m128, b: __m128, imm8: i8) -> __m128; #[link_name = "llvm.x86.sse.comieq.ss"] fn comieq_ss(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.sse.comilt.ss"] fn comilt_ss(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.sse.comile.ss"] fn comile_ss(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.sse.comigt.ss"] fn comigt_ss(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.sse.comige.ss"] fn comige_ss(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.sse.comineq.ss"] fn comineq_ss(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.sse.ucomieq.ss"] fn ucomieq_ss(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.sse.ucomilt.ss"] fn ucomilt_ss(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.sse.ucomile.ss"] fn ucomile_ss(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.sse.ucomigt.ss"] fn ucomigt_ss(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.sse.ucomige.ss"] fn ucomige_ss(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.sse.ucomineq.ss"] fn ucomineq_ss(a: __m128, b: __m128) -> i32; #[link_name = "llvm.x86.sse.cvtss2si"] fn cvtss2si(a: __m128) -> i32; #[link_name = "llvm.x86.sse.cvttss2si"] fn cvttss2si(a: __m128) -> i32; #[link_name = "llvm.x86.sse.cvtsi2ss"] fn cvtsi2ss(a: __m128, b: i32) -> __m128; #[link_name = "llvm.x86.sse.sfence"] fn sfence(); #[link_name = "llvm.x86.sse.stmxcsr"] fn stmxcsr(p: *mut i8); #[link_name = "llvm.x86.sse.ldmxcsr"] fn ldmxcsr(p: *const i8); #[link_name = "llvm.prefetch"] fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32); #[link_name = "llvm.x86.sse.cmp.ss"] fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128; #[link_name = "llvm.x86.mmx.movnt.dq"] fn movntdq(a: *mut 
__m64, b: __m64); #[link_name = "llvm.x86.sse.cvtpi2ps"] fn cvtpi2ps(a: __m128, b: __m64) -> __m128; #[link_name = "llvm.x86.mmx.maskmovq"] fn maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8); #[link_name = "llvm.x86.mmx.pextr.w"] fn pextrw(a: __m64, imm8: i32) -> i32; #[link_name = "llvm.x86.mmx.pinsr.w"] fn pinsrw(a: __m64, d: i32, imm8: i32) -> __m64; #[link_name = "llvm.x86.mmx.pmovmskb"] fn pmovmskb(a: __m64) -> i32; #[link_name = "llvm.x86.sse.pshuf.w"] fn pshufw(a: __m64, imm8: i8) -> __m64; #[link_name = "llvm.x86.mmx.pmaxs.w"] fn pmaxsw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.pmaxu.b"] fn pmaxub(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.pmins.w"] fn pminsw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.pminu.b"] fn pminub(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.pmulhu.w"] fn pmulhuw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.pmull.w"] fn pmullw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.pavg.b"] fn pavgb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.pavg.w"] fn pavgw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.psad.bw"] fn psadbw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.sse.cvtps2pi"] fn cvtps2pi(a: __m128) -> __m64; #[link_name = "llvm.x86.sse.cvttps2pi"] fn cvttps2pi(a: __m128) -> __m64; } /// Stores `a` into the memory at `mem_addr` using a non-temporal memory hint. /// /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection /// exception _may_ be generated. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_ps) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(movntps))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) { intrinsics::nontemporal_store(mem_addr as *mut __m128, a); } /// Stores 64-bits of integer data from `a` into memory using a non-temporal /// memory hint. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(movntq))] pub unsafe fn _mm_stream_pi(mem_addr: *mut __m64, a: __m64) { movntdq(mem_addr, a) } /// Compares the packed 16-bit signed integers of `a` and `b` writing the /// greatest value into the result. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmaxsw))] pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 { pmaxsw(a, b) } /// Compares the packed 16-bit signed integers of `a` and `b` writing the /// greatest value into the result. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmaxsw))] pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 { _mm_max_pi16(a, b) } /// Compares the packed 8-bit unsigned integers of `a` and `b` writing the /// greatest value into the result. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmaxub))] pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 { pmaxub(a, b) } /// Compares the packed 8-bit unsigned integers of `a` and `b` writing the /// greatest value into the result. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmaxub))] pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 { _mm_max_pu8(a, b) } /// Compares the packed 16-bit signed integers of `a` and `b` writing the /// smallest value into the result.
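/// /// A sketch of the per-lane behavior (pseudocode): /// /// ```text /// for i in 0..4 { /// result[i] = if a[i] < b[i] { a[i] } else { b[i] }; // signed 16-bit lanes /// } /// ```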
#[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pminsw))] pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 { pminsw(a, b) } /// Compares the packed 16-bit signed integers of `a` and `b` writing the /// smallest value into the result. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pminsw))] pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 { _mm_min_pi16(a, b) } /// Compares the packed 8-bit unsigned integers of `a` and `b` writing the /// smallest value into the result. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pminub))] pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 { pminub(a, b) } /// Compares the packed 8-bit unsigned integers of `a` and `b` writing the /// smallest value into the result. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pminub))] pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 { _mm_min_pu8(a, b) } /// Multiplies packed 16-bit unsigned integer values and writes the /// high-order 16 bits of each 32-bit product to the corresponding bits in /// the destination. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmulhuw))] pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 { pmulhuw(a, b) } /// Multiplies packed 16-bit integer values and writes the /// low-order 16 bits of each 32-bit product to the corresponding bits in /// the destination. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmullw))] pub unsafe fn _mm_mullo_pi16(a: __m64, b: __m64) -> __m64 { pmullw(a, b) } /// Multiplies packed 16-bit unsigned integer values and writes the /// high-order 16 bits of each 32-bit product to the corresponding bits in /// the destination. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmulhuw))] pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 { _mm_mulhi_pu16(a, b) } /// Computes the rounded averages of the packed unsigned 8-bit integer /// values and writes the averages to the corresponding bits in the /// destination. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pavgb))] pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 { pavgb(a, b) } /// Computes the rounded averages of the packed unsigned 8-bit integer /// values and writes the averages to the corresponding bits in the /// destination. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pavgb))] pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 { _mm_avg_pu8(a, b) } /// Computes the rounded averages of the packed unsigned 16-bit integer /// values and writes the averages to the corresponding bits in the /// destination. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pavgw))] pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 { pavgw(a, b) } /// Computes the rounded averages of the packed unsigned 16-bit integer /// values and writes the averages to the corresponding bits in the /// destination. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pavgw))] pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 { _mm_avg_pu16(a, b) } /// Subtracts the corresponding 8-bit unsigned integer values of the two /// 64-bit vector operands and computes the absolute value of each /// difference. The sum of the 8 absolute differences is then written to the /// bits `[15:0]` of the destination; the remaining bits `[63:16]` are cleared.
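/// /// A sketch of the computation (pseudocode; the sum of 8 byte-sized absolute /// differences always fits in 16 bits): /// /// ```text /// let sum: u16 = (0..8).map(|i| abs_diff(a[i], b[i]) as u16).sum(); /// // bits [15:0] of the result = sum, bits [63:16] = 0 /// ```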
#[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(psadbw))] pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 { psadbw(a, b) } /// Subtracts the corresponding 8-bit unsigned integer values of the two /// 64-bit vector operands and computes the absolute value of each /// difference. The sum of the 8 absolute differences is then written to the /// bits `[15:0]` of the destination; the remaining bits `[63:16]` are cleared. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(psadbw))] pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 { _mm_sad_pu8(a, b) } /// Converts two elements of a 64-bit vector of `[2 x i32]` into two /// floating point values and writes them to the lower 64-bits of the /// destination. The remaining higher order elements of the destination are /// copied from the corresponding elements in the first operand. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 { cvtpi2ps(a, b) } /// Converts two elements of a 64-bit vector of `[2 x i32]` into two /// floating point values and writes them to the lower 64-bits of the /// destination. The remaining higher order elements of the destination are /// copied from the corresponding elements in the first operand. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 { _mm_cvtpi32_ps(a, b) } /// Converts the lower 4 signed 8-bit values of `a` into a 128-bit vector of 4 `f32`s. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 { let b = _mm_setzero_si64(); let b = _mm_cmpgt_pi8(b, a); let b = _mm_unpacklo_pi8(a, b); _mm_cvtpi16_ps(b) } /// Converts the lower 4 unsigned 8-bit values of `a` into a 128-bit vector of 4 `f32`s. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 { let b = _mm_setzero_si64(); let b = _mm_unpacklo_pi8(a, b); _mm_cvtpi16_ps(b) } /// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 { let b = _mm_setzero_si64(); let b = _mm_cmpgt_pi16(b, a); let c = _mm_unpackhi_pi16(a, b); let r = _mm_setzero_ps(); let r = cvtpi2ps(r, c); let r = _mm_movelh_ps(r, r); let c = _mm_unpacklo_pi16(a, b); cvtpi2ps(r, c) } /// Converts a 64-bit vector of `u16`s into a 128-bit vector of 4 `f32`s. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 { let b = _mm_setzero_si64(); let c = _mm_unpackhi_pi16(a, b); let r = _mm_setzero_ps(); let r = cvtpi2ps(r, c); let r = _mm_movelh_ps(r, r); let c = _mm_unpacklo_pi16(a, b); cvtpi2ps(r, c) } /// Converts the two 32-bit signed integer values from each 64-bit vector /// operand of `[2 x i32]` into a 128-bit vector of `[4 x float]`.
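/// /// A sketch of the resulting element layout, in memory order with the lowest /// element first (pseudocode): /// /// ```text /// _mm_cvtpi32x2_ps(a, b) == _mm_setr_ps(a[0] as f32, a[1] as f32, /// b[0] as f32, b[1] as f32) /// ```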
#[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtpi2ps))] pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 { let c = _mm_setzero_ps(); let c = _mm_cvtpi32_ps(c, b); let c = _mm_movelh_ps(c, c); _mm_cvtpi32_ps(c, a) } /// Conditionally copies the values from each 8-bit element in the first /// 64-bit integer vector operand to the specified memory location, as /// specified by the most significant bit in the corresponding element in the /// second 64-bit integer vector operand. /// /// To minimize caching, the data is flagged as non-temporal /// (unlikely to be used again soon). #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(maskmovq))] pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) { maskmovq(a, mask, mem_addr) } /// Conditionally copies the values from each 8-bit element in the first /// 64-bit integer vector operand to the specified memory location, as /// specified by the most significant bit in the corresponding element in the /// second 64-bit integer vector operand. /// /// To minimize caching, the data is flagged as non-temporal /// (unlikely to be used again soon). #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(maskmovq))] pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) { _mm_maskmove_si64(a, mask, mem_addr) } /// Extracts a 16-bit element from a 64-bit vector of `[4 x i16]` and /// returns it, as specified by the immediate integer operand. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pextrw, imm2 = 0))] #[rustc_args_required_const(1)] pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 { macro_rules! call { ($imm2:expr) => { pextrw(a, $imm2) as i32 }; } constify_imm2!(imm2, call) } /// Extracts a 16-bit element from a 64-bit vector of `[4 x i16]` and /// returns it, as specified by the immediate integer operand. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pextrw, imm2 = 0))] #[rustc_args_required_const(1)] pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 { macro_rules! call { ($imm2:expr) => { pextrw(a, $imm2) as i32 }; } constify_imm2!(imm2, call) } /// Copies data from the 64-bit vector of `[4 x i16]` to the destination, /// and inserts the lower 16-bits of an integer operand at the 16-bit offset /// specified by the immediate operand `imm2`. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))] #[rustc_args_required_const(2)] pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 { macro_rules! call { ($imm2:expr) => { pinsrw(a, d, $imm2) }; } constify_imm2!(imm2, call) } /// Copies data from the 64-bit vector of `[4 x i16]` to the destination, /// and inserts the lower 16-bits of an integer operand at the 16-bit offset /// specified by the immediate operand `imm2`. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))] #[rustc_args_required_const(2)] pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 { macro_rules! call { ($imm2:expr) => { pinsrw(a, d, $imm2) }; } constify_imm2!(imm2, call) } /// Takes the most significant bit from each 8-bit element in a 64-bit /// integer vector to create an 8-bit mask value. Zero-extends the value to /// 32-bit integer and writes it to the destination.
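/// /// A sketch of the result (pseudocode): /// /// ```text /// for i in 0..8 { /// bit i of the result = most significant bit of byte i of `a` /// } /// // all remaining bits of the returned i32 are 0 /// ```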
#[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmovmskb))] pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 { pmovmskb(a) } /// Takes the most significant bit from each 8-bit element in a 64-bit /// integer vector to create an 8-bit mask value. Zero-extends the value to /// 32-bit integer and writes it to the destination. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pmovmskb))] pub unsafe fn _m_pmovmskb(a: __m64) -> i32 { _mm_movemask_pi8(a) } /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the /// destination, as specified by the immediate value operand. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pshufw, imm8 = 0))] #[rustc_args_required_const(1)] pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 { macro_rules! call { ($imm8:expr) => { pshufw(a, $imm8) }; } constify_imm8!(imm8, call) } /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the /// destination, as specified by the immediate value operand. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(pshufw, imm8 = 0))] #[rustc_args_required_const(1)] pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 { macro_rules! call { ($imm8:expr) => { pshufw(a, $imm8) }; } constify_imm8!(imm8, call) } /// Converts the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers with truncation. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvttps2pi))] pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 { cvttps2pi(a) } /// Converts the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers with truncation. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvttps2pi))] pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 { _mm_cvttps_pi32(a) } /// Converts the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtps2pi))] pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 { cvtps2pi(a) } /// Converts the two lower packed single-precision (32-bit) floating-point /// elements in `a` to packed 32-bit integers. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtps2pi))] pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 { _mm_cvtps_pi32(a) } /// Converts packed single-precision (32-bit) floating-point elements in `a` to /// packed 16-bit integers. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtps2pi))] pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 { let b = _mm_cvtps_pi32(a); let a = _mm_movehl_ps(a, a); let c = _mm_cvtps_pi32(a); _mm_packs_pi32(b, c) } /// Converts packed single-precision (32-bit) floating-point elements in `a` to /// packed 8-bit integers, and returns them in the lower 4 elements of the /// result. #[inline] #[target_feature(enable = "sse,mmx")] #[cfg_attr(test, assert_instr(cvtps2pi))] pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 { let b = _mm_cvtps_pi16(a); let c = _mm_setzero_si64(); _mm_packs_pi16(b, c) } #[cfg(test)] mod tests { use crate::mem::transmute; use std::f32::NAN; use stdsimd_test::simd_test; use test::black_box; // Used to inhibit constant-folding.
use crate::core_arch::{simd::*, x86::*}; #[simd_test(enable = "sse")] unsafe fn test_mm_add_ps() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_add_ps(a, b); assert_eq_m128(r, _mm_setr_ps(-101.0, 25.0, 0.0, -15.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_add_ss() { let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0); let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_add_ss(a, b); assert_eq_m128(r, _mm_set_ps(-1.0, 5.0, 0.0, -15.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_sub_ps() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_sub_ps(a, b); assert_eq_m128(r, _mm_setr_ps(99.0, -15.0, 0.0, -5.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_sub_ss() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_sub_ss(a, b); assert_eq_m128(r, _mm_setr_ps(99.0, 5.0, 0.0, -10.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_mul_ps() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_mul_ps(a, b); assert_eq_m128(r, _mm_setr_ps(100.0, 100.0, 0.0, 50.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_mul_ss() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_mul_ss(a, b); assert_eq_m128(r, _mm_setr_ps(100.0, 5.0, 0.0, -10.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_div_ps() { let a = _mm_setr_ps(-1.0, 5.0, 2.0, -10.0); let b = _mm_setr_ps(-100.0, 20.0, 0.2, -5.0); let r = _mm_div_ps(a, b); assert_eq_m128(r, _mm_setr_ps(0.01, 0.25, 10.0, 2.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_div_ss() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_div_ss(a, b); assert_eq_m128(r, _mm_setr_ps(0.01, 5.0, 0.0, -10.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_sqrt_ss() { let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); let r = _mm_sqrt_ss(a); let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0); assert_eq_m128(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_sqrt_ps() { let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); let r = _mm_sqrt_ps(a); let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0); assert_eq_m128(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_rcp_ss() { let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); let r = _mm_rcp_ss(a); let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0); assert_eq_m128(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_rcp_ps() { let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); let r = _mm_rcp_ps(a); let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215); let rel_err = 0.00048828125; for i in 0..4 { assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err); } } #[simd_test(enable = "sse")] unsafe fn test_mm_rsqrt_ss() { let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); let r = _mm_rsqrt_ss(a); let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0); let rel_err = 0.00048828125; for i in 0..4 { assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. * rel_err); } } #[simd_test(enable = "sse")] unsafe fn test_mm_rsqrt_ps() { let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0); let r = _mm_rsqrt_ps(a); let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845); let rel_err = 0.00048828125; for i in 0..4 { assert_approx_eq!(get_m128(r, i), get_m128(e, i), 2. 
* rel_err); } } #[simd_test(enable = "sse")] unsafe fn test_mm_min_ss() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_min_ss(a, b); assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_min_ps() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_min_ps(a, b); assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_max_ss() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_max_ss(a, b); assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, -10.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_max_ps() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_max_ps(a, b); assert_eq_m128(r, _mm_setr_ps(-1.0, 20.0, 0.0, -5.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_and_ps() { let a = transmute(u32x4::splat(0b0011)); let b = transmute(u32x4::splat(0b0101)); let r = _mm_and_ps(*black_box(&a), *black_box(&b)); let e = transmute(u32x4::splat(0b0001)); assert_eq_m128(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_andnot_ps() { let a = transmute(u32x4::splat(0b0011)); let b = transmute(u32x4::splat(0b0101)); let r = _mm_andnot_ps(*black_box(&a), *black_box(&b)); let e = transmute(u32x4::splat(0b0100)); assert_eq_m128(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_or_ps() { let a = transmute(u32x4::splat(0b0011)); let b = transmute(u32x4::splat(0b0101)); let r = _mm_or_ps(*black_box(&a), *black_box(&b)); let e = transmute(u32x4::splat(0b0111)); assert_eq_m128(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_xor_ps() { let a = transmute(u32x4::splat(0b0011)); let b = transmute(u32x4::splat(0b0101)); let r = _mm_xor_ps(*black_box(&a), *black_box(&b)); let e = transmute(u32x4::splat(0b0110)); assert_eq_m128(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpeq_ss() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0); let r: u32x4 = transmute(_mm_cmpeq_ss(a, b)); let e: u32x4 = transmute(_mm_setr_ps(transmute(0u32), 2.0, 3.0, 4.0)); assert_eq!(r, e); let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2)); let e2: u32x4 = transmute(_mm_setr_ps(transmute(0xffffffffu32), 2.0, 3.0, 4.0)); assert_eq!(r2, e2); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmplt_ss() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); let b1 = 0u32; // a.extract(0) < b.extract(0) let c1 = 0u32; // a.extract(0) < c.extract(0) let d1 = !0u32; // a.extract(0) < d.extract(0) let rb: u32x4 = transmute(_mm_cmplt_ss(a, b)); let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); assert_eq!(rb, eb); let rc: u32x4 = transmute(_mm_cmplt_ss(a, c)); let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); assert_eq!(rc, ec); let rd: u32x4 = transmute(_mm_cmplt_ss(a, d)); let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); assert_eq!(rd, ed); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmple_ss() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); let b1 = 0u32; // a.extract(0) <= b.extract(0) let c1 = !0u32; // a.extract(0) <= c.extract(0) let d1 = 
!0u32; // a.extract(0) <= d.extract(0) let rb: u32x4 = transmute(_mm_cmple_ss(a, b)); let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); assert_eq!(rb, eb); let rc: u32x4 = transmute(_mm_cmple_ss(a, c)); let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); assert_eq!(rc, ec); let rd: u32x4 = transmute(_mm_cmple_ss(a, d)); let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); assert_eq!(rd, ed); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpgt_ss() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); let b1 = !0u32; // a.extract(0) > b.extract(0) let c1 = 0u32; // a.extract(0) > c.extract(0) let d1 = 0u32; // a.extract(0) > d.extract(0) let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b)); let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); assert_eq!(rb, eb); let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c)); let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); assert_eq!(rc, ec); let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d)); let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); assert_eq!(rd, ed); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpge_ss() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); let b1 = !0u32; // a.extract(0) >= b.extract(0) let c1 = !0u32; // a.extract(0) >= c.extract(0) let d1 = 0u32; // a.extract(0) >= d.extract(0) let rb: u32x4 = transmute(_mm_cmpge_ss(a, b)); let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); assert_eq!(rb, eb); let rc: u32x4 = transmute(_mm_cmpge_ss(a, c)); let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); assert_eq!(rc, ec); let rd: u32x4 = transmute(_mm_cmpge_ss(a, d)); let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); assert_eq!(rd, ed); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpneq_ss() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); let b1 = !0u32; // a.extract(0) != b.extract(0) let c1 = 0u32; // a.extract(0) != c.extract(0) let d1 = !0u32; // a.extract(0) != d.extract(0) let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b)); let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); assert_eq!(rb, eb); let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c)); let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); assert_eq!(rc, ec); let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d)); let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); assert_eq!(rd, ed); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpnlt_ss() { // TODO: this test is exactly the same as for `_mm_cmpge_ss`, but there // must be a difference. It may have to do with behavior in the // presence of NaNs (signaling or quiet). If so, we should add tests // for those. 
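// The difference is observable with unordered operands: `_mm_cmpnlt_ss` is
// !(a < b), which is *true* when either input is NaN, while `_mm_cmpge_ss`
// is a >= b, which is *false* in that case. A minimal sketch of such a
// check (not part of the upstream test):
let a_nan = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b_nan = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
// Lane 0 is all-ones because the comparison is unordered; the upper lanes
// are copied from the first operand.
let r_nan: u32x4 = transmute(_mm_cmpnlt_ss(a_nan, b_nan));
let e_nan: u32x4 = transmute(_mm_setr_ps(transmute(!0u32), 2.0, 3.0, 4.0));
assert_eq!(r_nan, e_nan);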
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); let b1 = !0u32; // a.extract(0) >= b.extract(0) let c1 = !0u32; // a.extract(0) >= c.extract(0) let d1 = 0u32; // a.extract(0) >= d.extract(0) let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b)); let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); assert_eq!(rb, eb); let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c)); let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); assert_eq!(rc, ec); let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d)); let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); assert_eq!(rd, ed); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpnle_ss() { // TODO: this test is exactly the same as for `_mm_cmpgt_ss`, but there // must be a difference. It may have to do with behavior in the // presence // of NaNs (signaling or quiet). If so, we should add tests for those. let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); let b1 = !0u32; // a.extract(0) > b.extract(0) let c1 = 0u32; // a.extract(0) > c.extract(0) let d1 = 0u32; // a.extract(0) > d.extract(0) let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b)); let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); assert_eq!(rb, eb); let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c)); let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); assert_eq!(rc, ec); let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d)); let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); assert_eq!(rd, ed); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpngt_ss() { // TODO: this test is exactly the same as for `_mm_cmple_ss`, but there // must be a difference. It may have to do with behavior in the // presence of NaNs (signaling or quiet). If so, we should add tests // for those. let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); let b1 = 0u32; // a.extract(0) <= b.extract(0) let c1 = !0u32; // a.extract(0) <= c.extract(0) let d1 = !0u32; // a.extract(0) <= d.extract(0) let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b)); let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); assert_eq!(rb, eb); let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c)); let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); assert_eq!(rc, ec); let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d)); let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); assert_eq!(rd, ed); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpnge_ss() { // TODO: this test is exactly the same as for `_mm_cmplt_ss`, but there // must be a difference. It may have to do with behavior in the // presence of NaNs (signaling or quiet). If so, we should add tests // for those. 
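// As with the other `cmpn*` predicates, the difference from `_mm_cmplt_ss`
// is the unordered case: `_mm_cmpnge_ss` is !(a >= b), which is *true* when
// either input is NaN, while `_mm_cmplt_ss` is *false* then. A minimal
// sketch of such a check (not part of the upstream test):
let a_nan = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let b_nan = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
// Lane 0 is all-ones for the unordered comparison; upper lanes come from
// the first operand.
let r_nan: u32x4 = transmute(_mm_cmpnge_ss(a_nan, b_nan));
let e_nan: u32x4 = transmute(_mm_setr_ps(transmute(!0u32), 2.0, 3.0, 4.0));
assert_eq!(r_nan, e_nan);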
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0); let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); let b1 = 0u32; // a.extract(0) < b.extract(0) let c1 = 0u32; // a.extract(0) < c.extract(0) let d1 = !0u32; // a.extract(0) < d.extract(0) let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b)); let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); assert_eq!(rb, eb); let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c)); let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); assert_eq!(rc, ec); let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d)); let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); assert_eq!(rd, ed); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpord_ss() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0); let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); let b1 = !0u32; // a.extract(0) ord b.extract(0) let c1 = 0u32; // a.extract(0) ord c.extract(0) let d1 = !0u32; // a.extract(0) ord d.extract(0) let rb: u32x4 = transmute(_mm_cmpord_ss(a, b)); let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); assert_eq!(rb, eb); let rc: u32x4 = transmute(_mm_cmpord_ss(a, c)); let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); assert_eq!(rc, ec); let rd: u32x4 = transmute(_mm_cmpord_ss(a, d)); let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); assert_eq!(rd, ed); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpunord_ss() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0); let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0); let d = _mm_setr_ps(2.0, 5.0, 6.0, 7.0); let b1 = 0u32; // a.extract(0) unord b.extract(0) let c1 = !0u32; // a.extract(0) unord c.extract(0) let d1 = 0u32; // a.extract(0) unord d.extract(0) let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b)); let eb: u32x4 = transmute(_mm_setr_ps(transmute(b1), 2.0, 3.0, 4.0)); assert_eq!(rb, eb); let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c)); let ec: u32x4 = transmute(_mm_setr_ps(transmute(c1), 2.0, 3.0, 4.0)); assert_eq!(rc, ec); let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d)); let ed: u32x4 = transmute(_mm_setr_ps(transmute(d1), 2.0, 3.0, 4.0)); assert_eq!(rd, ed); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpeq_ps() { let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); let tru = !0u32; let fls = 0u32; let e = u32x4::new(fls, fls, tru, fls); let r: u32x4 = transmute(_mm_cmpeq_ps(a, b)); assert_eq!(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmplt_ps() { let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); let tru = !0u32; let fls = 0u32; let e = u32x4::new(tru, fls, fls, fls); let r: u32x4 = transmute(_mm_cmplt_ps(a, b)); assert_eq!(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmple_ps() { let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0); let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); let tru = !0u32; let fls = 0u32; let e = u32x4::new(tru, fls, tru, fls); let r: u32x4 = transmute(_mm_cmple_ps(a, b)); assert_eq!(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpgt_ps() { let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0); let tru = !0u32; let fls = 0u32; let e = u32x4::new(fls, tru, fls, fls); let r: u32x4 = transmute(_mm_cmpgt_ps(a, b)); assert_eq!(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpge_ps() { let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); let 
b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0); let tru = !0u32; let fls = 0u32; let e = u32x4::new(fls, tru, tru, fls); let r: u32x4 = transmute(_mm_cmpge_ps(a, b)); assert_eq!(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpneq_ps() { let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN); let tru = !0u32; let fls = 0u32; let e = u32x4::new(tru, tru, fls, tru); let r: u32x4 = transmute(_mm_cmpneq_ps(a, b)); assert_eq!(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpnlt_ps() { let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); let tru = !0u32; let fls = 0u32; let e = u32x4::new(fls, tru, tru, tru); let r: u32x4 = transmute(_mm_cmpnlt_ps(a, b)); assert_eq!(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpnle_ps() { let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); let tru = !0u32; let fls = 0u32; let e = u32x4::new(fls, tru, fls, tru); let r: u32x4 = transmute(_mm_cmpnle_ps(a, b)); assert_eq!(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpngt_ps() { let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); let tru = !0u32; let fls = 0u32; let e = u32x4::new(tru, fls, tru, tru); let r: u32x4 = transmute(_mm_cmpngt_ps(a, b)); assert_eq!(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpnge_ps() { let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN); let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0); let tru = !0u32; let fls = 0u32; let e = u32x4::new(tru, fls, fls, tru); let r: u32x4 = transmute(_mm_cmpnge_ps(a, b)); assert_eq!(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpord_ps() { let a = _mm_setr_ps(10.0, 50.0, NAN, NAN); let b = _mm_setr_ps(15.0, NAN, 1.0, NAN); let tru = !0u32; let fls = 0u32; let e = u32x4::new(tru, fls, fls, fls); let r: u32x4 = transmute(_mm_cmpord_ps(a, b)); assert_eq!(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_cmpunord_ps() { let a = _mm_setr_ps(10.0, 50.0, NAN, NAN); let b = _mm_setr_ps(15.0, NAN, 1.0, NAN); let tru = !0u32; let fls = 0u32; let e = u32x4::new(fls, tru, tru, tru); let r: u32x4 = transmute(_mm_cmpunord_ps(a, b)); assert_eq!(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_comieq_ss() { let aa = &[3.0f32, 12.0, 23.0, NAN]; let bb = &[3.0f32, 47.5, 1.5, NAN]; let ee = &[1i32, 0, 0, 0]; for i in 0..4 { let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); let r = _mm_comieq_ss(a, b); assert_eq!( ee[i], r, "_mm_comieq_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r, ee[i], i ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_comilt_ss() { let aa = &[3.0f32, 12.0, 23.0, NAN]; let bb = &[3.0f32, 47.5, 1.5, NAN]; let ee = &[0i32, 1, 0, 0]; for i in 0..4 { let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); let r = _mm_comilt_ss(a, b); assert_eq!( ee[i], r, "_mm_comilt_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r, ee[i], i ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_comile_ss() { let aa = &[3.0f32, 12.0, 23.0, NAN]; let bb = &[3.0f32, 47.5, 1.5, NAN]; let ee = &[1i32, 1, 0, 0]; for i in 0..4 { let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); let r = _mm_comile_ss(a, b); assert_eq!( ee[i], r, "_mm_comile_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r, ee[i], i ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_comigt_ss() { let aa = &[3.0f32, 12.0, 23.0, NAN]; let bb = &[3.0f32, 47.5, 1.5, NAN]; let ee = &[0i32, 0, 1, 0]; for i in 0..4 { let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); let r = _mm_comigt_ss(a, b); assert_eq!( ee[i], r, "_mm_comigt_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r, ee[i], i ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_comige_ss() { let aa = &[3.0f32, 12.0, 23.0, NAN]; let bb = &[3.0f32, 47.5, 1.5, NAN]; let ee = &[1i32, 0, 1, 0]; for i in 0..4 { let a
= _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); let r = _mm_comige_ss(a, b); assert_eq!( ee[i], r, "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r, ee[i], i ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_comineq_ss() { let aa = &[3.0f32, 12.0, 23.0, NAN]; let bb = &[3.0f32, 47.5, 1.5, NAN]; let ee = &[0i32, 1, 1, 1]; for i in 0..4 { let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); let r = _mm_comineq_ss(a, b); assert_eq!( ee[i], r, "_mm_comineq_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r, ee[i], i ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_ucomieq_ss() { let aa = &[3.0f32, 12.0, 23.0, NAN]; let bb = &[3.0f32, 47.5, 1.5, NAN]; let ee = &[1i32, 0, 0, 0]; for i in 0..4 { let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); let r = _mm_ucomieq_ss(a, b); assert_eq!( ee[i], r, "_mm_ucomieq_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r, ee[i], i ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_ucomilt_ss() { let aa = &[3.0f32, 12.0, 23.0, NAN]; let bb = &[3.0f32, 47.5, 1.5, NAN]; let ee = &[0i32, 1, 0, 0]; for i in 0..4 { let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); let r = _mm_ucomilt_ss(a, b); assert_eq!( ee[i], r, "_mm_ucomilt_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r, ee[i], i ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_ucomile_ss() { let aa = &[3.0f32, 12.0, 23.0, NAN]; let bb = &[3.0f32, 47.5, 1.5, NAN]; let ee = &[1i32, 1, 0, 0]; for i in 0..4 { let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); let r = _mm_ucomile_ss(a, b); assert_eq!( ee[i], r, "_mm_ucomile_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r, ee[i], i ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_ucomigt_ss() { let aa = &[3.0f32, 12.0, 23.0, NAN]; let bb = &[3.0f32, 47.5, 1.5, NAN]; let ee = &[0i32, 0, 1, 0]; for i in 0..4 { let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); let r = _mm_ucomigt_ss(a, b); assert_eq!( ee[i], r, "_mm_ucomigt_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r, ee[i], i ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_ucomige_ss() { let aa = &[3.0f32, 12.0, 23.0, NAN]; let bb = &[3.0f32, 47.5, 1.5, NAN]; let ee = &[1i32, 0, 1, 0]; for i in 0..4 { let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); let r = _mm_ucomige_ss(a, b); assert_eq!( ee[i], r, "_mm_ucomige_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r, ee[i], i ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_ucomineq_ss() { let aa = &[3.0f32, 12.0, 23.0, NAN]; let bb = &[3.0f32, 47.5, 1.5, NAN]; let ee = &[0i32, 1, 1, 1]; for i in 0..4 { let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); let r = _mm_ucomineq_ss(a, b); assert_eq!( ee[i], r, "_mm_ucomineq_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r, ee[i], i ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_comieq_ss_vs_ucomieq_ss() { // If one of the arguments is a quiet NaN `comieq_ss` should signal an // Invalid Operation Exception while `ucomieq_ss` should not. let aa = &[3.0f32, NAN, 23.0, NAN]; let bb = &[3.0f32, 47.5, NAN, NAN]; let ee = &[1i32, 0, 0, 0]; let exc = &[0u32, 1, 1, 1]; // Should comieq_ss signal an exception? 
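// `_mm_comieq_ss` compiles to COMISS, which raises the invalid-operation
// exception for quiet as well as signaling NaNs, whereas `_mm_ucomieq_ss`
// compiles to UCOMISS, which raises it only for signaling NaNs. The quiet
// NaNs above should therefore set `_MM_EXCEPT_INVALID` only on the
// `comieq` path, which is what `exc` encodes.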
for i in 0..4 { let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); _MM_SET_EXCEPTION_STATE(0); let r1 = _mm_comieq_ss(*black_box(&a), b); let s1 = _MM_GET_EXCEPTION_STATE(); _MM_SET_EXCEPTION_STATE(0); let r2 = _mm_ucomieq_ss(*black_box(&a), b); let s2 = _MM_GET_EXCEPTION_STATE(); assert_eq!( ee[i], r1, "_mm_comeq_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r1, ee[i], i ); assert_eq!( ee[i], r2, "_mm_ucomeq_ss({:?}, {:?}) = {}, expected: {} (i={})", a, b, r2, ee[i], i ); assert_eq!( s1, exc[i] * _MM_EXCEPT_INVALID, "_mm_comieq_ss() set exception flags: {} (i={})", s1, i ); assert_eq!( s2, 0, // ucomieq_ss should not signal an exception "_mm_ucomieq_ss() set exception flags: {} (i={})", s2, i ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_cvtss_si32() { let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1]; let result = &[42i32, -3, i32::min_value(), 0, i32::min_value(), 2147483520]; for i in 0..inputs.len() { let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0); let e = result[i]; let r = _mm_cvtss_si32(x); assert_eq!( e, r, "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}", i, x, r, e ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_cvttss_si32() { let inputs = &[ (42.0f32, 42i32), (-31.4, -31), (-33.5, -33), (-34.5, -34), (10.999, 10), (-5.99, -5), (4.0e10, i32::min_value()), (4.0e-10, 0), (NAN, i32::min_value()), (2147483500.1, 2147483520), ]; for i in 0..inputs.len() { let (xi, e) = inputs[i]; let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); let r = _mm_cvttss_si32(x); assert_eq!( e, r, "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}", i, x, r, e ); } } #[simd_test(enable = "sse")] pub unsafe fn test_mm_cvtsi32_ss() { let inputs = &[ (4555i32, 4555.0f32), (322223333, 322223330.0), (-432, -432.0), (-322223333, -322223330.0), ]; for i in 0..inputs.len() { let (x, f) = inputs[i]; let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); let r = _mm_cvtsi32_ss(a, x); let e = _mm_setr_ps(f, 6.0, 7.0, 8.0); assert_eq_m128(e, r); } } #[simd_test(enable = "sse")] pub unsafe fn test_mm_cvtss_f32() { let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0); assert_eq!(_mm_cvtss_f32(a), 312.0134); } #[simd_test(enable = "sse")] unsafe fn test_mm_set_ss() { let r = _mm_set_ss(black_box(4.25)); assert_eq_m128(r, _mm_setr_ps(4.25, 0.0, 0.0, 0.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_set1_ps() { let r1 = _mm_set1_ps(black_box(4.25)); let r2 = _mm_set_ps1(black_box(4.25)); assert_eq!(get_m128(r1, 0), 4.25); assert_eq!(get_m128(r1, 1), 4.25); assert_eq!(get_m128(r1, 2), 4.25); assert_eq!(get_m128(r1, 3), 4.25); assert_eq!(get_m128(r2, 0), 4.25); assert_eq!(get_m128(r2, 1), 4.25); assert_eq!(get_m128(r2, 2), 4.25); assert_eq!(get_m128(r2, 3), 4.25); } #[simd_test(enable = "sse")] unsafe fn test_mm_set_ps() { let r = _mm_set_ps( black_box(1.0), black_box(2.0), black_box(3.0), black_box(4.0), ); assert_eq!(get_m128(r, 0), 4.0); assert_eq!(get_m128(r, 1), 3.0); assert_eq!(get_m128(r, 2), 2.0); assert_eq!(get_m128(r, 3), 1.0); } #[simd_test(enable = "sse")] unsafe fn test_mm_setr_ps() { let r = _mm_setr_ps( black_box(1.0), black_box(2.0), black_box(3.0), black_box(4.0), ); assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_setzero_ps() { let r = *black_box(&_mm_setzero_ps()); assert_eq_m128(r, _mm_set1_ps(0.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_shuffle() { assert_eq!(_MM_SHUFFLE(0, 1, 1, 3), 0b00_01_01_11); assert_eq!(_MM_SHUFFLE(3, 1, 1, 0), 0b11_01_01_00); assert_eq!(_MM_SHUFFLE(1, 2, 2, 1), 
0b01_10_10_01); } #[simd_test(enable = "sse")] unsafe fn test_mm_shuffle_ps() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); let r = _mm_shuffle_ps(a, b, 0b00_01_01_11); assert_eq_m128(r, _mm_setr_ps(4.0, 2.0, 6.0, 5.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_unpackhi_ps() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); let r = _mm_unpackhi_ps(a, b); assert_eq_m128(r, _mm_setr_ps(3.0, 7.0, 4.0, 8.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_unpacklo_ps() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); let r = _mm_unpacklo_ps(a, b); assert_eq_m128(r, _mm_setr_ps(1.0, 5.0, 2.0, 6.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_movehl_ps() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); let r = _mm_movehl_ps(a, b); assert_eq_m128(r, _mm_setr_ps(7.0, 8.0, 3.0, 4.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_movelh_ps() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); let r = _mm_movelh_ps(a, b); assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_loadh_pi() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let x: [f32; 4] = [5.0, 6.0, 7.0, 8.0]; let p = x[..].as_ptr(); let r = _mm_loadh_pi(a, p as *const _); assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_loadl_pi() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let x: [f32; 4] = [5.0, 6.0, 7.0, 8.0]; let p = x[..].as_ptr(); let r = _mm_loadl_pi(a, p as *const _); assert_eq_m128(r, _mm_setr_ps(5.0, 6.0, 3.0, 4.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_load_ss() { let a = 42.0f32; let r = _mm_load_ss(&a as *const f32); assert_eq_m128(r, _mm_setr_ps(42.0, 0.0, 0.0, 0.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_load1_ps() { let a = 42.0f32; let r = _mm_load1_ps(&a as *const f32); assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_load_ps() { let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; let mut p = vals.as_ptr(); let mut fixup = 0.0f32; // Make sure p is aligned, otherwise we might get a // (signal: 11, SIGSEGV: invalid memory reference) let unalignment = (p as usize) & 0xf; if unalignment != 0 { let delta = ((16 - unalignment) >> 2) as isize; fixup = delta as f32; p = p.offset(delta); } let r = _mm_load_ps(p); let e = _mm_add_ps(_mm_setr_ps(1.0, 2.0, 3.0, 4.0), _mm_set1_ps(fixup)); assert_eq_m128(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_loadu_ps() { let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; let p = vals.as_ptr().offset(3); let r = _mm_loadu_ps(black_box(p)); assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0)); } #[simd_test(enable = "sse")] unsafe fn test_mm_loadr_ps() { let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]; let mut p = vals.as_ptr(); let mut fixup = 0.0f32; // Make sure p is aligned, otherwise we might get a // (signal: 11, SIGSEGV: invalid memory reference) let unalignment = (p as usize) & 0xf; if unalignment != 0 { let delta = ((16 - unalignment) >> 2) as isize; fixup = delta as f32; p = p.offset(delta); } let r = _mm_loadr_ps(p); let e = _mm_add_ps(_mm_setr_ps(4.0, 3.0, 2.0, 1.0), _mm_set1_ps(fixup)); assert_eq_m128(r, e); } #[simd_test(enable = "sse")] unsafe fn test_mm_storeh_pi() { let mut vals = [0.0f32; 8]; let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); _mm_storeh_pi(vals.as_mut_ptr() as *mut _, a); 
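// `_mm_storeh_pi` writes the upper two lanes of `a` (here 3.0 and 4.0) to
// the first 64 bits at the destination; the remaining elements of `vals`
// stay untouched, which the checks below rely on.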
assert_eq!(vals[0], 3.0); assert_eq!(vals[1], 4.0); assert_eq!(vals[2], 0.0); } #[simd_test(enable = "sse")] unsafe fn test_mm_storel_pi() { let mut vals = [0.0f32; 8]; let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); _mm_storel_pi(vals.as_mut_ptr() as *mut _, a); assert_eq!(vals[0], 1.0); assert_eq!(vals[1], 2.0); assert_eq!(vals[2], 0.0); } #[simd_test(enable = "sse")] unsafe fn test_mm_store_ss() { let mut vals = [0.0f32; 8]; let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); _mm_store_ss(vals.as_mut_ptr().offset(1), a); assert_eq!(vals[0], 0.0); assert_eq!(vals[1], 1.0); assert_eq!(vals[2], 0.0); } #[simd_test(enable = "sse")] unsafe fn test_mm_store1_ps() { let mut vals = [0.0f32; 8]; let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let mut ofs = 0; let mut p = vals.as_mut_ptr(); if (p as usize) & 0xf != 0 { ofs = (16 - (p as usize) & 0xf) >> 2; p = p.offset(ofs as isize); } _mm_store1_ps(p, *black_box(&a)); if ofs > 0 { assert_eq!(vals[ofs - 1], 0.0); } assert_eq!(vals[ofs + 0], 1.0); assert_eq!(vals[ofs + 1], 1.0); assert_eq!(vals[ofs + 2], 1.0); assert_eq!(vals[ofs + 3], 1.0); assert_eq!(vals[ofs + 4], 0.0); } #[simd_test(enable = "sse")] unsafe fn test_mm_store_ps() { let mut vals = [0.0f32; 8]; let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let mut ofs = 0; let mut p = vals.as_mut_ptr(); // Align p to 16-byte boundary if (p as usize) & 0xf != 0 { ofs = (16 - (p as usize) & 0xf) >> 2; p = p.offset(ofs as isize); } _mm_store_ps(p, *black_box(&a)); if ofs > 0 { assert_eq!(vals[ofs - 1], 0.0); } assert_eq!(vals[ofs + 0], 1.0); assert_eq!(vals[ofs + 1], 2.0); assert_eq!(vals[ofs + 2], 3.0); assert_eq!(vals[ofs + 3], 4.0); assert_eq!(vals[ofs + 4], 0.0); } #[simd_test(enable = "sse")] unsafe fn test_mm_storer_ps() { let mut vals = [0.0f32; 8]; let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let mut ofs = 0; let mut p = vals.as_mut_ptr(); // Align p to 16-byte boundary if (p as usize) & 0xf != 0 { ofs = (16 - (p as usize) & 0xf) >> 2; p = p.offset(ofs as isize); } _mm_storer_ps(p, *black_box(&a)); if ofs > 0 { assert_eq!(vals[ofs - 1], 0.0); } assert_eq!(vals[ofs + 0], 4.0); assert_eq!(vals[ofs + 1], 3.0); assert_eq!(vals[ofs + 2], 2.0); assert_eq!(vals[ofs + 3], 1.0); assert_eq!(vals[ofs + 4], 0.0); } #[simd_test(enable = "sse")] unsafe fn test_mm_storeu_ps() { let mut vals = [0.0f32; 8]; let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let mut ofs = 0; let mut p = vals.as_mut_ptr(); // Make sure p is **not** aligned to 16-byte boundary if (p as usize) & 0xf == 0 { ofs = 1; p = p.offset(1); } _mm_storeu_ps(p, *black_box(&a)); if ofs > 0 { assert_eq!(vals[ofs - 1], 0.0); } assert_eq!(vals[ofs + 0], 1.0); assert_eq!(vals[ofs + 1], 2.0); assert_eq!(vals[ofs + 2], 3.0); assert_eq!(vals[ofs + 3], 4.0); assert_eq!(vals[ofs + 4], 0.0); } #[simd_test(enable = "sse")] unsafe fn test_mm_move_ss() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); let r = _mm_move_ss(a, b); let e = _mm_setr_ps(5.0, 2.0, 3.0, 4.0); assert_eq_m128(e, r); } #[simd_test(enable = "sse")] unsafe fn test_mm_movemask_ps() { let r = _mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0)); assert_eq!(r, 0b0101); let r = _mm_movemask_ps(_mm_setr_ps(-1.0, -5.0, -5.0, 0.0)); assert_eq!(r, 0b0111); } #[simd_test(enable = "sse")] unsafe fn test_mm_sfence() { _mm_sfence(); } #[simd_test(enable = "sse")] unsafe fn test_mm_getcsr_setcsr_1() { let saved_csr = _mm_getcsr(); let a = _mm_setr_ps(1.1e-36, 0.0, 0.0, 1.0); let b = _mm_setr_ps(0.001, 0.0, 0.0, 1.0); _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); let r = _mm_mul_ps(*black_box(&a), 
*black_box(&b)); _mm_setcsr(saved_csr); let exp = _mm_setr_ps(0.0, 0.0, 0.0, 1.0); assert_eq_m128(r, exp); // first component is a denormalized f32 } #[simd_test(enable = "sse")] unsafe fn test_mm_getcsr_setcsr_2() { // Same as _mm_setcsr_1 test, but with opposite flag value. let saved_csr = _mm_getcsr(); let a = _mm_setr_ps(1.1e-36, 0.0, 0.0, 1.0); let b = _mm_setr_ps(0.001, 0.0, 0.0, 1.0); _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_OFF); let r = _mm_mul_ps(*black_box(&a), *black_box(&b)); _mm_setcsr(saved_csr); let exp = _mm_setr_ps(1.1e-39, 0.0, 0.0, 1.0); assert_eq_m128(r, exp); // first component is a denormalized f32 } #[simd_test(enable = "sse")] unsafe fn test_mm_getcsr_setcsr_underflow() { _MM_SET_EXCEPTION_STATE(0); let a = _mm_setr_ps(1.1e-36, 0.0, 0.0, 1.0); let b = _mm_setr_ps(1e-5, 0.0, 0.0, 1.0); assert_eq!(_MM_GET_EXCEPTION_STATE(), 0); // just to be sure let r = _mm_mul_ps(*black_box(&a), *black_box(&b)); let exp = _mm_setr_ps(1.1e-41, 0.0, 0.0, 1.0); assert_eq_m128(r, exp); let underflow = _MM_GET_EXCEPTION_STATE() & _MM_EXCEPT_UNDERFLOW != 0; assert_eq!(underflow, true); } #[simd_test(enable = "sse")] unsafe fn test_MM_TRANSPOSE4_PS() { let mut a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let mut b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); let mut c = _mm_setr_ps(9.0, 10.0, 11.0, 12.0); let mut d = _mm_setr_ps(13.0, 14.0, 15.0, 16.0); _MM_TRANSPOSE4_PS(&mut a, &mut b, &mut c, &mut d); assert_eq_m128(a, _mm_setr_ps(1.0, 5.0, 9.0, 13.0)); assert_eq_m128(b, _mm_setr_ps(2.0, 6.0, 10.0, 14.0)); assert_eq_m128(c, _mm_setr_ps(3.0, 7.0, 11.0, 15.0)); assert_eq_m128(d, _mm_setr_ps(4.0, 8.0, 12.0, 16.0)); } #[repr(align(16))] struct Memory { pub data: [f32; 4], } #[simd_test(enable = "sse")] unsafe fn test_mm_stream_ps() { let a = _mm_set1_ps(7.0); let mut mem = Memory { data: [-1.0; 4] }; _mm_stream_ps(&mut mem.data[0] as *mut f32, a); for i in 0..4 { assert_eq!(mem.data[i], get_m128(a, i)); } } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_stream_pi() { let a = transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 7)); let mut mem = ::std::boxed::Box::<__m64>::new(transmute(i8x8::splat(1))); _mm_stream_pi(&mut *mem as *mut _ as *mut _, a); assert_eq_m64(a, *mem); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_max_pi16() { let a = _mm_setr_pi16(-1, 6, -3, 8); let b = _mm_setr_pi16(5, -2, 7, -4); let r = _mm_setr_pi16(5, 6, 7, 8); assert_eq_m64(r, _mm_max_pi16(a, b)); assert_eq_m64(r, _m_pmaxsw(a, b)); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_max_pu8() { let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8); let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4); let r = _mm_setr_pi8(5, 6, 7, 8, 5, 6, 7, 8); assert_eq_m64(r, _mm_max_pu8(a, b)); assert_eq_m64(r, _m_pmaxub(a, b)); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_min_pi16() { let a = _mm_setr_pi16(-1, 6, -3, 8); let b = _mm_setr_pi16(5, -2, 7, -4); let r = _mm_setr_pi16(-1, -2, -3, -4); assert_eq_m64(r, _mm_min_pi16(a, b)); assert_eq_m64(r, _m_pminsw(a, b)); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_min_pu8() { let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8); let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4); let r = _mm_setr_pi8(2, 2, 3, 4, 2, 2, 3, 4); assert_eq_m64(r, _mm_min_pu8(a, b)); assert_eq_m64(r, _m_pminub(a, b)); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_mulhi_pu16() { let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); let r = _mm_mulhi_pu16(a, b); assert_eq_m64(r, _mm_set1_pi16(15)); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_mullo_pi16() { let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); 
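// 1000 * 1001 = 1_001_000 = 0xF_4628, so the low 16 bits of each product
// are 0x4628 = 17960 (and the high 16 bits are 15, which is what the
// neighbouring `mulhi`/`pmulhuw` tests check).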
let r = _mm_mullo_pi16(a, b); assert_eq_m64(r, _mm_set1_pi16(17960)); } #[simd_test(enable = "sse,mmx")] unsafe fn test_m_pmulhuw() { let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); let r = _m_pmulhuw(a, b); assert_eq_m64(r, _mm_set1_pi16(15)); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_avg_pu8() { let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9)); let r = _mm_avg_pu8(a, b); assert_eq_m64(r, _mm_set1_pi8(6)); let r = _m_pavgb(a, b); assert_eq_m64(r, _mm_set1_pi8(6)); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_avg_pu16() { let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9)); let r = _mm_avg_pu16(a, b); assert_eq_m64(r, _mm_set1_pi16(6)); let r = _m_pavgw(a, b); assert_eq_m64(r, _mm_set1_pi16(6)); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_sad_pu8() { #[rustfmt::skip] let a = _mm_setr_pi8( 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, 1, 2, 3, 4, ); let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1); let r = _mm_sad_pu8(a, b); assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0)); let r = _m_psadbw(a, b); assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0)); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_cvtpi32_ps() { let a = _mm_setr_ps(0., 0., 3., 4.); let b = _mm_setr_pi32(1, 2); let expected = _mm_setr_ps(1., 2., 3., 4.); let r = _mm_cvtpi32_ps(a, b); assert_eq_m128(r, expected); let r = _mm_cvt_pi2ps(a, b); assert_eq_m128(r, expected); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_cvtpi16_ps() { let a = _mm_setr_pi16(1, 2, 3, 4); let expected = _mm_setr_ps(1., 2., 3., 4.); let r = _mm_cvtpi16_ps(a); assert_eq_m128(r, expected); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_cvtpu16_ps() { let a = _mm_setr_pi16(1, 2, 3, 4); let expected = _mm_setr_ps(1., 2., 3., 4.); let r = _mm_cvtpu16_ps(a); assert_eq_m128(r, expected); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_cvtpi8_ps() { let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); let expected = _mm_setr_ps(1., 2., 3., 4.); let r = _mm_cvtpi8_ps(a); assert_eq_m128(r, expected); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_cvtpu8_ps() { let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); let expected = _mm_setr_ps(1., 2., 3., 4.); let r = _mm_cvtpu8_ps(a); assert_eq_m128(r, expected); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_cvtpi32x2_ps() { let a = _mm_setr_pi32(1, 2); let b = _mm_setr_pi32(3, 4); let expected = _mm_setr_ps(1., 2., 3., 4.); let r = _mm_cvtpi32x2_ps(a, b); assert_eq_m128(r, expected); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_maskmove_si64() { let a = _mm_set1_pi8(9); let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0); let mut r = _mm_set1_pi8(0); _mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8); let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0); assert_eq_m64(r, e); let mut r = _mm_set1_pi8(0); _m_maskmovq(a, mask, &mut r as *mut _ as *mut i8); assert_eq_m64(r, e); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_extract_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let r = _mm_extract_pi16(a, 0); assert_eq!(r, 1); let r = _mm_extract_pi16(a, 1); assert_eq!(r, 2); let r = _m_pextrw(a, 1); assert_eq!(r, 2); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_insert_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let r = _mm_insert_pi16(a, 0, 0b0); let expected = _mm_setr_pi16(0, 2, 3, 4); assert_eq_m64(r, expected); let r = _mm_insert_pi16(a, 0, 0b10); let expected = _mm_setr_pi16(1, 2, 0, 4); assert_eq_m64(r, expected); let r = _m_pinsrw(a, 0, 0b10); assert_eq_m64(r, expected); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_movemask_pi8() 
{ let a = _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000); let r = _mm_movemask_pi8(a); assert_eq!(r, 0b10001); let r = _m_pmovmskb(a); assert_eq!(r, 0b10001); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_shuffle_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let r = _mm_shuffle_pi16(a, 0b00_01_01_11); let expected = _mm_setr_pi16(4, 2, 2, 1); assert_eq_m64(r, expected); let r = _m_pshufw(a, 0b00_01_01_11); assert_eq_m64(r, expected); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_cvtps_pi32() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let r = _mm_setr_pi32(1, 2); assert_eq_m64(r, _mm_cvtps_pi32(a)); assert_eq_m64(r, _mm_cvt_ps2pi(a)); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_cvttps_pi32() { let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); let r = _mm_setr_pi32(7, 2); assert_eq_m64(r, _mm_cvttps_pi32(a)); assert_eq_m64(r, _mm_cvtt_ps2pi(a)); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_cvtps_pi16() { let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); let r = _mm_setr_pi16(7, 2, 3, 4); assert_eq_m64(r, _mm_cvtps_pi16(a)); } #[simd_test(enable = "sse,mmx")] unsafe fn test_mm_cvtps_pi8() { let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0); assert_eq_m64(r, _mm_cvtps_pi8(a)); } } core_arch-0.1.5/src/x86/sse2.rs010064400007650000024000005512361345561510300143360ustar0000000000000000//! Streaming SIMD Extensions 2 (SSE2) #[cfg(test)] use stdsimd_test::assert_instr; use crate::{ core_arch::{simd::*, simd_llvm::*, x86::*}, intrinsics, mem::{self, transmute}, ptr, }; /// Provides a hint to the processor that the code sequence is a spin-wait loop. /// /// This can help improve the performance and power consumption of spin-wait /// loops. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_pause) #[inline] #[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_pause() { // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without // the SSE2 target-feature - therefore it does not require any target features pause() } /// Invalidates and flushes the cache line that contains `p` from all levels of /// the cache hierarchy. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflush) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(clflush))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_clflush(p: *mut u8) { clflush(p) } /// Performs a serializing operation on all load-from-memory instructions /// that were issued prior to this instruction. /// /// Guarantees that every load instruction that precedes, in program order, is /// globally visible before any load instruction which follows the fence in /// program order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lfence) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(lfence))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_lfence() { lfence() } /// Performs a serializing operation on all load-from-memory and store-to-memory /// instructions that were issued prior to this instruction. /// /// Guarantees that every memory access that precedes, in program order, the /// memory fence instruction is globally visible before any memory instruction /// which follows the fence in program order. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mfence) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(mfence))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mfence() { mfence() } /// Adds packed 8-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) } /// Adds packed 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) } /// Adds packed 32-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) } /// Adds packed 64-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) } /// Adds packed 8-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i { transmute(paddsb(a.as_i8x16(), b.as_i8x16())) } /// Adds packed 16-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(paddsw(a.as_i16x8(), b.as_i16x8())) } /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddusb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i { transmute(paddsub(a.as_u8x16(), b.as_u8x16())) } /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(paddusw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i { transmute(paddsuw(a.as_u16x8(), b.as_u16x8())) } /// Averages packed unsigned 8-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pavgb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i { transmute(pavgb(a.as_u8x16(), b.as_u8x16())) } /// Averages packed unsigned 16-bit integers in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pavgw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i { transmute(pavgw(a.as_u16x8(), b.as_u16x8())) } /// Multiplies and then horizontally add signed 16 bit integers in `a` and `b`. /// /// Multiplies packed signed 16-bit integers in `a` and `b`, producing /// intermediate signed 32-bit integers. Horizontally add adjacent pairs of /// intermediate 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_madd_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmaddwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) } /// Compares packed 16-bit integers in `a` and `b`, and returns the packed /// maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmaxsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(pmaxsw(a.as_i16x8(), b.as_i16x8())) } /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the /// packed maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmaxub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i { transmute(pmaxub(a.as_u8x16(), b.as_u8x16())) } /// Compares packed 16-bit integers in `a` and `b`, and returns the packed /// minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pminsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(pminsw(a.as_i16x8(), b.as_i16x8())) } /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the /// packed minimum values. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pminub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i { transmute(pminub(a.as_u8x16(), b.as_u8x16())) } /// Multiplies the packed 16-bit integers in `a` and `b`. /// /// The multiplication produces intermediate 32-bit integers, and returns the /// high 16 bits of the intermediate integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmulhw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(pmulhw(a.as_i16x8(), b.as_i16x8())) } /// Multiplies the packed unsigned 16-bit integers in `a` and `b`. /// /// The multiplication produces intermediate 32-bit integers, and returns the /// high 16 bits of the intermediate integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmulhuw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i { transmute(pmulhuw(a.as_u16x8(), b.as_u16x8())) } /// Multiplies the packed 16-bit integers in `a` and `b`. /// /// The multiplication produces intermediate 32-bit integers, and returns the /// low 16 bits of the intermediate integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmullw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) } /// Multiplies the low unsigned 32-bit integers from each packed 64-bit element /// in `a` and `b`. /// /// Returns the unsigned 64-bit results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epu32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmuludq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i { transmute(pmuludq(a.as_u32x4(), b.as_u32x4())) } /// Sum the absolute differences of packed unsigned 8-bit integers. /// /// Computes the absolute differences of packed unsigned 8-bit integers in `a` /// and `b`, then horizontally sum each consecutive 8 differences to produce /// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in /// the low 16 bits of 64-bit elements returned. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psadbw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i { transmute(psadbw(a.as_u8x16(), b.as_u8x16())) } /// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) } /// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) } /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) } /// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) } /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` /// using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i { transmute(psubsb(a.as_i8x16(), b.as_i8x16())) } /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` /// using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(psubsw(a.as_i16x8(), b.as_i16x8())) } /// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit /// integers in `a` using saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubusb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i { transmute(psubusb(a.as_u8x16(), b.as_u8x16())) } /// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit /// integers in `a` using saturation. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psubusw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i { transmute(psubusw(a.as_u16x8(), b.as_u16x8())) } /// Shifts `a` left by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pslldq, imm8 = 1))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_slli_si128_impl(a, imm8) } /// Implementation detail: converts the immediate argument of the /// `_mm_slli_si128` intrinsic into a compile-time constant. #[inline] #[target_feature(enable = "sse2")] unsafe fn _mm_slli_si128_impl(a: __m128i, imm8: i32) -> __m128i { let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32); let a = a.as_i8x16(); macro_rules! shuffle { ($shift:expr) => { simd_shuffle16::<i8x16, i8x16>( zero, a, [ 16 - $shift, 17 - $shift, 18 - $shift, 19 - $shift, 20 - $shift, 21 - $shift, 22 - $shift, 23 - $shift, 24 - $shift, 25 - $shift, 26 - $shift, 27 - $shift, 28 - $shift, 29 - $shift, 30 - $shift, 31 - $shift, ], ) }; } let x = match imm8 { 0 => shuffle!(0), 1 => shuffle!(1), 2 => shuffle!(2), 3 => shuffle!(3), 4 => shuffle!(4), 5 => shuffle!(5), 6 => shuffle!(6), 7 => shuffle!(7), 8 => shuffle!(8), 9 => shuffle!(9), 10 => shuffle!(10), 11 => shuffle!(11), 12 => shuffle!(12), 13 => shuffle!(13), 14 => shuffle!(14), 15 => shuffle!(15), _ => shuffle!(16), }; transmute(x) } /// Shifts `a` left by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pslldq, imm8 = 1))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_slli_si128_impl(a, imm8) } /// Shifts `a` right by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrldq, imm8 = 1))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_srli_si128_impl(a, imm8) } /// Shifts packed 16-bit integers in `a` left by `imm8` while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psllw, imm8 = 7))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_slli_epi16(a: __m128i, imm8: i32) -> __m128i { transmute(pslliw(a.as_i16x8(), imm8)) } /// Shifts packed 16-bit integers in `a` left by `count` while shifting in /// zeros.
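// An illustrative sketch of a whole-register byte shift (assuming an SSE2 target, an
// `unsafe` context, and these intrinsics in scope):
//
//     let a = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
//     let r = _mm_slli_si128(a, 4); // bytes move toward the high end; the low 4 bytes become 0
//     // r now holds [0, 0, 0, 0, 1, 2, ..., 12]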
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psllw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i { transmute(psllw(a.as_i16x8(), count.as_i16x8())) } /// Shifts packed 32-bit integers in `a` left by `imm8` while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pslld, imm8 = 7))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_slli_epi32(a: __m128i, imm8: i32) -> __m128i { transmute(psllid(a.as_i32x4(), imm8)) } /// Shifts packed 32-bit integers in `a` left by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pslld))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i { transmute(pslld(a.as_i32x4(), count.as_i32x4())) } /// Shifts packed 64-bit integers in `a` left by `imm8` while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psllq, imm8 = 7))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_slli_epi64(a: __m128i, imm8: i32) -> __m128i { transmute(pslliq(a.as_i64x2(), imm8)) } /// Shifts packed 64-bit integers in `a` left by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psllq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i { transmute(psllq(a.as_i64x2(), count.as_i64x2())) } /// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in sign /// bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psraw, imm8 = 1))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srai_epi16(a: __m128i, imm8: i32) -> __m128i { transmute(psraiw(a.as_i16x8(), imm8)) } /// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign /// bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psraw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i { transmute(psraw(a.as_i16x8(), count.as_i16x8())) } /// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in sign /// bits. 
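// An illustrative sketch contrasting shift-by-immediate with shift-by-count (assuming an
// SSE2 target, an `unsafe` context, and these intrinsics in scope):
//
//     let a = _mm_set1_epi32(1);
//     let by_imm = _mm_slli_epi32(a, 3);    // every lane becomes 8
//     let count = _mm_cvtsi32_si128(3);     // shift amount is taken from the low 64 bits
//     let by_vec = _mm_sll_epi32(a, count); // same result, amount supplied in a vector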
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrad, imm8 = 1))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srai_epi32(a: __m128i, imm8: i32) -> __m128i { transmute(psraid(a.as_i32x4(), imm8)) } /// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign /// bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrad))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i { transmute(psrad(a.as_i32x4(), count.as_i32x4())) } /// Shifts `a` right by `imm8` bytes while shifting in zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrldq, imm8 = 1))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i { _mm_srli_si128_impl(a, imm8) } /// Implementation detail: converts the immediate argument of the /// `_mm_srli_si128` intrinsic into a compile-time constant. #[inline] #[target_feature(enable = "sse2")] unsafe fn _mm_srli_si128_impl(a: __m128i, imm8: i32) -> __m128i { let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32); let a = a.as_i8x16(); macro_rules! shuffle { ($shift:expr) => { simd_shuffle16( a, zero, [ 0 + $shift, 1 + $shift, 2 + $shift, 3 + $shift, 4 + $shift, 5 + $shift, 6 + $shift, 7 + $shift, 8 + $shift, 9 + $shift, 10 + $shift, 11 + $shift, 12 + $shift, 13 + $shift, 14 + $shift, 15 + $shift, ], ) }; } let x: i8x16 = match imm8 { 0 => shuffle!(0), 1 => shuffle!(1), 2 => shuffle!(2), 3 => shuffle!(3), 4 => shuffle!(4), 5 => shuffle!(5), 6 => shuffle!(6), 7 => shuffle!(7), 8 => shuffle!(8), 9 => shuffle!(9), 10 => shuffle!(10), 11 => shuffle!(11), 12 => shuffle!(12), 13 => shuffle!(13), 14 => shuffle!(14), 15 => shuffle!(15), _ => shuffle!(16), }; transmute(x) } /// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrlw, imm8 = 1))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srli_epi16(a: __m128i, imm8: i32) -> __m128i { transmute(psrliw(a.as_i16x8(), imm8)) } /// Shifts packed 16-bit integers in `a` right by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrlw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i { transmute(psrlw(a.as_i16x8(), count.as_i16x8())) } /// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in /// zeros. 
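// An illustrative sketch of arithmetic vs. logical right shifts (assuming an SSE2 target,
// an `unsafe` context, and these intrinsics in scope):
//
//     let a = _mm_set1_epi32(-16);
//     let arithmetic = _mm_srai_epi32(a, 2); // shifts in sign bits: every lane is -4
//     let logical = _mm_srli_epi32(a, 2);    // shifts in zeros: every lane is 0x3FFF_FFFC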
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrld, imm8 = 8))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srli_epi32(a: __m128i, imm8: i32) -> __m128i { transmute(psrlid(a.as_i32x4(), imm8)) } /// Shifts packed 32-bit integers in `a` right by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrld))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i { transmute(psrld(a.as_i32x4(), count.as_i32x4())) } /// Shifts packed 64-bit integers in `a` right by `imm8` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrlq, imm8 = 1))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srli_epi64(a: __m128i, imm8: i32) -> __m128i { transmute(psrliq(a.as_i64x2(), imm8)) } /// Shifts packed 64-bit integers in `a` right by `count` while shifting in /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(psrlq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i { transmute(psrlq(a.as_i64x2(), count.as_i64x2())) } /// Computes the bitwise AND of 128 bits (representing integer data) in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(andps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i { simd_and(a, b) } /// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and /// then AND with `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(andnps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) } /// Computes the bitwise OR of 128 bits (representing integer data) in `a` and /// `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(orps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i { simd_or(a, b) } /// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and /// `b`. 
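// An illustrative sketch of the and-not idiom for clearing bits (assuming an SSE2 target,
// an `unsafe` context, and these intrinsics in scope):
//
//     let v = _mm_set1_epi32(0b1111);
//     let mask = _mm_set1_epi32(0b0101);
//     let r = _mm_andnot_si128(mask, v); // computes (!mask) & v, i.e. 0b1010 in every lane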
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(xorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i { simd_xor(a, b) } /// Compares packed 8-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpeqb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) } /// Compares packed 16-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpeqw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) } /// Compares packed 32-bit integers in `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpeqd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) } /// Compares packed 8-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) } /// Compares packed 16-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) } /// Compares packed 32-bit integers in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) } /// Compares packed 8-bit integers in `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) } /// Compares packed 16-bit integers in `a` and `b` for less-than.
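// An illustrative sketch of an integer comparison producing a per-lane mask (assuming an
// SSE2 target, an `unsafe` context, and these intrinsics in scope):
//
//     let a = _mm_setr_epi32(1, 2, 3, 4);
//     let b = _mm_setr_epi32(1, 0, 3, 0);
//     let eq = _mm_cmpeq_epi32(a, b); // lanes 0 and 2 are all-ones, lanes 1 and 3 are all-zeros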
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) } /// Compares packed 32-bit integers in `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pcmpgtd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) } /// Converts the lower two packed 32-bit integers in `a` to packed /// double-precision (64-bit) floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtdq2pd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { let a = a.as_i32x4(); simd_cast::<i32x2, __m128d>(simd_shuffle2(a, a, [0, 1])) } /// Returns `a` with its lower element replaced by `b` after converting it to /// an `f64`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsi2sd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d { simd_insert(a, 0, b as f64) } /// Converts packed 32-bit integers in `a` to packed single-precision (32-bit) /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ps) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtdq2ps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 { cvtdq2ps(a.as_i32x4()) } /// Converts packed single-precision (32-bit) floating-point elements in `a` /// to packed 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtps2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i { transmute(cvtps2dq(a)) } /// Returns a vector whose lowest element is `a` and all higher elements are /// `0`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i { transmute(i32x4::new(a, 0, 0, 0)) } /// Returns the lowest element of `a`.
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 { simd_extract(a.as_i32x4(), 0) } /// Sets packed 64-bit integers with the supplied values, from highest to /// lowest. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64x) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i { transmute(i64x2::new(e0, e1)) } /// Sets packed 32-bit integers with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi32) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { transmute(i32x4::new(e0, e1, e2, e3)) } /// Sets packed 16-bit integers with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi16) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_epi16( e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16, ) -> __m128i { transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) } /// Sets packed 8-bit integers with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi8) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_epi8( e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8, ) -> __m128i { #[rustfmt::skip] transmute(i8x16::new( e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, )) } /// Broadcasts 64-bit integer `a` to all elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i { _mm_set_epi64x(a, a) } /// Broadcasts 32-bit integer `a` to all elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi32) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i { _mm_set_epi32(a, a, a, a) } /// Broadcasts 16-bit integer `a` to all elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi16) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i { _mm_set_epi16(a, a, a, a, a, a, a, a) } /// Broadcasts 8-bit integer `a` to all elements. 
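// An illustrative sketch of the argument ordering of the set intrinsics (assuming an SSE2
// target, an `unsafe` context, and these intrinsics in scope):
//
//     let a = _mm_set_epi32(3, 2, 1, 0);  // arguments run high-to-low, so element 0 is 0
//     let b = _mm_setr_epi32(3, 2, 1, 0); // arguments run low-to-high, so element 0 is 3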
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi8) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i { _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) } /// Sets packed 32-bit integers with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi32) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { _mm_set_epi32(e0, e1, e2, e3) } /// Sets packed 16-bit integers with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi16) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setr_epi16( e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16, ) -> __m128i { _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7) } /// Sets packed 8-bit integers with the supplied values in reverse order. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi8) #[inline] #[target_feature(enable = "sse2")] // no particular instruction to test #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setr_epi8( e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8, ) -> __m128i { #[rustfmt::skip] _mm_set_epi8( e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, ) } /// Returns a vector with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(xorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setzero_si128() -> __m128i { _mm_set1_epi64x(0) } /// Loads 64-bit integer from memory into first element of returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64) #[inline] #[target_feature(enable = "sse2")] // FIXME movsd on windows #[cfg_attr( all( test, not(windows), not(all(target_os = "linux", target_arch = "x86_64")), target_arch = "x86_64" ), assert_instr(movq) )] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i { _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64)) } /// Loads 128-bits of integer data from memory into a new vector. /// /// `mem_addr` must be aligned on a 16-byte boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i { *mem_addr } /// Loads 128-bits of integer data from memory into a new vector. /// /// `mem_addr` does not need to be aligned on any particular boundary. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i { let mut dst: __m128i = _mm_undefined_si128(); ptr::copy_nonoverlapping( mem_addr as *const u8, &mut dst as *mut __m128i as *mut u8, mem::size_of::<__m128i>(), ); dst } /// Conditionally store 8-bit integer elements from `a` into memory using /// `mask`. /// /// Elements are not stored when the highest bit is not set in the /// corresponding element. /// /// `mem_addr` should correspond to a 128-bit memory location and does not need /// to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmoveu_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(maskmovdqu))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) { maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr) } /// Stores 128-bits of integer data from `a` into memory. /// /// `mem_addr` must be aligned on a 16-byte boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { *mem_addr = a; } /// Stores 128-bits of integer data from `a` into memory. /// /// `mem_addr` does not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { storeudq(mem_addr as *mut i8, a); } /// Stores the lower 64-bit integer `a` to a memory location. /// /// `mem_addr` does not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_epi64) #[inline] #[target_feature(enable = "sse2")] // FIXME mov on windows, movlps on i686 #[cfg_attr( all( test, not(windows), not(all(target_os = "linux", target_arch = "x86_64")), target_arch = "x86_64" ), assert_instr(movq) )] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { ptr::copy_nonoverlapping(&a as *const _ as *const u8, mem_addr as *mut u8, 8); } /// Stores a 128-bit integer vector to a 128-bit aligned memory location. /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { intrinsics::nontemporal_store(mem_addr, a); } /// Stores a 32-bit integer value in the specified memory location. 
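// An illustrative round trip through unaligned memory (assuming an SSE2 target, an
// `unsafe` context, and these intrinsics in scope):
//
//     let mut buf = [0u8; 16];
//     let v = _mm_set1_epi8(7);
//     _mm_storeu_si128(buf.as_mut_ptr() as *mut __m128i, v); // no alignment requirement
//     let back = _mm_loadu_si128(buf.as_ptr() as *const __m128i);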
/// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movnti))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { intrinsics::nontemporal_store(mem_addr, a); } /// Returns a vector where the low element is extracted from `a` and its upper /// element is zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64) #[inline] #[target_feature(enable = "sse2")] // FIXME movd on windows, movd on i686 #[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i { let zero = _mm_setzero_si128(); let r: i64x2 = simd_shuffle2(a.as_i64x2(), zero.as_i64x2(), [0, 2]); transmute(r) } /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(packsswb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) } /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using signed saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(packssdw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) } /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers /// using unsigned saturation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(packuswb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) } /// Returns the `imm8` element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pextrw, imm8 = 9))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 { simd_extract::<_, i16>(a.as_i16x8(), (imm8 & 7) as u32) as i32 } /// Returns a new vector where the `imm8` element of `a` is replaced with `i`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pinsrw, imm8 = 9))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32, imm8: i32) -> __m128i { transmute(simd_insert(a.as_i16x8(), (imm8 & 7) as u32, i as i16)) } /// Returns a mask of the most significant bit of each element in `a`. 
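// An illustrative sketch of extracting and replacing a 16-bit lane (assuming an SSE2
// target, an `unsafe` context, and these intrinsics in scope):
//
//     let v = _mm_setr_epi16(10, 11, 12, 13, 14, 15, 16, 17);
//     let third: i32 = _mm_extract_epi16(v, 2); // 12
//     let updated = _mm_insert_epi16(v, 99, 2); // lane 2 replaced with 99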
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pmovmskb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { pmovmskb(a.as_i8x16()) } /// Shuffles 32-bit integers in `a` using the control in `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pshufd, imm8 = 9))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i { // simd_shuffleX requires that its selector parameter be made up of // constant values, but we can't enforce that here. In spirit, we need // to write a `match` on all possible values of a byte, and for each value, // hard-code the correct `simd_shuffleX` call using only constants. We // then hope for LLVM to do the rest. // // Of course, that's... awful. So we try to use macros to do it for us. let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i32x4(); macro_rules! shuffle_done { ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { simd_shuffle4(a, a, [$x01, $x23, $x45, $x67]) }; } macro_rules! shuffle_x67 { ($x01:expr, $x23:expr, $x45:expr) => { match (imm8 >> 6) & 0b11 { 0b00 => shuffle_done!($x01, $x23, $x45, 0), 0b01 => shuffle_done!($x01, $x23, $x45, 1), 0b10 => shuffle_done!($x01, $x23, $x45, 2), _ => shuffle_done!($x01, $x23, $x45, 3), } }; } macro_rules! shuffle_x45 { ($x01:expr, $x23:expr) => { match (imm8 >> 4) & 0b11 { 0b00 => shuffle_x67!($x01, $x23, 0), 0b01 => shuffle_x67!($x01, $x23, 1), 0b10 => shuffle_x67!($x01, $x23, 2), _ => shuffle_x67!($x01, $x23, 3), } }; } macro_rules! shuffle_x23 { ($x01:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => shuffle_x45!($x01, 0), 0b01 => shuffle_x45!($x01, 1), 0b10 => shuffle_x45!($x01, 2), _ => shuffle_x45!($x01, 3), } }; } let x: i32x4 = match imm8 & 0b11 { 0b00 => shuffle_x23!(0), 0b01 => shuffle_x23!(1), 0b10 => shuffle_x23!(2), _ => shuffle_x23!(3), }; transmute(x) } /// Shuffles 16-bit integers in the high 64 bits of `a` using the control in /// `imm8`. /// /// Put the results in the high 64 bits of the returned vector, with the low 64 /// bits being copied from `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflehi_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pshufhw, imm8 = 9))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i { // See _mm_shuffle_epi32. let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i16x8(); macro_rules! shuffle_done { ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { simd_shuffle8(a, a, [0, 1, 2, 3, $x01 + 4, $x23 + 4, $x45 + 4, $x67 + 4]) }; } macro_rules! shuffle_x67 { ($x01:expr, $x23:expr, $x45:expr) => { match (imm8 >> 6) & 0b11 { 0b00 => shuffle_done!($x01, $x23, $x45, 0), 0b01 => shuffle_done!($x01, $x23, $x45, 1), 0b10 => shuffle_done!($x01, $x23, $x45, 2), _ => shuffle_done!($x01, $x23, $x45, 3), } }; } macro_rules! shuffle_x45 { ($x01:expr, $x23:expr) => { match (imm8 >> 4) & 0b11 { 0b00 => shuffle_x67!($x01, $x23, 0), 0b01 => shuffle_x67!($x01, $x23, 1), 0b10 => shuffle_x67!($x01, $x23, 2), _ => shuffle_x67!($x01, $x23, 3), } }; } macro_rules!
shuffle_x23 { ($x01:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => shuffle_x45!($x01, 0), 0b01 => shuffle_x45!($x01, 1), 0b10 => shuffle_x45!($x01, 2), _ => shuffle_x45!($x01, 3), } }; } let x: i16x8 = match imm8 & 0b11 { 0b00 => shuffle_x23!(0), 0b01 => shuffle_x23!(1), 0b10 => shuffle_x23!(2), _ => shuffle_x23!(3), }; transmute(x) } /// Shuffles 16-bit integers in the low 64 bits of `a` using the control in /// `imm8`. /// /// Put the results in the low 64 bits of the returned vector, with the high 64 /// bits being copied from `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflelo_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(pshuflw, imm8 = 9))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shufflelo_epi16(a: __m128i, imm8: i32) -> __m128i { // See _mm_shuffle_epi32. let imm8 = (imm8 & 0xFF) as u8; let a = a.as_i16x8(); macro_rules! shuffle_done { ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => { simd_shuffle8(a, a, [$x01, $x23, $x45, $x67, 4, 5, 6, 7]) }; } macro_rules! shuffle_x67 { ($x01:expr, $x23:expr, $x45:expr) => { match (imm8 >> 6) & 0b11 { 0b00 => shuffle_done!($x01, $x23, $x45, 0), 0b01 => shuffle_done!($x01, $x23, $x45, 1), 0b10 => shuffle_done!($x01, $x23, $x45, 2), _ => shuffle_done!($x01, $x23, $x45, 3), } }; } macro_rules! shuffle_x45 { ($x01:expr, $x23:expr) => { match (imm8 >> 4) & 0b11 { 0b00 => shuffle_x67!($x01, $x23, 0), 0b01 => shuffle_x67!($x01, $x23, 1), 0b10 => shuffle_x67!($x01, $x23, 2), _ => shuffle_x67!($x01, $x23, 3), } }; } macro_rules! shuffle_x23 { ($x01:expr) => { match (imm8 >> 2) & 0b11 { 0b00 => shuffle_x45!($x01, 0), 0b01 => shuffle_x45!($x01, 1), 0b10 => shuffle_x45!($x01, 2), _ => shuffle_x45!($x01, 3), } }; } let x: i16x8 = match imm8 & 0b11 { 0b00 => shuffle_x23!(0), 0b01 => shuffle_x23!(1), 0b10 => shuffle_x23!(2), _ => shuffle_x23!(3), }; transmute(x) } /// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpckhbw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { transmute::<i8x16, _>(simd_shuffle16( a.as_i8x16(), b.as_i8x16(), [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], )) } /// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpckhwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); transmute::<i16x8, _>(x) } /// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
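// An illustrative sketch of a 32-bit lane shuffle (assuming an SSE2 target, an `unsafe`
// context, and these intrinsics in scope):
//
//     let a = _mm_setr_epi32(10, 20, 30, 40);
//     // each pair of control bits selects a source lane; 0b00_01_10_11 reverses the vector
//     let rev = _mm_shuffle_epi32(a, 0b00_01_10_11); // rev holds [40, 30, 20, 10]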
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(unpckhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { transmute::<i32x4, _>(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) } /// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(unpckhpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { transmute::<i64x2, _>(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [1, 3])) } /// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi8) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpcklbw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { transmute::<i8x16, _>(simd_shuffle16( a.as_i8x16(), b.as_i8x16(), [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], )) } /// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi16) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(punpcklwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); transmute::<i16x8, _>(x) } /// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(unpcklps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { transmute::<i32x4, _>(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) } /// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { transmute::<i64x2, _>(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [0, 2])) } /// Returns a new vector with the low element of `a` replaced by the sum of the /// low elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(addsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) } /// Adds packed double-precision (64-bit) floating-point elements in `a` and /// `b`.
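// An illustrative sketch of zero-extending bytes with the unpack intrinsics (assuming an
// SSE2 target, an `unsafe` context, and these intrinsics in scope):
//
//     let bytes = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
//     let zero = _mm_setzero_si128();
//     let lo = _mm_unpacklo_epi8(bytes, zero); // bytes 0..8 widened to 16-bit lanes
//     let hi = _mm_unpackhi_epi8(bytes, zero); // bytes 8..16 widened to 16-bit lanes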
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(addpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { simd_add(a, b) } /// Returns a new vector with the low element of `a` replaced by the result of /// dividing the lower element of `a` by the lower element of `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(divsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) } /// Divides packed double-precision (64-bit) floating-point elements in `a` by /// packed elements in `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(divpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { simd_div(a, b) } /// Returns a new vector with the low element of `a` replaced by the maximum /// of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(maxsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { maxsd(a, b) } /// Returns a new vector with the maximum values from corresponding elements in /// `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(maxpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { maxpd(a, b) } /// Returns a new vector with the low element of `a` replaced by the minimum /// of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(minsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { minsd(a, b) } /// Returns a new vector with the minimum values from corresponding elements in /// `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(minpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { minpd(a, b) } /// Returns a new vector with the low element of `a` replaced by multiplying the /// low elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(mulsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) } /// Multiplies packed double-precision (64-bit) floating-point elements in `a` /// and `b`.
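// An illustrative sketch contrasting the scalar (`_sd`) and packed (`_pd`) forms (assuming
// an SSE2 target, an `unsafe` context, and these intrinsics in scope):
//
//     let a = _mm_set_pd(10.0, 1.0); // high lane 10.0, low lane 1.0
//     let b = _mm_set_pd(20.0, 2.0);
//     let s = _mm_add_sd(a, b); // low lane 3.0, high lane copied from `a` (10.0)
//     let p = _mm_add_pd(a, b); // both lanes added: low 3.0, high 30.0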
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(mulpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { simd_mul(a, b) } /// Returns a new vector with the low element of `a` replaced by the square /// root of the lower element of `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(sqrtsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b))) } /// Returns a new vector with the square root of each of the values in `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(sqrtpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d { sqrtpd(a) } /// Returns a new vector with the low element of `a` replaced by subtracting the /// low element of `b` from the low element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(subsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) } /// Subtracts packed double-precision (64-bit) floating-point elements in `b` /// from `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(subpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { simd_sub(a, b) } /// Computes the bitwise AND of packed double-precision (64-bit) floating-point /// elements in `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(andps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { let a: __m128i = transmute(a); let b: __m128i = transmute(b); transmute(_mm_and_si128(a, b)) } /// Computes the bitwise NOT of `a` and then AND with `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(andnps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { let a: __m128i = transmute(a); let b: __m128i = transmute(b); transmute(_mm_andnot_si128(a, b)) } /// Computes the bitwise OR of `a` and `b`.
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(orps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { let a: __m128i = transmute(a); let b: __m128i = transmute(b); transmute(_mm_or_si128(a, b)) } /// Computes the bitwise XOR of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(xorps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { let a: __m128i = transmute(a); let b: __m128i = transmute(b); transmute(_mm_xor_si128(a, b)) } /// Returns a new vector with the low element of `a` replaced by the equality /// comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpeqsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 0) } /// Returns a new vector with the low element of `a` replaced by the less-than /// comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpltsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 1) } /// Returns a new vector with the low element of `a` replaced by the /// less-than-or-equal comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmplesd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 2) } /// Returns a new vector with the low element of `a` replaced by the /// greater-than comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpltsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } /// Returns a new vector with the low element of `a` replaced by the /// greater-than-or-equal comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmplesd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } /// Returns a new vector with the low element of `a` replaced by the result /// of comparing both of the lower elements of `a` and `b` to `NaN`. If /// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` /// otherwise.
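// An illustrative sketch of a branchless per-lane minimum built from a compare mask
// (assuming an SSE2 target, an `unsafe` context, and these intrinsics in scope):
//
//     let a = _mm_set_pd(1.0, 4.0);
//     let b = _mm_set_pd(2.0, 3.0);
//     let mask = _mm_cmplt_pd(a, b); // all-ones where a < b, all-zeros elsewhere
//     let min = _mm_or_pd(_mm_and_pd(mask, a), _mm_andnot_pd(mask, b));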
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpordsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 7) } /// Returns a new vector with the low element of `a` replaced by the result of /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpunordsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 3) } /// Returns a new vector with the low element of `a` replaced by the not-equal /// comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpneqsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 4) } /// Returns a new vector with the low element of `a` replaced by the /// not-less-than comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnltsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 5) } /// Returns a new vector with the low element of `a` replaced by the /// not-less-than-or-equal comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnlesd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { cmpsd(a, b, 6) } /// Returns a new vector with the low element of `a` replaced by the /// not-greater-than comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnltsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } /// Returns a new vector with the low element of `a` replaced by the /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnlesd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) } /// Compares corresponding elements in `a` and `b` for equality. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpeqpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 0) } /// Compares corresponding elements in `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpltpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 1) } /// Compares corresponding elements in `a` and `b` for less-than-or-equal /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmplepd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 2) } /// Compares corresponding elements in `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpltpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmplt_pd(b, a) } /// Compares corresponding elements in `a` and `b` for greater-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmplepd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmple_pd(b, a) } /// Compares corresponding elements in `a` and `b` to see if neither is `NaN`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpordpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 7) } /// Compares corresponding elements in `a` and `b` to see if either is `NaN`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpunordpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 3) } /// Compares corresponding elements in `a` and `b` for not-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpneqpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 4) } /// Compares corresponding elements in `a` and `b` for not-less-than. 
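///
/// A small usage sketch, added for illustration only; it assumes an
/// SSE2-capable x86_64 target (intrinsics via `core::arch::x86_64`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_setr_pd(1.0, 5.0);
///     let b = _mm_setr_pd(2.0, 3.0);
///     // element 0: 1.0 is less than 2.0, so "not less than" is false;
///     // element 1: 5.0 is not less than 3.0, so the predicate holds
///     let r = _mm_cmpnlt_pd(a, b);
///     assert_eq!(_mm_movemask_pd(r), 0b10);
/// }
/// ```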
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnltpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 5) } /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnlepd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { cmppd(a, b, 6) } /// Compares corresponding elements in `a` and `b` for not-greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnltpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmpnlt_pd(b, a) } /// Compares corresponding elements in `a` and `b` for /// not-greater-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cmpnlepd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { _mm_cmpnle_pd(b, a) } /// Compares the lower element of `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { comieqsd(a, b) } /// Compares the lower element of `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { comiltsd(a, b) } /// Compares the lower element of `a` and `b` for less-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { comilesd(a, b) } /// Compares the lower element of `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { comigtsd(a, b) } /// Compares the lower element of `a` and `b` for greater-than-or-equal. 
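///
/// An illustrative sketch (not part of Intel's documentation; assumes an
/// SSE2-capable x86_64 target with the intrinsics in `core::arch::x86_64`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_setr_pd(2.0, 0.0);
///     let b = _mm_setr_pd(1.0, 9.0);
///     // only the low elements take part in the comparison: 2.0 >= 1.0
///     assert_eq!(_mm_comige_sd(a, b), 1);
/// }
/// ```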
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { comigesd(a, b) } /// Compares the lower element of `a` and `b` for not-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(comisd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { comineqsd(a, b) } /// Compares the lower element of `a` and `b` for equality. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { ucomieqsd(a, b) } /// Compares the lower element of `a` and `b` for less-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { ucomiltsd(a, b) } /// Compares the lower element of `a` and `b` for less-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { ucomilesd(a, b) } /// Compares the lower element of `a` and `b` for greater-than. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { ucomigtsd(a, b) } /// Compares the lower element of `a` and `b` for greater-than-or-equal. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { ucomigesd(a, b) } /// Compares the lower element of `a` and `b` for not-equal. 
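///
/// A short usage sketch added for illustration (assumes an SSE2-capable
/// x86_64 target; the intrinsics are taken from `core::arch::x86_64`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_setr_pd(1.0, 7.0);
///     let b = _mm_setr_pd(2.0, 7.0);
///     // the high elements are ignored; 1.0 != 2.0, so the result is 1
///     assert_eq!(_mm_ucomineq_sd(a, b), 1);
/// }
/// ```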
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(ucomisd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { ucomineqsd(a, b) } /// Converts packed double-precision (64-bit) floating-point elements in `a` to /// packed single-precision (32-bit) floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtpd2ps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 { cvtpd2ps(a) } /// Converts packed single-precision (32-bit) floating-point elements in `a` to /// packed /// double-precision (64-bit) floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtps2pd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d { cvtps2pd(a) } /// Converts packed double-precision (64-bit) floating-point elements in `a` to /// packed 32-bit integers. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtpd2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { transmute(cvtpd2dq(a)) } /// Converts the lower double-precision (64-bit) floating-point element in `a` to /// a 32-bit integer. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsd2si))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 { cvtsd2si(a) } /// Converts the lower double-precision (64-bit) floating-point element in `b` /// to a single-precision (32-bit) floating-point element, stores the result in /// the lower element of the return value, and copies the upper element from `a` /// to the upper element of the return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsd2ss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { cvtsd2ss(a, b) } /// Returns the lower double-precision (64-bit) floating-point element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 { simd_extract(a, 0) } /// Converts the lower single-precision (32-bit) floating-point element in `b` /// to a double-precision (64-bit) floating-point element, stores the result in /// the lower element of the return value, and copies the upper element from `a` /// to the upper element of the return value. 
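///
/// An illustrative sketch, not from Intel's documentation; it assumes an
/// SSE2-capable x86_64 target with the intrinsics in `core::arch::x86_64`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_setr_pd(1.5, 2.5);
///     let b = _mm_setr_ps(4.25, 0.0, 0.0, 0.0);
///     // the low element is `b`'s low f32 widened to f64; the high element
///     // (2.5) is copied from `a`
///     let r = _mm_cvtss_sd(a, b);
///     assert_eq!(_mm_cvtsd_f64(r), 4.25);
/// }
/// ```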
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtss2sd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { cvtss2sd(a, b) } /// Converts packed double-precision (64-bit) floating-point elements in `a` to /// packed 32-bit integers with truncation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttpd2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { transmute(cvttpd2dq(a)) } /// Converts the lower double-precision (64-bit) floating-point element in `a` /// to a 32-bit integer with truncation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttsd2si))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 { cvttsd2si(a) } /// Converts packed single-precision (32-bit) floating-point elements in `a` to /// packed 32-bit integers with truncation. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epi32) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttps2dq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i { transmute(cvttps2dq(a)) } /// Copies double-precision (64-bit) floating-point element `a` to the lower /// element of the packed 64-bit return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_sd(a: f64) -> __m128d { _mm_set_pd(0.0, a) } /// Broadcasts double-precision (64-bit) floating-point value a to all elements /// of the return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set1_pd(a: f64) -> __m128d { _mm_set_pd(a, a) } /// Broadcasts double-precision (64-bit) floating-point value a to all elements /// of the return value. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_pd1(a: f64) -> __m128d { _mm_set_pd(a, a) } /// Sets packed double-precision (64-bit) floating-point elements in the return /// value with the supplied values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d { __m128d(b, a) } /// Sets packed double-precision (64-bit) floating-point elements in the return /// value with the supplied values in reverse order. 
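///
/// A minimal sketch of the argument order, added for illustration (assumes
/// an SSE2-capable x86_64 target, intrinsics via `core::arch::x86_64`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // unlike `_mm_set_pd`, the first argument lands in the lowest element
///     assert_eq!(_mm_cvtsd_f64(_mm_setr_pd(1.0, 2.0)), 1.0);
///     assert_eq!(_mm_cvtsd_f64(_mm_set_pd(1.0, 2.0)), 2.0);
/// }
/// ```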
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d { _mm_set_pd(b, a) } /// Returns packed double-precision (64-bit) floating-point elements with all /// zeros. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setzero_pd() -> __m128d { _mm_set_pd(0.0, 0.0) } /// Returns a mask of the most significant bit of each element in `a`. /// /// The mask is stored in the 2 least significant bits of the return value. /// All other bits are set to `0`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movmskpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { movmskpd(a) } /// Loads 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from memory into the returned vector. /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection /// exception may be generated. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d { *(mem_addr as *const __m128d) } /// Loads a 64-bit double-precision value to the low element of a /// 128-bit integer vector and clears the upper element. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d { _mm_setr_pd(*mem_addr, 0.) } /// Loads a double-precision value into the high-order bits of a 128-bit /// vector of `[2 x double]`. The low-order bits are copied from the low-order /// bits of the first operand. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movhpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { _mm_setr_pd(simd_extract(a, 0), *mem_addr) } /// Loads a double-precision value into the low-order bits of a 128-bit /// vector of `[2 x double]`. The high-order bits are copied from the /// high-order bits of the first operand. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movlpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { _mm_setr_pd(*mem_addr, simd_extract(a, 1)) } /// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit /// aligned memory location. 
/// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) { intrinsics::nontemporal_store(mem_addr as *mut __m128d, a); } /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a /// memory location. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { *mem_addr = simd_extract(a, 0) } /// Stores 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. `mem_addr` must be aligned /// on a 16-byte boundary or a general-protection exception may be generated. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movaps))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { *(mem_addr as *mut __m128d) = a; } /// Stores 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from `a` into memory. /// `mem_addr` does not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { storeupd(mem_addr as *mut i8, a); } /// Stores the lower double-precision (64-bit) floating-point element from `a` /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a /// 16-byte boundary or a general-protection exception may be generated. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_pd) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { let b: __m128d = simd_shuffle2(a, a, [0, 0]); *(mem_addr as *mut __m128d) = b; } /// Stores the lower double-precision (64-bit) floating-point element from `a` /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a /// 16-byte boundary or a general-protection exception may be generated. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { let b: __m128d = simd_shuffle2(a, a, [0, 0]); *(mem_addr as *mut __m128d) = b; } /// Stores 2 double-precision (64-bit) floating-point elements from `a` into /// memory in reverse order. 
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection /// exception may be generated. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { let b: __m128d = simd_shuffle2(a, a, [1, 0]); *(mem_addr as *mut __m128d) = b; } /// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a /// memory location. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { *mem_addr = simd_extract(a, 1); } /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a /// memory location. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { *mem_addr = simd_extract(a, 0); } /// Loads a double-precision (64-bit) floating-point element from memory /// into both elements of returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd) #[inline] #[target_feature(enable = "sse2")] // #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d { let d = *mem_addr; _mm_setr_pd(d, d) } /// Loads a double-precision (64-bit) floating-point element from memory /// into both elements of returned vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1) #[inline] #[target_feature(enable = "sse2")] // #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { _mm_load1_pd(mem_addr) } /// Loads 2 double-precision (64-bit) floating-point elements from memory into /// the returned vector in reverse order. `mem_addr` must be aligned on a /// 16-byte boundary or a general-protection exception may be generated. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movapd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { let a = _mm_load_pd(mem_addr); simd_shuffle2(a, a, [1, 0]) } /// Loads 128-bits (composed of 2 packed double-precision (64-bit) /// floating-point elements) from memory into the returned vector. /// `mem_addr` does not need to be aligned on any particular boundary. 
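///
/// A short usage sketch (illustrative only; assumes an SSE2-capable x86_64
/// target with the intrinsics available through `core::arch::x86_64`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // the source pointer only has to be valid, not 16-byte aligned
///     let data = [0.0f64, 1.0, 2.0];
///     let r = _mm_loadu_pd(data.as_ptr().add(1));
///     assert_eq!(_mm_cvtsd_f64(r), 1.0);
/// }
/// ```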
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movups))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { let mut dst = _mm_undefined_pd(); ptr::copy_nonoverlapping( mem_addr as *const u8, &mut dst as *mut __m128d as *mut u8, mem::size_of::<__m128d>(), ); dst } /// Constructs a 128-bit floating-point vector of `[2 x double]` from two /// 128-bit vector parameters of `[2 x double]`, using the immediate-value /// parameter as a specifier. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(shufpd, imm8 = 1))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { match imm8 & 0b11 { 0b00 => simd_shuffle2(a, b, [0, 2]), 0b01 => simd_shuffle2(a, b, [1, 2]), 0b10 => simd_shuffle2(a, b, [0, 3]), _ => simd_shuffle2(a, b, [1, 3]), } } /// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower /// 64 bits are set to the lower 64 bits of the second parameter. The upper /// 64 bits are set to the upper 64 bits of the first parameter. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { _mm_setr_pd(simd_extract(b, 0), simd_extract(a, 1)) } /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit /// floating-point vector of `[4 x float]`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 { transmute(a) } /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit /// integer vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i { transmute(a) } /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit /// floating-point vector of `[2 x double]`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d { transmute(a) } /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit /// integer vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_si128) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i { transmute(a) } /// Casts a 128-bit integer vector into a 128-bit floating-point vector /// of `[2 x double]`. 
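///
/// An illustrative sketch (not from Intel's documentation; assumes an
/// SSE2-capable x86_64 target, intrinsics via `core::arch::x86_64`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // a cast reinterprets the bits; it performs no numeric conversion
///     let bits = _mm_set1_epi64x(0x3FF0_0000_0000_0000); // bit pattern of 1.0f64
///     assert_eq!(_mm_cvtsd_f64(_mm_castsi128_pd(bits)), 1.0);
/// }
/// ```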
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d { transmute(a) } /// Casts a 128-bit integer vector into a 128-bit floating-point vector /// of `[4 x float]`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ps) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 { transmute(a) } /// Returns a vector of type `__m128d` with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_undefined_pd() -> __m128d { // FIXME: this function should return MaybeUninit<__m128d> mem::MaybeUninit::<__m128d>::uninit().assume_init() } /// Returns a vector of type `__m128i` with undefined elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_si128) #[inline] #[target_feature(enable = "sse2")] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_undefined_si128() -> __m128i { // FIXME: this function should return MaybeUninit<__m128i> mem::MaybeUninit::<__m128i>::uninit().assume_init() } /// The resulting `__m128d` element is composed of the high-order values of /// the two `__m128d` interleaved input elements, i.e.: /// /// * The `[127:64]` bits are copied from the `[127:64]` bits of the second /// input * The `[63:0]` bits are copied from the `[127:64]` bits of the first /// input /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(unpckhpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { simd_shuffle2(a, b, [1, 3]) } /// The resulting `__m128d` element is composed of the low-order values of /// the two `__m128d` interleaved input elements, i.e.: /// /// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input /// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { simd_shuffle2(a, b, [0, 2]) } /// Adds two signed or unsigned 64-bit integer values, returning the /// lower 64 bits of the sum. #[inline] #[target_feature(enable = "sse2,mmx")] #[cfg_attr(test, assert_instr(paddq))] pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 { paddq(a, b) } /// Multiplies 32-bit unsigned integer values contained in the lower bits /// of the two 64-bit integer vectors and returns the 64-bit unsigned /// product. 
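///
/// An illustrative sketch, not part of the original documentation; it
/// assumes an MMX- and SSE2-capable x86 target, and builds the `__m64`
/// operands with `transmute`, the same way the tests in this file do:
///
/// ```ignore
/// use std::mem::transmute;
///
/// unsafe {
///     // only the low 32 bits of each operand are used, as unsigned integers
///     let a: __m64 = transmute(7i64);
///     let b: __m64 = transmute(6i64);
///     let r: i64 = transmute(_mm_mul_su32(a, b));
///     assert_eq!(r, 42);
/// }
/// ```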
#[inline] #[target_feature(enable = "sse2,mmx")] #[cfg_attr(test, assert_instr(pmuludq))] pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 { pmuludq2(a, b) } /// Subtracts signed or unsigned 64-bit integer values and writes the /// difference to the corresponding bits in the destination. #[inline] #[target_feature(enable = "sse2,mmx")] #[cfg_attr(test, assert_instr(psubq))] pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 { psubq(a, b) } /// Converts the two signed 32-bit integer elements of a 64-bit vector of /// `[2 x i32]` into two double-precision floating-point values, returned in a /// 128-bit vector of `[2 x double]`. #[inline] #[target_feature(enable = "sse2,mmx")] #[cfg_attr(test, assert_instr(cvtpi2pd))] pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d { cvtpi2pd(a) } /// Initializes both 64-bit values in a 128-bit vector of `[2 x i64]` with /// the specified 64-bit integer values. #[inline] #[target_feature(enable = "sse2,mmx")] // no particular instruction to test pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i { _mm_set_epi64x(transmute(e1), transmute(e0)) } /// Initializes both values in a 128-bit vector of `[2 x i64]` with the /// specified 64-bit value. #[inline] #[target_feature(enable = "sse2,mmx")] // no particular instruction to test pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i { _mm_set_epi64x(transmute(a), transmute(a)) } /// Constructs a 128-bit integer vector, initialized in reverse order /// with the specified 64-bit integral values. #[inline] #[target_feature(enable = "sse2,mmx")] // no particular instruction to test pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i { _mm_set_epi64x(transmute(e0), transmute(e1)) } /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit /// integer. #[inline] #[target_feature(enable = "sse2,mmx")] // #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong // instr? pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 { transmute(simd_extract::<_, i64>(a.as_i64x2(), 0)) } /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the /// upper bits. #[inline] #[target_feature(enable = "sse2,mmx")] // #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong // instr? pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i { _mm_set_epi64x(0, transmute(a)) } /// Converts the two double-precision floating-point elements of a /// 128-bit vector of `[2 x double]` into two signed 32-bit integer values, /// returned in a 64-bit vector of `[2 x i32]`. #[inline] #[target_feature(enable = "sse2,mmx")] #[cfg_attr(test, assert_instr(cvtpd2pi))] pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 { cvtpd2pi(a) } /// Converts the two double-precision floating-point elements of a /// 128-bit vector of `[2 x double]` into two signed 32-bit integer values, /// returned in a 64-bit vector of `[2 x i32]`. /// If the result of either conversion is inexact, the result is truncated /// (rounded towards zero) regardless of the current MXCSR setting. 
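///
/// A usage sketch added for illustration (assumes an MMX- and SSE2-capable
/// x86 target; the result is read back with `transmute`, mirroring the
/// tests in this file):
///
/// ```ignore
/// use std::mem::transmute;
///
/// unsafe {
///     let a = _mm_setr_pd(1.9, -2.9);
///     // truncation rounds toward zero regardless of the MXCSR rounding
///     // mode, so 1.9 becomes 1 and -2.9 becomes -2
///     let r: [i32; 2] = transmute(_mm_cvttpd_pi32(a));
///     assert_eq!(r, [1, -2]);
/// }
/// ```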
#[inline] #[target_feature(enable = "sse2,mmx")] #[cfg_attr(test, assert_instr(cvttpd2pi))] pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 { cvttpd2pi(a) } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sse2.pause"] fn pause(); #[link_name = "llvm.x86.sse2.clflush"] fn clflush(p: *mut u8); #[link_name = "llvm.x86.sse2.lfence"] fn lfence(); #[link_name = "llvm.x86.sse2.mfence"] fn mfence(); #[link_name = "llvm.x86.sse2.padds.b"] fn paddsb(a: i8x16, b: i8x16) -> i8x16; #[link_name = "llvm.x86.sse2.padds.w"] fn paddsw(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.x86.sse2.paddus.b"] fn paddsub(a: u8x16, b: u8x16) -> u8x16; #[link_name = "llvm.x86.sse2.paddus.w"] fn paddsuw(a: u16x8, b: u16x8) -> u16x8; #[link_name = "llvm.x86.sse2.pavg.b"] fn pavgb(a: u8x16, b: u8x16) -> u8x16; #[link_name = "llvm.x86.sse2.pavg.w"] fn pavgw(a: u16x8, b: u16x8) -> u16x8; #[link_name = "llvm.x86.sse2.pmadd.wd"] fn pmaddwd(a: i16x8, b: i16x8) -> i32x4; #[link_name = "llvm.x86.sse2.pmaxs.w"] fn pmaxsw(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.x86.sse2.pmaxu.b"] fn pmaxub(a: u8x16, b: u8x16) -> u8x16; #[link_name = "llvm.x86.sse2.pmins.w"] fn pminsw(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.x86.sse2.pminu.b"] fn pminub(a: u8x16, b: u8x16) -> u8x16; #[link_name = "llvm.x86.sse2.pmulh.w"] fn pmulhw(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.x86.sse2.pmulhu.w"] fn pmulhuw(a: u16x8, b: u16x8) -> u16x8; #[link_name = "llvm.x86.sse2.pmulu.dq"] fn pmuludq(a: u32x4, b: u32x4) -> u64x2; #[link_name = "llvm.x86.sse2.psad.bw"] fn psadbw(a: u8x16, b: u8x16) -> u64x2; #[link_name = "llvm.x86.sse2.psubs.b"] fn psubsb(a: i8x16, b: i8x16) -> i8x16; #[link_name = "llvm.x86.sse2.psubs.w"] fn psubsw(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.x86.sse2.psubus.b"] fn psubusb(a: u8x16, b: u8x16) -> u8x16; #[link_name = "llvm.x86.sse2.psubus.w"] fn psubusw(a: u16x8, b: u16x8) -> u16x8; #[link_name = "llvm.x86.sse2.pslli.w"] fn pslliw(a: i16x8, imm8: i32) -> i16x8; #[link_name = "llvm.x86.sse2.psll.w"] fn psllw(a: i16x8, count: i16x8) -> i16x8; #[link_name = "llvm.x86.sse2.pslli.d"] fn psllid(a: i32x4, imm8: i32) -> i32x4; #[link_name = "llvm.x86.sse2.psll.d"] fn pslld(a: i32x4, count: i32x4) -> i32x4; #[link_name = "llvm.x86.sse2.pslli.q"] fn pslliq(a: i64x2, imm8: i32) -> i64x2; #[link_name = "llvm.x86.sse2.psll.q"] fn psllq(a: i64x2, count: i64x2) -> i64x2; #[link_name = "llvm.x86.sse2.psrai.w"] fn psraiw(a: i16x8, imm8: i32) -> i16x8; #[link_name = "llvm.x86.sse2.psra.w"] fn psraw(a: i16x8, count: i16x8) -> i16x8; #[link_name = "llvm.x86.sse2.psrai.d"] fn psraid(a: i32x4, imm8: i32) -> i32x4; #[link_name = "llvm.x86.sse2.psra.d"] fn psrad(a: i32x4, count: i32x4) -> i32x4; #[link_name = "llvm.x86.sse2.psrli.w"] fn psrliw(a: i16x8, imm8: i32) -> i16x8; #[link_name = "llvm.x86.sse2.psrl.w"] fn psrlw(a: i16x8, count: i16x8) -> i16x8; #[link_name = "llvm.x86.sse2.psrli.d"] fn psrlid(a: i32x4, imm8: i32) -> i32x4; #[link_name = "llvm.x86.sse2.psrl.d"] fn psrld(a: i32x4, count: i32x4) -> i32x4; #[link_name = "llvm.x86.sse2.psrli.q"] fn psrliq(a: i64x2, imm8: i32) -> i64x2; #[link_name = "llvm.x86.sse2.psrl.q"] fn psrlq(a: i64x2, count: i64x2) -> i64x2; #[link_name = "llvm.x86.sse2.cvtdq2ps"] fn cvtdq2ps(a: i32x4) -> __m128; #[link_name = "llvm.x86.sse2.cvtps2dq"] fn cvtps2dq(a: __m128) -> i32x4; #[link_name = "llvm.x86.sse2.maskmov.dqu"] fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8); #[link_name = "llvm.x86.sse2.packsswb.128"] fn packsswb(a: i16x8, b: i16x8) -> i8x16; 
#[link_name = "llvm.x86.sse2.packssdw.128"] fn packssdw(a: i32x4, b: i32x4) -> i16x8; #[link_name = "llvm.x86.sse2.packuswb.128"] fn packuswb(a: i16x8, b: i16x8) -> u8x16; #[link_name = "llvm.x86.sse2.pmovmskb.128"] fn pmovmskb(a: i8x16) -> i32; #[link_name = "llvm.x86.sse2.max.sd"] fn maxsd(a: __m128d, b: __m128d) -> __m128d; #[link_name = "llvm.x86.sse2.max.pd"] fn maxpd(a: __m128d, b: __m128d) -> __m128d; #[link_name = "llvm.x86.sse2.min.sd"] fn minsd(a: __m128d, b: __m128d) -> __m128d; #[link_name = "llvm.x86.sse2.min.pd"] fn minpd(a: __m128d, b: __m128d) -> __m128d; #[link_name = "llvm.x86.sse2.sqrt.sd"] fn sqrtsd(a: __m128d) -> __m128d; #[link_name = "llvm.x86.sse2.sqrt.pd"] fn sqrtpd(a: __m128d) -> __m128d; #[link_name = "llvm.x86.sse2.cmp.sd"] fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; #[link_name = "llvm.x86.sse2.cmp.pd"] fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; #[link_name = "llvm.x86.sse2.comieq.sd"] fn comieqsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.comilt.sd"] fn comiltsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.comile.sd"] fn comilesd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.comigt.sd"] fn comigtsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.comige.sd"] fn comigesd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.comineq.sd"] fn comineqsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.ucomieq.sd"] fn ucomieqsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.ucomilt.sd"] fn ucomiltsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.ucomile.sd"] fn ucomilesd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.ucomigt.sd"] fn ucomigtsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.ucomige.sd"] fn ucomigesd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.ucomineq.sd"] fn ucomineqsd(a: __m128d, b: __m128d) -> i32; #[link_name = "llvm.x86.sse2.movmsk.pd"] fn movmskpd(a: __m128d) -> i32; #[link_name = "llvm.x86.sse2.cvtpd2ps"] fn cvtpd2ps(a: __m128d) -> __m128; #[link_name = "llvm.x86.sse2.cvtps2pd"] fn cvtps2pd(a: __m128) -> __m128d; #[link_name = "llvm.x86.sse2.cvtpd2dq"] fn cvtpd2dq(a: __m128d) -> i32x4; #[link_name = "llvm.x86.sse2.cvtsd2si"] fn cvtsd2si(a: __m128d) -> i32; #[link_name = "llvm.x86.sse2.cvtsd2ss"] fn cvtsd2ss(a: __m128, b: __m128d) -> __m128; #[link_name = "llvm.x86.sse2.cvtss2sd"] fn cvtss2sd(a: __m128d, b: __m128) -> __m128d; #[link_name = "llvm.x86.sse2.cvttpd2dq"] fn cvttpd2dq(a: __m128d) -> i32x4; #[link_name = "llvm.x86.sse2.cvttsd2si"] fn cvttsd2si(a: __m128d) -> i32; #[link_name = "llvm.x86.sse2.cvttps2dq"] fn cvttps2dq(a: __m128) -> i32x4; #[link_name = "llvm.x86.sse2.storeu.dq"] fn storeudq(mem_addr: *mut i8, a: __m128i); #[link_name = "llvm.x86.sse2.storeu.pd"] fn storeupd(mem_addr: *mut i8, a: __m128d); #[link_name = "llvm.x86.mmx.padd.q"] fn paddq(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.pmulu.dq"] fn pmuludq2(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.psub.q"] fn psubq(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.sse.cvtpi2pd"] fn cvtpi2pd(a: __m64) -> __m128d; #[link_name = "llvm.x86.sse.cvtpd2pi"] fn cvtpd2pi(a: __m128d) -> __m64; #[link_name = "llvm.x86.sse.cvttpd2pi"] fn cvttpd2pi(a: __m128d) -> __m64; } #[cfg(test)] mod tests { use std::f32; use std::f64::{self, NAN}; use std::i32; use std::mem::{self, transmute}; use crate::core_arch::{simd::*, x86::*}; use stdsimd_test::simd_test; use test::black_box; // Used to 
inhibit constant-folding. #[test] fn test_mm_pause() { unsafe { _mm_pause() } } #[simd_test(enable = "sse2")] unsafe fn test_mm_clflush() { let x = 0; _mm_clflush(&x as *const _ as *mut u8); } #[simd_test(enable = "sse2")] unsafe fn test_mm_lfence() { _mm_lfence(); } #[simd_test(enable = "sse2")] unsafe fn test_mm_mfence() { _mm_mfence(); } #[simd_test(enable = "sse2")] unsafe fn test_mm_add_epi8() { let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); #[rustfmt::skip] let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); let r = _mm_add_epi8(a, b); #[rustfmt::skip] let e = _mm_setr_epi8( 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, ); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_add_epi8_overflow() { let a = _mm_set1_epi8(0x7F); let b = _mm_set1_epi8(1); let r = _mm_add_epi8(a, b); assert_eq_m128i(r, _mm_set1_epi8(-128)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_add_epi16() { let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); let r = _mm_add_epi16(a, b); let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_add_epi32() { let a = _mm_setr_epi32(0, 1, 2, 3); let b = _mm_setr_epi32(4, 5, 6, 7); let r = _mm_add_epi32(a, b); let e = _mm_setr_epi32(4, 6, 8, 10); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_add_epi64() { let a = _mm_setr_epi64x(0, 1); let b = _mm_setr_epi64x(2, 3); let r = _mm_add_epi64(a, b); let e = _mm_setr_epi64x(2, 4); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_adds_epi8() { let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); #[rustfmt::skip] let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); let r = _mm_adds_epi8(a, b); #[rustfmt::skip] let e = _mm_setr_epi8( 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, ); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_adds_epi8_saturate_positive() { let a = _mm_set1_epi8(0x7F); let b = _mm_set1_epi8(1); let r = _mm_adds_epi8(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_adds_epi8_saturate_negative() { let a = _mm_set1_epi8(-0x80); let b = _mm_set1_epi8(-1); let r = _mm_adds_epi8(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_adds_epi16() { let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); let r = _mm_adds_epi16(a, b); let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_adds_epi16_saturate_positive() { let a = _mm_set1_epi16(0x7FFF); let b = _mm_set1_epi16(1); let r = _mm_adds_epi16(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_adds_epi16_saturate_negative() { let a = _mm_set1_epi16(-0x8000); let b = _mm_set1_epi16(-1); let r = _mm_adds_epi16(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_adds_epu8() { let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); #[rustfmt::skip] let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); let r = _mm_adds_epu8(a, b); #[rustfmt::skip] let e = _mm_setr_epi8( 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, ); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn 
test_mm_adds_epu8_saturate() { let a = _mm_set1_epi8(!0); let b = _mm_set1_epi8(1); let r = _mm_adds_epu8(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_adds_epu16() { let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); let r = _mm_adds_epu16(a, b); let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_adds_epu16_saturate() { let a = _mm_set1_epi16(!0); let b = _mm_set1_epi16(1); let r = _mm_adds_epu16(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_avg_epu8() { let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9)); let r = _mm_avg_epu8(a, b); assert_eq_m128i(r, _mm_set1_epi8(6)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_avg_epu16() { let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9)); let r = _mm_avg_epu16(a, b); assert_eq_m128i(r, _mm_set1_epi16(6)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_madd_epi16() { let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); let r = _mm_madd_epi16(a, b); let e = _mm_setr_epi32(29, 81, 149, 233); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_max_epi16() { let a = _mm_set1_epi16(1); let b = _mm_set1_epi16(-1); let r = _mm_max_epi16(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_max_epu8() { let a = _mm_set1_epi8(1); let b = _mm_set1_epi8(!0); let r = _mm_max_epu8(a, b); assert_eq_m128i(r, b); } #[simd_test(enable = "sse2")] unsafe fn test_mm_min_epi16() { let a = _mm_set1_epi16(1); let b = _mm_set1_epi16(-1); let r = _mm_min_epi16(a, b); assert_eq_m128i(r, b); } #[simd_test(enable = "sse2")] unsafe fn test_mm_min_epu8() { let a = _mm_set1_epi8(1); let b = _mm_set1_epi8(!0); let r = _mm_min_epu8(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_mulhi_epi16() { let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); let r = _mm_mulhi_epi16(a, b); assert_eq_m128i(r, _mm_set1_epi16(-16)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_mulhi_epu16() { let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001)); let r = _mm_mulhi_epu16(a, b); assert_eq_m128i(r, _mm_set1_epi16(15)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_mullo_epi16() { let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); let r = _mm_mullo_epi16(a, b); assert_eq_m128i(r, _mm_set1_epi16(-17960)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_mul_epu32() { let a = _mm_setr_epi64x(1_000_000_000, 1 << 34); let b = _mm_setr_epi64x(1_000_000_000, 1 << 35); let r = _mm_mul_epu32(a, b); let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_sad_epu8() { #[rustfmt::skip] let a = _mm_setr_epi8( 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, 1, 2, 3, 4, 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8, 1, 2, 3, 4, ); let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); let r = _mm_sad_epu8(a, b); let e = _mm_setr_epi64x(1020, 614); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_sub_epi8() { let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6)); let r = _mm_sub_epi8(a, b); assert_eq_m128i(r, _mm_set1_epi8(-1)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_sub_epi16() { let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6)); let r = _mm_sub_epi16(a, b); assert_eq_m128i(r, _mm_set1_epi16(-1)); } #[simd_test(enable = 
"sse2")] unsafe fn test_mm_sub_epi32() { let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6)); let r = _mm_sub_epi32(a, b); assert_eq_m128i(r, _mm_set1_epi32(-1)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_sub_epi64() { let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6)); let r = _mm_sub_epi64(a, b); assert_eq_m128i(r, _mm_set1_epi64x(-1)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_subs_epi8() { let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); let r = _mm_subs_epi8(a, b); assert_eq_m128i(r, _mm_set1_epi8(3)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_subs_epi8_saturate_positive() { let a = _mm_set1_epi8(0x7F); let b = _mm_set1_epi8(-1); let r = _mm_subs_epi8(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_subs_epi8_saturate_negative() { let a = _mm_set1_epi8(-0x80); let b = _mm_set1_epi8(1); let r = _mm_subs_epi8(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_subs_epi16() { let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); let r = _mm_subs_epi16(a, b); assert_eq_m128i(r, _mm_set1_epi16(3)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_subs_epi16_saturate_positive() { let a = _mm_set1_epi16(0x7FFF); let b = _mm_set1_epi16(-1); let r = _mm_subs_epi16(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_subs_epi16_saturate_negative() { let a = _mm_set1_epi16(-0x8000); let b = _mm_set1_epi16(1); let r = _mm_subs_epi16(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_subs_epu8() { let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); let r = _mm_subs_epu8(a, b); assert_eq_m128i(r, _mm_set1_epi8(3)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_subs_epu8_saturate() { let a = _mm_set1_epi8(0); let b = _mm_set1_epi8(1); let r = _mm_subs_epu8(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_subs_epu16() { let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); let r = _mm_subs_epu16(a, b); assert_eq_m128i(r, _mm_set1_epi16(3)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_subs_epu16_saturate() { let a = _mm_set1_epi16(0); let b = _mm_set1_epi16(1); let r = _mm_subs_epu16(a, b); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_slli_si128() { #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); let r = _mm_slli_si128(a, 1); let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); assert_eq_m128i(r, e); #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); let r = _mm_slli_si128(a, 15); let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); assert_eq_m128i(r, e); #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); let r = _mm_slli_si128(a, 16); assert_eq_m128i(r, _mm_set1_epi8(0)); #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); let r = _mm_slli_si128(a, -1); assert_eq_m128i(_mm_set1_epi8(0), r); #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); let r = _mm_slli_si128(a, -0x80000000); assert_eq_m128i(r, _mm_set1_epi8(0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_slli_epi16() { #[rustfmt::skip] let a = _mm_setr_epi16( 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0, ); let r = _mm_slli_epi16(a, 4); #[rustfmt::skip] let e = _mm_setr_epi16( 0xFFF0 as u16 as i16, 0xFFF0 as u16 as i16, 0x0FF0, 0x00F0, 0, 0, 0, 
0, ); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_sll_epi16() { let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0); let r = _mm_sll_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0)); assert_eq_m128i(r, _mm_setr_epi16(0xFF0, 0, 0, 0, 0, 0, 0, 0)); let r = _mm_sll_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0)); assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_slli_epi32() { let r = _mm_slli_epi32(_mm_set1_epi32(0xFFFF), 4); assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_sll_epi32() { let a = _mm_set1_epi32(0xFFFF); let b = _mm_setr_epi32(4, 0, 0, 0); let r = _mm_sll_epi32(a, b); assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_slli_epi64() { let r = _mm_slli_epi64(_mm_set1_epi64x(0xFFFFFFFF), 4); assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_sll_epi64() { let a = _mm_set1_epi64x(0xFFFFFFFF); let b = _mm_setr_epi64x(4, 0); let r = _mm_sll_epi64(a, b); assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_srai_epi16() { let r = _mm_srai_epi16(_mm_set1_epi16(-1), 1); assert_eq_m128i(r, _mm_set1_epi16(-1)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_sra_epi16() { let a = _mm_set1_epi16(-1); let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); let r = _mm_sra_epi16(a, b); assert_eq_m128i(r, _mm_set1_epi16(-1)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_srai_epi32() { let r = _mm_srai_epi32(_mm_set1_epi32(-1), 1); assert_eq_m128i(r, _mm_set1_epi32(-1)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_sra_epi32() { let a = _mm_set1_epi32(-1); let b = _mm_setr_epi32(1, 0, 0, 0); let r = _mm_sra_epi32(a, b); assert_eq_m128i(r, _mm_set1_epi32(-1)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_srli_si128() { #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); let r = _mm_srli_si128(a, 1); #[rustfmt::skip] let e = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, ); assert_eq_m128i(r, e); #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); let r = _mm_srli_si128(a, 15); let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m128i(r, e); #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); let r = _mm_srli_si128(a, 16); assert_eq_m128i(r, _mm_set1_epi8(0)); #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); let r = _mm_srli_si128(a, -1); assert_eq_m128i(r, _mm_set1_epi8(0)); #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); let r = _mm_srli_si128(a, -0x80000000); assert_eq_m128i(r, _mm_set1_epi8(0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_srli_epi16() { #[rustfmt::skip] let a = _mm_setr_epi16( 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0, ); let r = _mm_srli_epi16(a, 4); #[rustfmt::skip] let e = _mm_setr_epi16( 0xFFF as u16 as i16, 0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0, ); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_srl_epi16() { let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0); let r = _mm_srl_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0)); assert_eq_m128i(r, _mm_setr_epi16(0xF, 0, 0, 0, 0, 0, 0, 0)); let r = _mm_srl_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0)); 
assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_srli_epi32() { let r = _mm_srli_epi32(_mm_set1_epi32(0xFFFF), 4); assert_eq_m128i(r, _mm_set1_epi32(0xFFF)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_srl_epi32() { let a = _mm_set1_epi32(0xFFFF); let b = _mm_setr_epi32(4, 0, 0, 0); let r = _mm_srl_epi32(a, b); assert_eq_m128i(r, _mm_set1_epi32(0xFFF)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_srli_epi64() { let r = _mm_srli_epi64(_mm_set1_epi64x(0xFFFFFFFF), 4); assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_srl_epi64() { let a = _mm_set1_epi64x(0xFFFFFFFF); let b = _mm_setr_epi64x(4, 0); let r = _mm_srl_epi64(a, b); assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_and_si128() { let a = _mm_set1_epi8(5); let b = _mm_set1_epi8(3); let r = _mm_and_si128(a, b); assert_eq_m128i(r, _mm_set1_epi8(1)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_andnot_si128() { let a = _mm_set1_epi8(5); let b = _mm_set1_epi8(3); let r = _mm_andnot_si128(a, b); assert_eq_m128i(r, _mm_set1_epi8(2)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_or_si128() { let a = _mm_set1_epi8(5); let b = _mm_set1_epi8(3); let r = _mm_or_si128(a, b); assert_eq_m128i(r, _mm_set1_epi8(7)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_xor_si128() { let a = _mm_set1_epi8(5); let b = _mm_set1_epi8(3); let r = _mm_xor_si128(a, b); assert_eq_m128i(r, _mm_set1_epi8(6)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpeq_epi8() { let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); let r = _mm_cmpeq_epi8(a, b); #[rustfmt::skip] assert_eq_m128i( r, _mm_setr_epi8( 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) ); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpeq_epi16() { let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0); let r = _mm_cmpeq_epi16(a, b); assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpeq_epi32() { let a = _mm_setr_epi32(0, 1, 2, 3); let b = _mm_setr_epi32(3, 2, 2, 0); let r = _mm_cmpeq_epi32(a, b); assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpgt_epi8() { let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); let b = _mm_set1_epi8(0); let r = _mm_cmpgt_epi8(a, b); let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpgt_epi16() { let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); let b = _mm_set1_epi16(0); let r = _mm_cmpgt_epi16(a, b); let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpgt_epi32() { let a = _mm_set_epi32(5, 0, 0, 0); let b = _mm_set1_epi32(0); let r = _mm_cmpgt_epi32(a, b); assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmplt_epi8() { let a = _mm_set1_epi8(0); let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); let r = _mm_cmplt_epi8(a, b); let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmplt_epi16() { let a = _mm_set1_epi16(0); let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); let r = 
_mm_cmplt_epi16(a, b); let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmplt_epi32() { let a = _mm_set1_epi32(0); let b = _mm_set_epi32(5, 0, 0, 0); let r = _mm_cmplt_epi32(a, b); assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtepi32_pd() { let a = _mm_set_epi32(35, 25, 15, 5); let r = _mm_cvtepi32_pd(a); assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtsi32_sd() { let a = _mm_set1_pd(3.5); let r = _mm_cvtsi32_sd(a, 5); assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtepi32_ps() { let a = _mm_setr_epi32(1, 2, 3, 4); let r = _mm_cvtepi32_ps(a); assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtps_epi32() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let r = _mm_cvtps_epi32(a); assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtsi32_si128() { let r = _mm_cvtsi32_si128(5); assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtsi128_si32() { let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0)); assert_eq!(r, 5); } #[simd_test(enable = "sse2")] unsafe fn test_mm_set_epi64x() { let r = _mm_set_epi64x(0, 1); assert_eq_m128i(r, _mm_setr_epi64x(1, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_set_epi32() { let r = _mm_set_epi32(0, 1, 2, 3); assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_set_epi16() { let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_set_epi8() { #[rustfmt::skip] let r = _mm_set_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); #[rustfmt::skip] let e = _mm_setr_epi8( 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, ); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_set1_epi64x() { let r = _mm_set1_epi64x(1); assert_eq_m128i(r, _mm_set1_epi64x(1)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_set1_epi32() { let r = _mm_set1_epi32(1); assert_eq_m128i(r, _mm_set1_epi32(1)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_set1_epi16() { let r = _mm_set1_epi16(1); assert_eq_m128i(r, _mm_set1_epi16(1)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_set1_epi8() { let r = _mm_set1_epi8(1); assert_eq_m128i(r, _mm_set1_epi8(1)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_setr_epi32() { let r = _mm_setr_epi32(0, 1, 2, 3); assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_setr_epi16() { let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_setr_epi8() { #[rustfmt::skip] let r = _mm_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); #[rustfmt::skip] let e = _mm_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_setzero_si128() { let r = _mm_setzero_si128(); assert_eq_m128i(r, _mm_set1_epi64x(0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_loadl_epi64() { let a = _mm_setr_epi64x(6, 5); let r = _mm_loadl_epi64(&a as *const _); assert_eq_m128i(r, _mm_setr_epi64x(6, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_load_si128() { let a = 
_mm_set_epi64x(5, 6); let r = _mm_load_si128(&a as *const _ as *const _); assert_eq_m128i(a, r); } #[simd_test(enable = "sse2")] unsafe fn test_mm_loadu_si128() { let a = _mm_set_epi64x(5, 6); let r = _mm_loadu_si128(&a as *const _ as *const _); assert_eq_m128i(a, r); } #[simd_test(enable = "sse2")] unsafe fn test_mm_maskmoveu_si128() { let a = _mm_set1_epi8(9); #[rustfmt::skip] let mask = _mm_set_epi8( 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ); let mut r = _mm_set1_epi8(0); _mm_maskmoveu_si128(a, mask, &mut r as *mut _ as *mut i8); let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_store_si128() { let a = _mm_set1_epi8(9); let mut r = _mm_set1_epi8(0); _mm_store_si128(&mut r as *mut _ as *mut __m128i, a); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_storeu_si128() { let a = _mm_set1_epi8(9); let mut r = _mm_set1_epi8(0); _mm_storeu_si128(&mut r as *mut _ as *mut __m128i, a); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_storel_epi64() { let a = _mm_setr_epi64x(2, 9); let mut r = _mm_set1_epi8(0); _mm_storel_epi64(&mut r as *mut _ as *mut __m128i, a); assert_eq_m128i(r, _mm_setr_epi64x(2, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_stream_si128() { let a = _mm_setr_epi32(1, 2, 3, 4); let mut r = _mm_undefined_si128(); _mm_stream_si128(&mut r as *mut _, a); assert_eq_m128i(r, a); } #[simd_test(enable = "sse2")] unsafe fn test_mm_stream_si32() { let a: i32 = 7; let mut mem = ::std::boxed::Box::::new(-1); _mm_stream_si32(&mut *mem as *mut i32, a); assert_eq!(a, *mem); } #[simd_test(enable = "sse2")] unsafe fn test_mm_move_epi64() { let a = _mm_setr_epi64x(5, 6); let r = _mm_move_epi64(a); assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_packs_epi16() { let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); let r = _mm_packs_epi16(a, b); #[rustfmt::skip] assert_eq_m128i( r, _mm_setr_epi8( 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F ) ); } #[simd_test(enable = "sse2")] unsafe fn test_mm_packs_epi32() { let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); let r = _mm_packs_epi32(a, b); assert_eq_m128i( r, _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF), ); } #[simd_test(enable = "sse2")] unsafe fn test_mm_packus_epi16() { let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); let r = _mm_packus_epi16(a, b); assert_eq_m128i( r, _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0), ); } #[simd_test(enable = "sse2")] unsafe fn test_mm_extract_epi16() { let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7); let r1 = _mm_extract_epi16(a, 0); let r2 = _mm_extract_epi16(a, 11); assert_eq!(r1, -1); assert_eq!(r2, 3); } #[simd_test(enable = "sse2")] unsafe fn test_mm_insert_epi16() { let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); let r = _mm_insert_epi16(a, 9, 0); let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_movemask_epi8() { #[rustfmt::skip] let a = _mm_setr_epi8( 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, 0b0101, 0b1111_0000u8 as i8, 0, 0, 0, 0, 0b1111_0000u8 as i8, 0b0101, 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, ); let r = _mm_movemask_epi8(a); assert_eq!(r, 0b10100100_00100101); } #[simd_test(enable = 
"sse2")] unsafe fn test_mm_shuffle_epi32() { let a = _mm_setr_epi32(5, 10, 15, 20); let r = _mm_shuffle_epi32(a, 0b00_01_01_11); let e = _mm_setr_epi32(20, 10, 10, 5); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_shufflehi_epi16() { let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20); let r = _mm_shufflehi_epi16(a, 0b00_01_01_11); let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_shufflelo_epi16() { let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4); let r = _mm_shufflelo_epi16(a, 0b00_01_01_11); let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_unpackhi_epi8() { #[rustfmt::skip] let a = _mm_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); #[rustfmt::skip] let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); let r = _mm_unpackhi_epi8(a, b); #[rustfmt::skip] let e = _mm_setr_epi8( 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, ); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_unpackhi_epi16() { let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); let r = _mm_unpackhi_epi16(a, b); let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_unpackhi_epi32() { let a = _mm_setr_epi32(0, 1, 2, 3); let b = _mm_setr_epi32(4, 5, 6, 7); let r = _mm_unpackhi_epi32(a, b); let e = _mm_setr_epi32(2, 6, 3, 7); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_unpackhi_epi64() { let a = _mm_setr_epi64x(0, 1); let b = _mm_setr_epi64x(2, 3); let r = _mm_unpackhi_epi64(a, b); let e = _mm_setr_epi64x(1, 3); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_unpacklo_epi8() { #[rustfmt::skip] let a = _mm_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); #[rustfmt::skip] let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); let r = _mm_unpacklo_epi8(a, b); #[rustfmt::skip] let e = _mm_setr_epi8( 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, ); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_unpacklo_epi16() { let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); let r = _mm_unpacklo_epi16(a, b); let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_unpacklo_epi32() { let a = _mm_setr_epi32(0, 1, 2, 3); let b = _mm_setr_epi32(4, 5, 6, 7); let r = _mm_unpacklo_epi32(a, b); let e = _mm_setr_epi32(0, 4, 1, 5); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_unpacklo_epi64() { let a = _mm_setr_epi64x(0, 1); let b = _mm_setr_epi64x(2, 3); let r = _mm_unpacklo_epi64(a, b); let e = _mm_setr_epi64x(0, 2); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_add_sd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(5.0, 10.0); let r = _mm_add_sd(a, b); assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_add_pd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(5.0, 10.0); let r = _mm_add_pd(a, b); assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_div_sd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(5.0, 10.0); let r = _mm_div_sd(a, b); assert_eq_m128d(r, 
_mm_setr_pd(0.2, 2.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_div_pd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(5.0, 10.0); let r = _mm_div_pd(a, b); assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_max_sd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(5.0, 10.0); let r = _mm_max_sd(a, b); assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_max_pd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(5.0, 10.0); let r = _mm_max_pd(a, b); assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_min_sd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(5.0, 10.0); let r = _mm_min_sd(a, b); assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_min_pd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(5.0, 10.0); let r = _mm_min_pd(a, b); assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_mul_sd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(5.0, 10.0); let r = _mm_mul_sd(a, b); assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_mul_pd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(5.0, 10.0); let r = _mm_mul_pd(a, b); assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_sqrt_sd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(5.0, 10.0); let r = _mm_sqrt_sd(a, b); assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_sqrt_pd() { let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0)); assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt())); } #[simd_test(enable = "sse2")] unsafe fn test_mm_sub_sd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(5.0, 10.0); let r = _mm_sub_sd(a, b); assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_sub_pd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(5.0, 10.0); let r = _mm_sub_pd(a, b); assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_and_pd() { let a = transmute(u64x2::splat(5)); let b = transmute(u64x2::splat(3)); let r = _mm_and_pd(a, b); let e = transmute(u64x2::splat(1)); assert_eq_m128d(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_andnot_pd() { let a = transmute(u64x2::splat(5)); let b = transmute(u64x2::splat(3)); let r = _mm_andnot_pd(a, b); let e = transmute(u64x2::splat(2)); assert_eq_m128d(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_or_pd() { let a = transmute(u64x2::splat(5)); let b = transmute(u64x2::splat(3)); let r = _mm_or_pd(a, b); let e = transmute(u64x2::splat(7)); assert_eq_m128d(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_xor_pd() { let a = transmute(u64x2::splat(5)); let b = transmute(u64x2::splat(3)); let r = _mm_xor_pd(a, b); let e = transmute(u64x2::splat(6)); assert_eq_m128d(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpeq_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(!0, transmute(2.0f64)); let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmplt_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); let e = _mm_setr_epi64x(!0, transmute(2.0f64)); let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn 
test_mm_cmple_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(!0, transmute(2.0f64)); let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpgt_sd() { let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(!0, transmute(2.0f64)); let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpge_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(!0, transmute(2.0f64)); let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpord_sd() { let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); let e = _mm_setr_epi64x(0, transmute(2.0f64)); let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpunord_sd() { let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); let e = _mm_setr_epi64x(!0, transmute(2.0f64)); let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpneq_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); let e = _mm_setr_epi64x(!0, transmute(2.0f64)); let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpnlt_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); let e = _mm_setr_epi64x(0, transmute(2.0f64)); let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpnle_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(0, transmute(2.0f64)); let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpngt_sd() { let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(0, transmute(2.0f64)); let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpnge_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(0, transmute(2.0f64)); let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpeq_pd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(!0, 0); let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmplt_pd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(0, !0); let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmple_pd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(!0, !0); let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpgt_pd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(0, 0); let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpge_pd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(!0, 0); let r = 
transmute::<_, __m128i>(_mm_cmpge_pd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpord_pd() { let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); let e = _mm_setr_epi64x(0, !0); let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpunord_pd() { let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); let e = _mm_setr_epi64x(!0, 0); let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpneq_pd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); let e = _mm_setr_epi64x(!0, !0); let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpnlt_pd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); let e = _mm_setr_epi64x(0, 0); let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpnle_pd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(0, 0); let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpngt_pd() { let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(0, !0); let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cmpnge_pd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); let e = _mm_setr_epi64x(0, !0); let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b)); assert_eq_m128i(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_comieq_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); assert!(_mm_comieq_sd(a, b) != 0); let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0)); assert!(_mm_comieq_sd(a, b) == 0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_comilt_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); assert!(_mm_comilt_sd(a, b) == 0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_comile_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); assert!(_mm_comile_sd(a, b) != 0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_comigt_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); assert!(_mm_comigt_sd(a, b) == 0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_comige_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); assert!(_mm_comige_sd(a, b) != 0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_comineq_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); assert!(_mm_comineq_sd(a, b) == 0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_ucomieq_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); assert!(_mm_ucomieq_sd(a, b) != 0); let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0)); assert!(_mm_ucomieq_sd(a, b) == 0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_ucomilt_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); assert!(_mm_ucomilt_sd(a, b) == 0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_ucomile_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); assert!(_mm_ucomile_sd(a, b) != 0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_ucomigt_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); assert!(_mm_ucomigt_sd(a, b) == 0); } #[simd_test(enable = "sse2")] unsafe fn 
test_mm_ucomige_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); assert!(_mm_ucomige_sd(a, b) != 0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_ucomineq_sd() { let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); assert!(_mm_ucomineq_sd(a, b) == 0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_movemask_pd() { let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0)); assert_eq!(r, 0b01); let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0)); assert_eq!(r, 0b11); } #[repr(align(16))] struct Memory { data: [f64; 4], } #[simd_test(enable = "sse2")] unsafe fn test_mm_load_pd() { let mem = Memory { data: [1.0f64, 2.0, 3.0, 4.0], }; let vals = &mem.data; let d = vals.as_ptr(); let r = _mm_load_pd(d); assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_load_sd() { let a = 1.; let expected = _mm_setr_pd(a, 0.); let r = _mm_load_sd(&a); assert_eq_m128d(r, expected); } #[simd_test(enable = "sse2")] unsafe fn test_mm_loadh_pd() { let a = _mm_setr_pd(1., 2.); let b = 3.; let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.); let r = _mm_loadh_pd(a, &b); assert_eq_m128d(r, expected); } #[simd_test(enable = "sse2")] unsafe fn test_mm_loadl_pd() { let a = _mm_setr_pd(1., 2.); let b = 3.; let expected = _mm_setr_pd(3., get_m128d(a, 1)); let r = _mm_loadl_pd(a, &b); assert_eq_m128d(r, expected); } #[simd_test(enable = "sse2")] unsafe fn test_mm_stream_pd() { #[repr(align(128))] struct Memory { pub data: [f64; 2], } let a = _mm_set1_pd(7.0); let mut mem = Memory { data: [-1.0; 2] }; _mm_stream_pd(&mut mem.data[0] as *mut f64, a); for i in 0..2 { assert_eq!(mem.data[i], get_m128d(a, i)); } } #[simd_test(enable = "sse2")] unsafe fn test_mm_store_sd() { let mut dest = 0.; let a = _mm_setr_pd(1., 2.); _mm_store_sd(&mut dest, a); assert_eq!(dest, _mm_cvtsd_f64(a)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_store_pd() { let mut mem = Memory { data: [0.0f64; 4] }; let vals = &mut mem.data; let a = _mm_setr_pd(1.0, 2.0); let d = vals.as_mut_ptr(); _mm_store_pd(d, *black_box(&a)); assert_eq!(vals[0], 1.0); assert_eq!(vals[1], 2.0); } #[simd_test(enable = "sse")] unsafe fn test_mm_storeu_pd() { let mut mem = Memory { data: [0.0f64; 4] }; let vals = &mut mem.data; let a = _mm_setr_pd(1.0, 2.0); let mut ofs = 0; let mut p = vals.as_mut_ptr(); // Make sure p is **not** aligned to 16-byte boundary if (p as usize) & 0xf == 0 { ofs = 1; p = p.offset(1); } _mm_storeu_pd(p, *black_box(&a)); if ofs > 0 { assert_eq!(vals[ofs - 1], 0.0); } assert_eq!(vals[ofs + 0], 1.0); assert_eq!(vals[ofs + 1], 2.0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_store1_pd() { let mut mem = Memory { data: [0.0f64; 4] }; let vals = &mut mem.data; let a = _mm_setr_pd(1.0, 2.0); let d = vals.as_mut_ptr(); _mm_store1_pd(d, *black_box(&a)); assert_eq!(vals[0], 1.0); assert_eq!(vals[1], 1.0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_store_pd1() { let mut mem = Memory { data: [0.0f64; 4] }; let vals = &mut mem.data; let a = _mm_setr_pd(1.0, 2.0); let d = vals.as_mut_ptr(); _mm_store_pd1(d, *black_box(&a)); assert_eq!(vals[0], 1.0); assert_eq!(vals[1], 1.0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_storer_pd() { let mut mem = Memory { data: [0.0f64; 4] }; let vals = &mut mem.data; let a = _mm_setr_pd(1.0, 2.0); let d = vals.as_mut_ptr(); _mm_storer_pd(d, *black_box(&a)); assert_eq!(vals[0], 2.0); assert_eq!(vals[1], 1.0); } #[simd_test(enable = "sse2")] unsafe fn test_mm_storeh_pd() { let mut dest = 0.; let a = _mm_setr_pd(1., 2.); _mm_storeh_pd(&mut dest, a); 
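        // `_mm_storeh_pd` writes only the upper (index 1) lane of `a` to memory,
        // so `dest` should now hold 2.0, i.e. `get_m128d(a, 1)`.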
assert_eq!(dest, get_m128d(a, 1)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_storel_pd() { let mut dest = 0.; let a = _mm_setr_pd(1., 2.); _mm_storel_pd(&mut dest, a); assert_eq!(dest, _mm_cvtsd_f64(a)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_loadr_pd() { let mut mem = Memory { data: [1.0f64, 2.0, 3.0, 4.0], }; let vals = &mut mem.data; let d = vals.as_ptr(); let r = _mm_loadr_pd(d); assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_loadu_pd() { let mut mem = Memory { data: [1.0f64, 2.0, 3.0, 4.0], }; let vals = &mut mem.data; let mut d = vals.as_ptr(); // make sure d is not aligned to 16-byte boundary let mut offset = 0; if (d as usize) & 0xf == 0 { offset = 1; d = d.offset(offset as isize); } let r = _mm_loadu_pd(d); let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64)); assert_eq_m128d(r, e); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtpd_ps() { let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0)); assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0)); let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0)); assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0)); let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN)); assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0)); let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64)); assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtps_pd() { let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0)); assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0)); let r = _mm_cvtps_pd(_mm_setr_ps( f32::MAX, f32::INFINITY, f32::NEG_INFINITY, f32::MIN, )); assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtpd_epi32() { let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0)); assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0)); let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0)); assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0)); let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN)); assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY)); assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN)); assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtsd_si32() { let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0)); assert_eq!(r, -2); let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN)); assert_eq!(r, i32::MIN); let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN)); assert_eq!(r, i32::MIN); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtsd_ss() { let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4); let b = _mm_setr_pd(2.0, -5.0); let r = _mm_cvtsd_ss(a, b); assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4)); let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY); let b = _mm_setr_pd(f64::INFINITY, -5.0); let r = _mm_cvtsd_ss(a, b); assert_eq_m128( r, _mm_setr_ps( f32::INFINITY, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY, ), ); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtsd_f64() { let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2)); assert_eq!(r, -1.1); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtss_sd() { let a = _mm_setr_pd(-1.1, 2.2); let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let r = _mm_cvtss_sd(a, b); assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2)); let a = _mm_setr_pd(-1.1, f64::INFINITY); let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 
3.0, 4.0); let r = _mm_cvtss_sd(a, b); assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvttpd_epi32() { let a = _mm_setr_pd(-1.1, 2.2); let r = _mm_cvttpd_epi32(a); assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0)); let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); let r = _mm_cvttpd_epi32(a); assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvttsd_si32() { let a = _mm_setr_pd(-1.1, 2.2); let r = _mm_cvttsd_si32(a); assert_eq!(r, -1); let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); let r = _mm_cvttsd_si32(a); assert_eq!(r, i32::MIN); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvttps_epi32() { let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6); let r = _mm_cvttps_epi32(a); assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6)); let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX); let r = _mm_cvttps_epi32(a); assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_set_sd() { let r = _mm_set_sd(-1.0_f64); assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_set1_pd() { let r = _mm_set1_pd(-1.0_f64); assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_set_pd1() { let r = _mm_set_pd1(-2.0_f64); assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_set_pd() { let r = _mm_set_pd(1.0_f64, 5.0_f64); assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_setr_pd() { let r = _mm_setr_pd(1.0_f64, -5.0_f64); assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_setzero_pd() { let r = _mm_setzero_pd(); assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_load1_pd() { let d = -5.0; let r = _mm_load1_pd(&d); assert_eq_m128d(r, _mm_setr_pd(d, d)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_load_pd1() { let d = -5.0; let r = _mm_load_pd1(&d); assert_eq_m128d(r, _mm_setr_pd(d, d)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_unpackhi_pd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(3.0, 4.0); let r = _mm_unpackhi_pd(a, b); assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_unpacklo_pd() { let a = _mm_setr_pd(1.0, 2.0); let b = _mm_setr_pd(3.0, 4.0); let r = _mm_unpacklo_pd(a, b); assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_shuffle_pd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(3., 4.); let expected = _mm_setr_pd(1., 3.); let r = _mm_shuffle_pd(a, b, 0); assert_eq_m128d(r, expected); } #[simd_test(enable = "sse2")] unsafe fn test_mm_move_sd() { let a = _mm_setr_pd(1., 2.); let b = _mm_setr_pd(3., 4.); let expected = _mm_setr_pd(3., 2.); let r = _mm_move_sd(a, b); assert_eq_m128d(r, expected); } #[simd_test(enable = "sse2")] unsafe fn test_mm_castpd_ps() { let a = _mm_set1_pd(0.); let expected = _mm_set1_ps(0.); let r = _mm_castpd_ps(a); assert_eq_m128(r, expected); } #[simd_test(enable = "sse2")] unsafe fn test_mm_castpd_si128() { let a = _mm_set1_pd(0.); let expected = _mm_set1_epi64x(0); let r = _mm_castpd_si128(a); assert_eq_m128i(r, expected); } #[simd_test(enable = "sse2")] unsafe fn test_mm_castps_pd() { let a = _mm_set1_ps(0.); let expected = _mm_set1_pd(0.); let r = _mm_castps_pd(a); 
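        // The `_mm_cast*` intrinsics only reinterpret the bits of the vector
        // (no numeric conversion is performed), so an all-zero `__m128`
        // reinterprets to an all-zero `__m128d`.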
assert_eq_m128d(r, expected); } #[simd_test(enable = "sse2")] unsafe fn test_mm_castps_si128() { let a = _mm_set1_ps(0.); let expected = _mm_set1_epi32(0); let r = _mm_castps_si128(a); assert_eq_m128i(r, expected); } #[simd_test(enable = "sse2")] unsafe fn test_mm_castsi128_pd() { let a = _mm_set1_epi64x(0); let expected = _mm_set1_pd(0.); let r = _mm_castsi128_pd(a); assert_eq_m128d(r, expected); } #[simd_test(enable = "sse2")] unsafe fn test_mm_castsi128_ps() { let a = _mm_set1_epi32(0); let expected = _mm_set1_ps(0.); let r = _mm_castsi128_ps(a); assert_eq_m128(r, expected); } #[simd_test(enable = "sse2,mmx")] unsafe fn test_mm_add_si64() { let a = 1i64; let b = 2i64; let expected = 3i64; let r = _mm_add_si64(transmute(a), transmute(b)); assert_eq!(transmute::<__m64, i64>(r), expected); } #[simd_test(enable = "sse2,mmx")] unsafe fn test_mm_mul_su32() { let a = _mm_setr_pi32(1, 2); let b = _mm_setr_pi32(3, 4); let expected = 3u64; let r = _mm_mul_su32(a, b); assert_eq_m64(r, transmute(expected)); } #[simd_test(enable = "sse2,mmx")] unsafe fn test_mm_sub_si64() { let a = 1i64; let b = 2i64; let expected = -1i64; let r = _mm_sub_si64(transmute(a), transmute(b)); assert_eq!(transmute::<__m64, i64>(r), expected); } #[simd_test(enable = "sse2,mmx")] unsafe fn test_mm_cvtpi32_pd() { let a = _mm_setr_pi32(1, 2); let expected = _mm_setr_pd(1., 2.); let r = _mm_cvtpi32_pd(a); assert_eq_m128d(r, expected); } #[simd_test(enable = "sse2,mmx")] unsafe fn test_mm_set_epi64() { let r = _mm_set_epi64(transmute(1i64), transmute(2i64)); assert_eq_m128i(r, _mm_setr_epi64x(2, 1)); } #[simd_test(enable = "sse2,mmx")] unsafe fn test_mm_set1_epi64() { let r = _mm_set1_epi64(transmute(1i64)); assert_eq_m128i(r, _mm_setr_epi64x(1, 1)); } #[simd_test(enable = "sse2,mmx")] unsafe fn test_mm_setr_epi64() { let r = _mm_setr_epi64(transmute(1i64), transmute(2i64)); assert_eq_m128i(r, _mm_setr_epi64x(1, 2)); } #[simd_test(enable = "sse2,mmx")] unsafe fn test_mm_movepi64_pi64() { let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0)); assert_eq_m64(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0)); } #[simd_test(enable = "sse2,mmx")] unsafe fn test_mm_movpi64_epi64() { let r = _mm_movpi64_epi64(_mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0)); assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); } #[simd_test(enable = "sse2,mmx")] unsafe fn test_mm_cvtpd_pi32() { let a = _mm_setr_pd(5., 0.); let r = _mm_cvtpd_pi32(a); assert_eq_m64(r, _mm_setr_pi32(5, 0)); } #[simd_test(enable = "sse2,mmx")] unsafe fn test_mm_cvttpd_pi32() { use std::{f64, i32}; let a = _mm_setr_pd(5., 0.); let r = _mm_cvttpd_pi32(a); assert_eq_m64(r, _mm_setr_pi32(5, 0)); let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); let r = _mm_cvttpd_pi32(a); assert_eq_m64(r, _mm_setr_pi32(i32::MIN, i32::MIN)); } } core_arch-0.1.5/src/x86/sse3.rs010064400007650000024000000212201343447103600143220ustar0000000000000000//! Streaming SIMD Extensions 3 (SSE3) use crate::{ core_arch::{ simd::*, simd_llvm::{simd_shuffle2, simd_shuffle4}, x86::*, }, mem::transmute, }; #[cfg(test)] use stdsimd_test::assert_instr; /// Alternatively add and subtract packed single-precision (32-bit) /// floating-point elements in `a` to/from packed elements in `b`. 
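///
/// As an illustration, for `a = [a0, a1, a2, a3]` and `b = [b0, b1, b2, b3]`
/// the result is `[a0 - b0, a1 + b1, a2 - b2, a3 + b3]`: the even-indexed
/// lanes are subtracted and the odd-indexed lanes are added.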
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_ps)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(addsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
    addsubps(a, b)
}

/// Alternatively add and subtract packed double-precision (64-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_pd)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(addsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
    addsubpd(a, b)
}

/// Horizontally adds adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`, and pack the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pd)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(haddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
    haddpd(a, b)
}

/// Horizontally adds adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`, and pack the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_ps)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(haddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
    haddps(a, b)
}

/// Horizontally subtract adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`, and pack the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pd)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(hsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
    hsubpd(a, b)
}

/// Horizontally subtracts adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`, and pack the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_ps)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(hsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
    hsubps(a, b)
}

/// Loads 128-bits of integer data from unaligned memory.
/// This intrinsic may perform better than `_mm_loadu_si128`
/// when the data crosses a cache line boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(lddqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
    transmute(lddqu(mem_addr as *const _))
}

/// Duplicate the low double-precision (64-bit) floating-point element
/// from `a`.
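///
/// For example, `a = [x, y]` becomes `[x, x]`.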
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movedup_pd) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(movddup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d { simd_shuffle2(a, a, [0, 0]) } /// Loads a double-precision (64-bit) floating-point element from memory /// into both elements of return vector. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loaddup_pd) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(movddup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d { _mm_load1_pd(mem_addr) } /// Duplicate odd-indexed single-precision (32-bit) floating-point elements /// from `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movehdup_ps) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(movshdup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 { simd_shuffle4(a, a, [1, 1, 3, 3]) } /// Duplicate even-indexed single-precision (32-bit) floating-point elements /// from `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_moveldup_ps) #[inline] #[target_feature(enable = "sse3")] #[cfg_attr(test, assert_instr(movsldup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_moveldup_ps(a: __m128) -> __m128 { simd_shuffle4(a, a, [0, 0, 2, 2]) } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sse3.addsub.ps"] fn addsubps(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse3.addsub.pd"] fn addsubpd(a: __m128d, b: __m128d) -> __m128d; #[link_name = "llvm.x86.sse3.hadd.pd"] fn haddpd(a: __m128d, b: __m128d) -> __m128d; #[link_name = "llvm.x86.sse3.hadd.ps"] fn haddps(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse3.hsub.pd"] fn hsubpd(a: __m128d, b: __m128d) -> __m128d; #[link_name = "llvm.x86.sse3.hsub.ps"] fn hsubps(a: __m128, b: __m128) -> __m128; #[link_name = "llvm.x86.sse3.ldu.dq"] fn lddqu(mem_addr: *const i8) -> i8x16; } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use crate::core_arch::x86::*; #[simd_test(enable = "sse3")] unsafe fn test_mm_addsub_ps() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_addsub_ps(a, b); assert_eq_m128(r, _mm_setr_ps(99.0, 25.0, 0.0, -15.0)); } #[simd_test(enable = "sse3")] unsafe fn test_mm_addsub_pd() { let a = _mm_setr_pd(-1.0, 5.0); let b = _mm_setr_pd(-100.0, 20.0); let r = _mm_addsub_pd(a, b); assert_eq_m128d(r, _mm_setr_pd(99.0, 25.0)); } #[simd_test(enable = "sse3")] unsafe fn test_mm_hadd_pd() { let a = _mm_setr_pd(-1.0, 5.0); let b = _mm_setr_pd(-100.0, 20.0); let r = _mm_hadd_pd(a, b); assert_eq_m128d(r, _mm_setr_pd(4.0, -80.0)); } #[simd_test(enable = "sse3")] unsafe fn test_mm_hadd_ps() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0); let r = _mm_hadd_ps(a, b); assert_eq_m128(r, _mm_setr_ps(4.0, -10.0, -80.0, -5.0)); } #[simd_test(enable = "sse3")] unsafe fn test_mm_hsub_pd() { let a = _mm_setr_pd(-1.0, 5.0); let b = _mm_setr_pd(-100.0, 20.0); let r = _mm_hsub_pd(a, b); assert_eq_m128d(r, _mm_setr_pd(-6.0, -120.0)); } #[simd_test(enable = "sse3")] unsafe fn test_mm_hsub_ps() { let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0); 
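        // `_mm_hsub_ps` computes [a0 - a1, a2 - a3, b0 - b1, b2 - b3], so with
        // these inputs the expected result is (-6.0, 10.0, -120.0, 5.0).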
        let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
        let r = _mm_hsub_ps(a, b);
        assert_eq_m128(r, _mm_setr_ps(-6.0, 10.0, -120.0, 5.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_lddqu_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_lddqu_si128(&a);
        assert_eq_m128i(a, r);
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_movedup_pd() {
        let a = _mm_setr_pd(-1.0, 5.0);
        let r = _mm_movedup_pd(a);
        assert_eq_m128d(r, _mm_setr_pd(-1.0, -1.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_movehdup_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let r = _mm_movehdup_ps(a);
        assert_eq_m128(r, _mm_setr_ps(5.0, 5.0, -10.0, -10.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_moveldup_ps() {
        let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
        let r = _mm_moveldup_ps(a);
        assert_eq_m128(r, _mm_setr_ps(-1.0, -1.0, 0.0, 0.0));
    }

    #[simd_test(enable = "sse3")]
    unsafe fn test_mm_loaddup_pd() {
        let d = -5.0;
        let r = _mm_loaddup_pd(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }
}
core_arch-0.1.5/src/x86/sse41.rs
//! Streaming SIMD Extensions 4.1 (SSE4.1)

use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    mem::transmute,
};

#[cfg(test)]
use stdsimd_test::assert_instr;

// SSE4 rounding constants
/// round to nearest
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
/// round down
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01;
/// round up
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_POS_INF: i32 = 0x02;
/// truncate
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_ZERO: i32 = 0x03;
/// use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04;
/// do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RAISE_EXC: i32 = 0x00;
/// suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NO_EXC: i32 = 0x08;
/// round to nearest and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NINT: i32 = 0x00;
/// round down and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_FLOOR: i32 = (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF);
/// round up and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CEIL: i32 = (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF);
/// truncate and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TRUNC: i32 = (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO);
/// use MXCSR.RC and do not suppress exceptions; see
/// `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RINT: i32 = (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION);
/// use MXCSR.RC and suppress exceptions; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NEARBYINT: i32 = (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION);

/// Blend packed 8-bit integers from `a` and `b` using `mask`
///
/// The high bit of each corresponding mask byte determines the selection.
/// If the high bit is set the element of `b` is selected. The element
/// of `a` is selected otherwise.
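///
/// For example, a mask byte of `0x80` (high bit set) takes the corresponding
/// byte from `b`, while a mask byte of `0x00` keeps the byte from `a`.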
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_epi8) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pblendvb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i { transmute(pblendvb(a.as_i8x16(), b.as_i8x16(), mask.as_i8x16())) } /// Blend packed 16-bit integers from `a` and `b` using the mask `imm8`. /// /// The mask bits determine the selection. A clear bit selects the /// corresponding element of `a`, and a set bit the corresponding /// element of `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi16) #[inline] #[target_feature(enable = "sse4.1")] // Note: LLVM7 prefers the single-precision floating-point domain when possible // see https://bugs.llvm.org/show_bug.cgi?id=38195 // #[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))] #[cfg_attr(test, assert_instr(blendps, imm8 = 0xF0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i { let a = a.as_i16x8(); let b = b.as_i16x8(); macro_rules! call { ($imm8:expr) => { pblendw(a, b, $imm8) }; } transmute(constify_imm8!(imm8, call)) } /// Blend packed double-precision (64-bit) floating-point elements from `a` /// and `b` using `mask` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_pd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(blendvpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d { blendvpd(a, b, mask) } /// Blend packed single-precision (32-bit) floating-point elements from `a` /// and `b` using `mask` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(blendvps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 { blendvps(a, b, mask) } /// Blend packed double-precision (64-bit) floating-point elements from `a` /// and `b` using control mask `imm2` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd) #[inline] #[target_feature(enable = "sse4.1")] // Note: LLVM7 prefers the single-precision floating-point domain when possible // see https://bugs.llvm.org/show_bug.cgi?id=38195 // #[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))] #[cfg_attr(test, assert_instr(blendps, imm2 = 0b10))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d { macro_rules! call { ($imm2:expr) => { blendpd(a, b, $imm2) }; } constify_imm2!(imm2, call) } /// Blend packed single-precision (32-bit) floating-point elements from `a` /// and `b` using mask `imm4` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(blendps, imm4 = 0b0101))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 { macro_rules! 
call { ($imm4:expr) => { blendps(a, b, $imm4) }; } constify_imm4!(imm4, call) } /// Extracts a single-precision (32-bit) floating-point element from `a`, /// selected with `imm8` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr( all(test, not(target_os = "windows")), assert_instr(extractps, imm8 = 0) )] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 { transmute(simd_extract::<_, f32>(a, imm8 as u32 & 0b11)) } /// Extracts an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit /// integer containing the zero-extended integer data. /// /// See [LLVM commit D20468][https://reviews.llvm.org/D20468]. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi8) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pextrb, imm8 = 0))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 { let imm8 = (imm8 & 15) as u32; simd_extract::<_, u8>(a.as_u8x16(), imm8) as i32 } /// Extracts an 32-bit integer from `a` selected with `imm8` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr( all(test, not(target_os = "windows")), assert_instr(extractps, imm8 = 1) )] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 { let imm8 = (imm8 & 3) as u32; simd_extract::<_, i32>(a.as_i32x4(), imm8) } /// Select a single value in `a` to store at some position in `b`, /// Then zero elements according to `imm8`. /// /// `imm8` specifies which bits from operand `a` will be copied, which bits in /// the result they will be copied to, and which bits in the result will be /// cleared. The following assignments are made: /// /// * Bits `[7:6]` specify the bits to copy from operand `a`: /// - `00`: Selects bits `[31:0]` from operand `a`. /// - `01`: Selects bits `[63:32]` from operand `a`. /// - `10`: Selects bits `[95:64]` from operand `a`. /// - `11`: Selects bits `[127:96]` from operand `a`. /// /// * Bits `[5:4]` specify the bits in the result to which the selected bits /// from operand `a` are copied: /// - `00`: Copies the selected bits from `a` to result bits `[31:0]`. /// - `01`: Copies the selected bits from `a` to result bits `[63:32]`. /// - `10`: Copies the selected bits from `a` to result bits `[95:64]`. /// - `11`: Copies the selected bits from `a` to result bits `[127:96]`. /// /// * Bits `[3:0]`: If any of these bits are set, the corresponding result /// element is cleared. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(insertps, imm8 = 0b1010))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { macro_rules! call { ($imm8:expr) => { insertps(a, b, $imm8) }; } constify_imm8!(imm8, call) } /// Returns a copy of `a` with the 8-bit integer from `i` inserted at a /// location specified by `imm8`. 
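///
/// Only the low 4 bits of `imm8` are used, selecting one of the 16 byte
/// positions.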
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi8) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pinsrb, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i { transmute(simd_insert(a.as_i8x16(), (imm8 & 0b1111) as u32, i as i8)) } /// Returns a copy of `a` with the 32-bit integer from `i` inserted at a /// location specified by `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pinsrd, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i { transmute(simd_insert(a.as_i32x4(), (imm8 & 0b11) as u32, i)) } /// Compares packed 8-bit integers in `a` and `b` and returns packed maximum /// values in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi8) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i { transmute(pmaxsb(a.as_i8x16(), b.as_i8x16())) } /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed /// maximum. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu16) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxuw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { transmute(pmaxuw(a.as_u16x8(), b.as_u16x8())) } /// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum /// values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { transmute(pmaxsd(a.as_i32x4(), b.as_i32x4())) } /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed /// maximum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmaxud))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { transmute(pmaxud(a.as_u32x4(), b.as_u32x4())) } /// Compares packed 8-bit integers in `a` and `b` and returns packed minimum /// values in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi8) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { transmute(pminsb(a.as_i8x16(), b.as_i8x16())) } /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed /// minimum. 
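///
/// The comparison is unsigned, so for example a lane holding `0xFFFF`
/// compares as 65535 rather than -1.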
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu16) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminuw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { transmute(pminuw(a.as_u16x8(), b.as_u16x8())) } /// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum /// values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i { transmute(pminsd(a.as_i32x4(), b.as_i32x4())) } /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed /// minimum values. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pminud))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i { transmute(pminud(a.as_u32x4(), b.as_u32x4())) } /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers /// using unsigned saturation /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(packusdw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i { transmute(packusdw(a.as_i32x4(), b.as_i32x4())) } /// Compares packed 64-bit integers in `a` and `b` for equality /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pcmpeqq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i { transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) } /// Sign extend packed 8-bit integers in `a` to packed 16-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi16) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxbw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i { let a = a.as_i8x16(); let a = simd_shuffle8::<_, i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); transmute(simd_cast::<_, i16x8>(a)) } /// Sign extend packed 8-bit integers in `a` to packed 32-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxbd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i { let a = a.as_i8x16(); let a = simd_shuffle4::<_, i8x4>(a, a, [0, 1, 2, 3]); transmute(simd_cast::<_, i32x4>(a)) } /// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed /// 64-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxbq))] #[stable(feature = "simd_x86", 
since = "1.27.0")] pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i { let a = a.as_i8x16(); let a = simd_shuffle2::<_, i8x2>(a, a, [0, 1]); transmute(simd_cast::<_, i64x2>(a)) } /// Sign extend packed 16-bit integers in `a` to packed 32-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxwd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i { let a = a.as_i16x8(); let a = simd_shuffle4::<_, i16x4>(a, a, [0, 1, 2, 3]); transmute(simd_cast::<_, i32x4>(a)) } /// Sign extend packed 16-bit integers in `a` to packed 64-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxwq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i { let a = a.as_i16x8(); let a = simd_shuffle2::<_, i16x2>(a, a, [0, 1]); transmute(simd_cast::<_, i64x2>(a)) } /// Sign extend packed 32-bit integers in `a` to packed 64-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovsxdq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i { let a = a.as_i32x4(); let a = simd_shuffle2::<_, i32x2>(a, a, [0, 1]); transmute(simd_cast::<_, i64x2>(a)) } /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 16-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi16) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxbw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i { let a = a.as_u8x16(); let a = simd_shuffle8::<_, u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); transmute(simd_cast::<_, i16x8>(a)) } /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 32-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxbd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i { let a = a.as_u8x16(); let a = simd_shuffle4::<_, u8x4>(a, a, [0, 1, 2, 3]); transmute(simd_cast::<_, i32x4>(a)) } /// Zeroes extend packed unsigned 8-bit integers in `a` to packed 64-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxbq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i { let a = a.as_u8x16(); let a = simd_shuffle2::<_, u8x2>(a, a, [0, 1]); transmute(simd_cast::<_, i64x2>(a)) } /// Zeroes extend packed unsigned 16-bit integers in `a` /// to packed 32-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxwd))] #[stable(feature = "simd_x86", since = 
"1.27.0")] pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i { let a = a.as_u16x8(); let a = simd_shuffle4::<_, u16x4>(a, a, [0, 1, 2, 3]); transmute(simd_cast::<_, i32x4>(a)) } /// Zeroes extend packed unsigned 16-bit integers in `a` /// to packed 64-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxwq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i { let a = a.as_u16x8(); let a = simd_shuffle2::<_, u16x2>(a, a, [0, 1]); transmute(simd_cast::<_, i64x2>(a)) } /// Zeroes extend packed unsigned 32-bit integers in `a` /// to packed 64-bit integers /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmovzxdq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i { let a = a.as_u32x4(); let a = simd_shuffle2::<_, u32x2>(a, a, [0, 1]); transmute(simd_cast::<_, i64x2>(a)) } /// Returns the dot product of two __m128d vectors. /// /// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask. /// If a condition mask bit is zero, the corresponding multiplication is /// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of /// the dot product will be stored in the return value component. Otherwise if /// the broadcast mask bit is zero then the return component will be zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_pd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(dppd, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { macro_rules! call { ($imm8:expr) => { dppd(a, b, $imm8) }; } constify_imm8!(imm8, call) } /// Returns the dot product of two __m128 vectors. /// /// `imm8[3:0]` is the broadcast mask, and `imm8[7:4]` is the condition mask. /// If a condition mask bit is zero, the corresponding multiplication is /// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of /// the dot product will be stored in the return value component. Otherwise if /// the broadcast mask bit is zero then the return component will be zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(dpps, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_dp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 { macro_rules! call { ($imm8:expr) => { dpps(a, b, $imm8) }; } constify_imm8!(imm8, call) } /// Round the packed double-precision (64-bit) floating-point elements in `a` /// down to an integer value, and stores the results as packed double-precision /// floating-point elements. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_pd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d { roundpd(a, _MM_FROUND_FLOOR) } /// Round the packed single-precision (32-bit) floating-point elements in `a` /// down to an integer value, and stores the results as packed single-precision /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 { roundps(a, _MM_FROUND_FLOOR) } /// Round the lower double-precision (64-bit) floating-point element in `b` /// down to an integer value, store the result as a double-precision /// floating-point element in the lower element of the intrinsic result, /// and copies the upper element from `a` to the upper element of the intrinsic /// result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_sd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d { roundsd(a, b, _MM_FROUND_FLOOR) } /// Round the lower single-precision (32-bit) floating-point element in `b` /// down to an integer value, store the result as a single-precision /// floating-point element in the lower element of the intrinsic result, /// and copies the upper 3 packed elements from `a` to the upper elements /// of the intrinsic result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ss) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 { roundss(a, b, _MM_FROUND_FLOOR) } /// Round the packed double-precision (64-bit) floating-point elements in `a` /// up to an integer value, and stores the results as packed double-precision /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_pd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d { roundpd(a, _MM_FROUND_CEIL) } /// Round the packed single-precision (32-bit) floating-point elements in `a` /// up to an integer value, and stores the results as packed single-precision /// floating-point elements. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 { roundps(a, _MM_FROUND_CEIL) } /// Round the lower double-precision (64-bit) floating-point element in `b` /// up to an integer value, store the result as a double-precision /// floating-point element in the lower element of the intrinsic result, /// and copies the upper element from `a` to the upper element /// of the intrinsic result.
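///
/// A short usage sketch, mirroring the values used in this module's tests
/// (it assumes the `sse4.1` feature is detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_setr_pd(1.5, 3.5);
/// let b = _mm_setr_pd(-2.5, -4.5);
/// // Lane 0 is `ceil(b[0])`; lane 1 is copied straight from `a`.
/// let r: [f64; 2] = std::mem::transmute(_mm_ceil_sd(a, b));
/// assert_eq!(r, [-2.0, 3.5]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```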
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_sd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d { roundsd(a, b, _MM_FROUND_CEIL) } /// Round the lower single-precision (32-bit) floating-point element in `b` /// up to an integer value, store the result as a single-precision /// floating-point element in the lower element of the intrinsic result, /// and copies the upper 3 packed elements from `a` to the upper elements /// of the intrinsic result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ss) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 { roundss(a, b, _MM_FROUND_CEIL) } /// Round the packed double-precision (64-bit) floating-point elements in `a` /// using the `rounding` parameter, and stores the results as packed /// double-precision floating-point elements. /// Rounding is done according to the rounding parameter, which can be one of: /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// // round to nearest, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; /// // round down, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC; /// // round up, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC; /// // truncate, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC; /// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`: /// # let _x = /// _MM_FROUND_CUR_DIRECTION; /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_pd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundpd, rounding = 0))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d { macro_rules! call { ($imm4:expr) => { roundpd(a, $imm4) }; } constify_imm4!(rounding, call) } /// Round the packed single-precision (32-bit) floating-point elements in `a` /// using the `rounding` parameter, and stores the results as packed /// single-precision floating-point elements. 
/// Rounding is done according to the rounding parameter, which can be one of: /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// // round to nearest, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; /// // round down, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC; /// // round up, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC; /// // truncate, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC; /// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`: /// # let _x = /// _MM_FROUND_CUR_DIRECTION; /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ps) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundps, rounding = 0))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 { macro_rules! call { ($imm4:expr) => { roundps(a, $imm4) }; } constify_imm4!(rounding, call) } /// Round the lower double-precision (64-bit) floating-point element in `b` /// using the `rounding` parameter, store the result as a double-precision /// floating-point element in the lower element of the intrinsic result, /// and copies the upper element from `a` to the upper element of the intrinsic /// result. /// Rounding is done according to the rounding parameter, which can be one of: /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// // round to nearest, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; /// // round down, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC; /// // round up, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC; /// // truncate, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC; /// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`: /// # let _x = /// _MM_FROUND_CUR_DIRECTION; /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_sd) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundsd, rounding = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d { macro_rules! call { ($imm4:expr) => { roundsd(a, b, $imm4) }; } constify_imm4!(rounding, call) } /// Round the lower single-precision (32-bit) floating-point element in `b` /// using the `rounding` parameter, store the result as a single-precision /// floating-point element in the lower element of the intrinsic result, /// and copies the upper 3 packed elements from `a` to the upper elements /// of the intrinsic result.
/// Rounding is done according to the rounding parameter, which can be one of: /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// // round to nearest, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; /// // round down, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC; /// // round up, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC; /// // truncate, and suppress exceptions: /// # let _x = /// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC; /// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`: /// # let _x = /// _MM_FROUND_CUR_DIRECTION; /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ss) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(roundss, rounding = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 { macro_rules! call { ($imm4:expr) => { roundss(a, b, $imm4) }; } constify_imm4!(rounding, call) } /// Finds the minimum unsigned 16-bit element in the 128-bit __m128i vector, /// returning a vector containing its value in its first position, and its /// index /// in its second position; all other elements are set to zero. /// /// This intrinsic corresponds to the VPHMINPOSUW / PHMINPOSUW /// instruction. /// /// Arguments: /// /// * `a` - A 128-bit vector of type `__m128i`. /// /// Returns: /// /// A 128-bit value where: /// /// * bits `[15:0]` - contain the minimum value found in parameter `a`, /// * bits `[18:16]` - contain the index of the minimum value /// * remaining bits are set to `0`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_minpos_epu16) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(phminposuw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i { transmute(phminposuw(a.as_u16x8())) } /// Multiplies the low 32-bit integers from each packed 64-bit /// element in `a` and `b`, and returns the signed 64-bit result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmuldq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i { transmute(pmuldq(a.as_i32x4(), b.as_i32x4())) } /// Multiplies the packed 32-bit integers in `a` and `b`, producing intermediate /// 64-bit integers, and returns the lowest 32-bit, whatever they might be, /// reinterpreted as a signed integer. While `pmulld __m128i::splat(2), /// __m128i::splat(2)` returns the obvious `__m128i::splat(4)`, due to wrapping /// arithmetic `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would /// return a negative number. 
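///
/// A small sketch illustrating the wrapping behaviour described above, based
/// on the values exercised by this module's tests (it assumes the `sse4.1`
/// feature is available at runtime):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let a = _mm_setr_epi32(15, -2, 1234567, 99999);
/// let b = _mm_setr_epi32(-20, -256, 666666, -99999);
/// let r: [i32; 4] = std::mem::transmute(_mm_mullo_epi32(a, b));
/// // 1234567 * 666666 does not fit in an `i32`, so lane 2 keeps only the
/// // low 32 bits of the intermediate 64-bit product.
/// assert_eq!(r, [-300, 512, -1589877210, -1409865409]);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```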
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi32) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pmulld))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i { transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) } /// Subtracts 8-bit unsigned integer values and computes the absolute /// values of the differences to the corresponding bits in the destination. /// Then sums of the absolute differences are returned according to the bit /// fields in the immediate operand. /// /// The following algorithm is performed: /// /// ```ignore /// i = imm8[2] * 4 /// j = imm8[1:0] * 4 /// for k := 0 to 7 /// d0 = abs(a[i + k + 0] - b[j + 0]) /// d1 = abs(a[i + k + 1] - b[j + 1]) /// d2 = abs(a[i + k + 2] - b[j + 2]) /// d3 = abs(a[i + k + 3] - b[j + 3]) /// r[k] = d0 + d1 + d2 + d3 /// ``` /// /// Arguments: /// /// * `a` - A 128-bit vector of type `__m128i`. /// * `b` - A 128-bit vector of type `__m128i`. /// * `imm8` - An 8-bit immediate operand specifying how the absolute /// differences are to be calculated /// * Bit `[2]` specify the offset for operand `a` /// * Bits `[1:0]` specify the offset for operand `b` /// /// Returns: /// /// * A `__m128i` vector containing the sums of the sets of absolute /// differences between both operands. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mpsadbw_epu8) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(mpsadbw, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i { let a = a.as_u8x16(); let b = b.as_u8x16(); macro_rules! call { ($imm8:expr) => { mpsadbw(a, b, $imm8) }; } transmute(constify_imm3!(imm8, call)) } /// Tests whether the specified bits in a 128-bit integer vector are all /// zeros. /// /// Arguments: /// /// * `a` - A 128-bit integer vector containing the bits to be tested. /// * `mask` - A 128-bit integer vector selecting which bits to test in /// operand `a`. /// /// Returns: /// /// * `1` - if the specified bits are all zeros, /// * `0` - otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_si128) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { ptestz(a.as_i64x2(), mask.as_i64x2()) } /// Tests whether the specified bits in a 128-bit integer vector are all /// ones. /// /// Arguments: /// /// * `a` - A 128-bit integer vector containing the bits to be tested. /// * `mask` - A 128-bit integer vector selecting which bits to test in /// operand `a`. /// /// Returns: /// /// * `1` - if the specified bits are all ones, /// * `0` - otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_si128) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { ptestc(a.as_i64x2(), mask.as_i64x2()) } /// Tests whether the specified bits in a 128-bit integer vector are /// neither all zeros nor all ones. 
/// /// Arguments: /// /// * `a` - A 128-bit integer vector containing the bits to be tested. /// * `mask` - A 128-bit integer vector selecting which bits to test in /// operand `a`. /// /// Returns: /// /// * `1` - if the specified bits are neither all zeros nor all ones, /// * `0` - otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_si128) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 { ptestnzc(a.as_i64x2(), mask.as_i64x2()) } /// Tests whether the specified bits in a 128-bit integer vector are all /// zeros. /// /// Arguments: /// /// * `a` - A 128-bit integer vector containing the bits to be tested. /// * `mask` - A 128-bit integer vector selecting which bits to test in /// operand `a`. /// /// Returns: /// /// * `1` - if the specified bits are all zeros, /// * `0` - otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_zeros) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 { _mm_testz_si128(a, mask) } /// Tests whether the specified bits in `a` 128-bit integer vector are all /// ones. /// /// Argument: /// /// * `a` - A 128-bit integer vector containing the bits to be tested. /// /// Returns: /// /// * `1` - if the bits specified in the operand are all set to 1, /// * `0` - otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_ones) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pcmpeqd))] #[cfg_attr(test, assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 { _mm_testc_si128(a, _mm_cmpeq_epi32(a, a)) } /// Tests whether the specified bits in a 128-bit integer vector are /// neither all zeros nor all ones. /// /// Arguments: /// /// * `a` - A 128-bit integer vector containing the bits to be tested. /// * `mask` - A 128-bit integer vector selecting which bits to test in /// operand `a`. /// /// Returns: /// /// * `1` - if the specified bits are neither all zeros nor all ones, /// * `0` - otherwise. 
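///
/// A short usage sketch, reusing the values from this module's tests (it
/// assumes the `sse4.1` feature is detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// // The bits selected by `mask` are mixed: bit 2 of `a` is set, bit 1 is not.
/// let a = _mm_set1_epi8(0b101);
/// let mask = _mm_set1_epi8(0b110);
/// assert_eq!(_mm_test_mix_ones_zeros(a, mask), 1);
///
/// // Every selected bit is set, so the result is `0`.
/// let mask = _mm_set1_epi8(0b101);
/// assert_eq!(_mm_test_mix_ones_zeros(a, mask), 0);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```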
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_mix_ones_zeros) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(ptest))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 { _mm_testnzc_si128(a, mask) } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sse41.pblendvb"] fn pblendvb(a: i8x16, b: i8x16, mask: i8x16) -> i8x16; #[link_name = "llvm.x86.sse41.blendvpd"] fn blendvpd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d; #[link_name = "llvm.x86.sse41.blendvps"] fn blendvps(a: __m128, b: __m128, mask: __m128) -> __m128; #[link_name = "llvm.x86.sse41.blendpd"] fn blendpd(a: __m128d, b: __m128d, imm2: u8) -> __m128d; #[link_name = "llvm.x86.sse41.blendps"] fn blendps(a: __m128, b: __m128, imm4: u8) -> __m128; #[link_name = "llvm.x86.sse41.pblendw"] fn pblendw(a: i16x8, b: i16x8, imm8: u8) -> i16x8; #[link_name = "llvm.x86.sse41.insertps"] fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128; #[link_name = "llvm.x86.sse41.pmaxsb"] fn pmaxsb(a: i8x16, b: i8x16) -> i8x16; #[link_name = "llvm.x86.sse41.pmaxuw"] fn pmaxuw(a: u16x8, b: u16x8) -> u16x8; #[link_name = "llvm.x86.sse41.pmaxsd"] fn pmaxsd(a: i32x4, b: i32x4) -> i32x4; #[link_name = "llvm.x86.sse41.pmaxud"] fn pmaxud(a: u32x4, b: u32x4) -> u32x4; #[link_name = "llvm.x86.sse41.pminsb"] fn pminsb(a: i8x16, b: i8x16) -> i8x16; #[link_name = "llvm.x86.sse41.pminuw"] fn pminuw(a: u16x8, b: u16x8) -> u16x8; #[link_name = "llvm.x86.sse41.pminsd"] fn pminsd(a: i32x4, b: i32x4) -> i32x4; #[link_name = "llvm.x86.sse41.pminud"] fn pminud(a: u32x4, b: u32x4) -> u32x4; #[link_name = "llvm.x86.sse41.packusdw"] fn packusdw(a: i32x4, b: i32x4) -> u16x8; #[link_name = "llvm.x86.sse41.dppd"] fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d; #[link_name = "llvm.x86.sse41.dpps"] fn dpps(a: __m128, b: __m128, imm8: u8) -> __m128; #[link_name = "llvm.x86.sse41.round.pd"] fn roundpd(a: __m128d, rounding: i32) -> __m128d; #[link_name = "llvm.x86.sse41.round.ps"] fn roundps(a: __m128, rounding: i32) -> __m128; #[link_name = "llvm.x86.sse41.round.sd"] fn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d; #[link_name = "llvm.x86.sse41.round.ss"] fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128; #[link_name = "llvm.x86.sse41.phminposuw"] fn phminposuw(a: u16x8) -> u16x8; #[link_name = "llvm.x86.sse41.pmuldq"] fn pmuldq(a: i32x4, b: i32x4) -> i64x2; #[link_name = "llvm.x86.sse41.mpsadbw"] fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8; #[link_name = "llvm.x86.sse41.ptestz"] fn ptestz(a: i64x2, mask: i64x2) -> i32; #[link_name = "llvm.x86.sse41.ptestc"] fn ptestc(a: i64x2, mask: i64x2) -> i32; #[link_name = "llvm.x86.sse41.ptestnzc"] fn ptestnzc(a: i64x2, mask: i64x2) -> i32; } #[cfg(test)] mod tests { use crate::core_arch::x86::*; use std::mem; use stdsimd_test::simd_test; #[simd_test(enable = "sse4.1")] unsafe fn test_mm_blendv_epi8() { #[rustfmt::skip] let a = _mm_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); #[rustfmt::skip] let b = _mm_setr_epi8( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, ); #[rustfmt::skip] let mask = _mm_setr_epi8( 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, ); #[rustfmt::skip] let e = _mm_setr_epi8( 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31, ); assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_blendv_pd() { let a = 
_mm_set1_pd(0.0); let b = _mm_set1_pd(1.0); let mask = transmute(_mm_setr_epi64x(0, -1)); let r = _mm_blendv_pd(a, b, mask); let e = _mm_setr_pd(0.0, 1.0); assert_eq_m128d(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_blendv_ps() { let a = _mm_set1_ps(0.0); let b = _mm_set1_ps(1.0); let mask = transmute(_mm_setr_epi32(0, -1, 0, -1)); let r = _mm_blendv_ps(a, b, mask); let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0); assert_eq_m128(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_blend_pd() { let a = _mm_set1_pd(0.0); let b = _mm_set1_pd(1.0); let r = _mm_blend_pd(a, b, 0b10); let e = _mm_setr_pd(0.0, 1.0); assert_eq_m128d(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_blend_ps() { let a = _mm_set1_ps(0.0); let b = _mm_set1_ps(1.0); let r = _mm_blend_ps(a, b, 0b1010); let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0); assert_eq_m128(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_blend_epi16() { let a = _mm_set1_epi16(0); let b = _mm_set1_epi16(1); let r = _mm_blend_epi16(a, b, 0b1010_1100); let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_extract_ps() { let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0); let r: f32 = transmute(_mm_extract_ps(a, 1)); assert_eq!(r, 1.0); let r: f32 = transmute(_mm_extract_ps(a, 5)); assert_eq!(r, 1.0); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_extract_epi8() { #[rustfmt::skip] let a = _mm_setr_epi8( -1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); let r1 = _mm_extract_epi8(a, 0); let r2 = _mm_extract_epi8(a, 19); assert_eq!(r1, 0xFF); assert_eq!(r2, 3); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_extract_epi32() { let a = _mm_setr_epi32(0, 1, 2, 3); let r = _mm_extract_epi32(a, 1); assert_eq!(r, 1); let r = _mm_extract_epi32(a, 5); assert_eq!(r, 1); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_insert_ps() { let a = _mm_set1_ps(1.0); let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); let r = _mm_insert_ps(a, b, 0b11_00_1100); let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0); assert_eq_m128(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_insert_epi8() { let a = _mm_set1_epi8(0); let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); let r = _mm_insert_epi8(a, 32, 1); assert_eq_m128i(r, e); let r = _mm_insert_epi8(a, 32, 17); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_insert_epi32() { let a = _mm_set1_epi32(0); let e = _mm_setr_epi32(0, 32, 0, 0); let r = _mm_insert_epi32(a, 32, 1); assert_eq_m128i(r, e); let r = _mm_insert_epi32(a, 32, 5); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_max_epi8() { #[rustfmt::skip] let a = _mm_setr_epi8( 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29, 32, ); #[rustfmt::skip] let b = _mm_setr_epi8( 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31, ); let r = _mm_max_epi8(a, b); #[rustfmt::skip] let e = _mm_setr_epi8( 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, ); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_max_epu16() { let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15); let r = _mm_max_epu16(a, b); let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_max_epi32() { let a = _mm_setr_epi32(1, 4, 5, 8); let b = _mm_setr_epi32(2, 3, 6, 7); let r = _mm_max_epi32(a, b); let e = _mm_setr_epi32(2, 4, 6, 8); assert_eq_m128i(r, e); } #[simd_test(enable = 
"sse4.1")] unsafe fn test_mm_max_epu32() { let a = _mm_setr_epi32(1, 4, 5, 8); let b = _mm_setr_epi32(2, 3, 6, 7); let r = _mm_max_epu32(a, b); let e = _mm_setr_epi32(2, 4, 6, 8); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_min_epi8_1() { #[rustfmt::skip] let a = _mm_setr_epi8( 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29, 32, ); #[rustfmt::skip] let b = _mm_setr_epi8( 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31, ); let r = _mm_min_epi8(a, b); #[rustfmt::skip] let e = _mm_setr_epi8( 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, ); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_min_epi8_2() { #[rustfmt::skip] let a = _mm_setr_epi8( 1, -4, -5, 8, -9, -12, 13, -16, 17, 20, 21, 24, 25, 28, 29, 32, ); #[rustfmt::skip] let b = _mm_setr_epi8( 2, -3, -6, 7, -10, -11, 14, -15, 18, 19, 22, 23, 26, 27, 30, 31, ); let r = _mm_min_epi8(a, b); #[rustfmt::skip] let e = _mm_setr_epi8( 1, -4, -6, 7, -10, -12, 13, -16, 17, 19, 21, 23, 25, 27, 29, 31, ); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_min_epu16() { let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16); let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15); let r = _mm_min_epu16(a, b); let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_min_epi32_1() { let a = _mm_setr_epi32(1, 4, 5, 8); let b = _mm_setr_epi32(2, 3, 6, 7); let r = _mm_min_epi32(a, b); let e = _mm_setr_epi32(1, 3, 5, 7); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_min_epi32_2() { let a = _mm_setr_epi32(-1, 4, 5, -7); let b = _mm_setr_epi32(-2, 3, -6, 8); let r = _mm_min_epi32(a, b); let e = _mm_setr_epi32(-2, 3, -6, -7); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_min_epu32() { let a = _mm_setr_epi32(1, 4, 5, 8); let b = _mm_setr_epi32(2, 3, 6, 7); let r = _mm_min_epu32(a, b); let e = _mm_setr_epi32(1, 3, 5, 7); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_packus_epi32() { let a = _mm_setr_epi32(1, 2, 3, 4); let b = _mm_setr_epi32(-1, -2, -3, -4); let r = _mm_packus_epi32(a, b); let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_cmpeq_epi64() { let a = _mm_setr_epi64x(0, 1); let b = _mm_setr_epi64x(0, 0); let r = _mm_cmpeq_epi64(a, b); let e = _mm_setr_epi64x(-1, 0); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_cvtepi8_epi16() { let a = _mm_set1_epi8(10); let r = _mm_cvtepi8_epi16(a); let e = _mm_set1_epi16(10); assert_eq_m128i(r, e); let a = _mm_set1_epi8(-10); let r = _mm_cvtepi8_epi16(a); let e = _mm_set1_epi16(-10); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_cvtepi8_epi32() { let a = _mm_set1_epi8(10); let r = _mm_cvtepi8_epi32(a); let e = _mm_set1_epi32(10); assert_eq_m128i(r, e); let a = _mm_set1_epi8(-10); let r = _mm_cvtepi8_epi32(a); let e = _mm_set1_epi32(-10); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_cvtepi8_epi64() { let a = _mm_set1_epi8(10); let r = _mm_cvtepi8_epi64(a); let e = _mm_set1_epi64x(10); assert_eq_m128i(r, e); let a = _mm_set1_epi8(-10); let r = _mm_cvtepi8_epi64(a); let e = _mm_set1_epi64x(-10); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_cvtepi16_epi32() { let a = _mm_set1_epi16(10); let r = _mm_cvtepi16_epi32(a); let e = _mm_set1_epi32(10); assert_eq_m128i(r, e); let a = 
_mm_set1_epi16(-10); let r = _mm_cvtepi16_epi32(a); let e = _mm_set1_epi32(-10); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_cvtepi16_epi64() { let a = _mm_set1_epi16(10); let r = _mm_cvtepi16_epi64(a); let e = _mm_set1_epi64x(10); assert_eq_m128i(r, e); let a = _mm_set1_epi16(-10); let r = _mm_cvtepi16_epi64(a); let e = _mm_set1_epi64x(-10); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_cvtepi32_epi64() { let a = _mm_set1_epi32(10); let r = _mm_cvtepi32_epi64(a); let e = _mm_set1_epi64x(10); assert_eq_m128i(r, e); let a = _mm_set1_epi32(-10); let r = _mm_cvtepi32_epi64(a); let e = _mm_set1_epi64x(-10); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_cvtepu8_epi16() { let a = _mm_set1_epi8(10); let r = _mm_cvtepu8_epi16(a); let e = _mm_set1_epi16(10); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_cvtepu8_epi32() { let a = _mm_set1_epi8(10); let r = _mm_cvtepu8_epi32(a); let e = _mm_set1_epi32(10); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_cvtepu8_epi64() { let a = _mm_set1_epi8(10); let r = _mm_cvtepu8_epi64(a); let e = _mm_set1_epi64x(10); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_cvtepu16_epi32() { let a = _mm_set1_epi16(10); let r = _mm_cvtepu16_epi32(a); let e = _mm_set1_epi32(10); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_cvtepu16_epi64() { let a = _mm_set1_epi16(10); let r = _mm_cvtepu16_epi64(a); let e = _mm_set1_epi64x(10); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_cvtepu32_epi64() { let a = _mm_set1_epi32(10); let r = _mm_cvtepu32_epi64(a); let e = _mm_set1_epi64x(10); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_dp_pd() { let a = _mm_setr_pd(2.0, 3.0); let b = _mm_setr_pd(1.0, 4.0); let e = _mm_setr_pd(14.0, 0.0); assert_eq_m128d(_mm_dp_pd(a, b, 0b00110001), e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_dp_ps() { let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0); let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0); let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0); assert_eq_m128(_mm_dp_ps(a, b, 0b01110101), e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_floor_pd() { let a = _mm_setr_pd(2.5, 4.5); let r = _mm_floor_pd(a); let e = _mm_setr_pd(2.0, 4.0); assert_eq_m128d(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_floor_ps() { let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5); let r = _mm_floor_ps(a); let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0); assert_eq_m128(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_floor_sd() { let a = _mm_setr_pd(2.5, 4.5); let b = _mm_setr_pd(-1.5, -3.5); let r = _mm_floor_sd(a, b); let e = _mm_setr_pd(-2.0, 4.5); assert_eq_m128d(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_floor_ss() { let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5); let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5); let r = _mm_floor_ss(a, b); let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5); assert_eq_m128(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_ceil_pd() { let a = _mm_setr_pd(1.5, 3.5); let r = _mm_ceil_pd(a); let e = _mm_setr_pd(2.0, 4.0); assert_eq_m128d(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_ceil_ps() { let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); let r = _mm_ceil_ps(a); let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0); assert_eq_m128(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_ceil_sd() { let a = _mm_setr_pd(1.5, 3.5); let b = _mm_setr_pd(-2.5, -4.5); let r = 
_mm_ceil_sd(a, b); let e = _mm_setr_pd(-2.0, 3.5); assert_eq_m128d(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_ceil_ss() { let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5); let r = _mm_ceil_ss(a, b); let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); assert_eq_m128(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_round_pd() { let a = _mm_setr_pd(1.25, 3.75); let r = _mm_round_pd(a, _MM_FROUND_TO_NEAREST_INT); let e = _mm_setr_pd(1.0, 4.0); assert_eq_m128d(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_round_ps() { let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25); let r = _mm_round_ps(a, _MM_FROUND_TO_ZERO); let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0); assert_eq_m128(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_round_sd() { let a = _mm_setr_pd(1.5, 3.5); let b = _mm_setr_pd(-2.5, -4.5); let old_mode = _MM_GET_ROUNDING_MODE(); _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO); let r = _mm_round_sd(a, b, _MM_FROUND_CUR_DIRECTION); _MM_SET_ROUNDING_MODE(old_mode); let e = _mm_setr_pd(-2.0, 3.5); assert_eq_m128d(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_round_ss() { let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5); let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5); let old_mode = _MM_GET_ROUNDING_MODE(); _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); let r = _mm_round_ss(a, b, _MM_FROUND_CUR_DIRECTION); _MM_SET_ROUNDING_MODE(old_mode); let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5); assert_eq_m128(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_minpos_epu16_1() { let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66); let r = _mm_minpos_epu16(a); let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_minpos_epu16_2() { let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66); let r = _mm_minpos_epu16(a); let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_mul_epi32() { { let a = _mm_setr_epi32(1, 1, 1, 1); let b = _mm_setr_epi32(1, 2, 3, 4); let r = _mm_mul_epi32(a, b); let e = _mm_setr_epi64x(1, 3); assert_eq_m128i(r, e); } { let a = _mm_setr_epi32(15, 2 /* ignored */, 1234567, 4 /* ignored */); let b = _mm_setr_epi32( -20, -256, /* ignored */ 666666, 666666, /* ignored */ ); let r = _mm_mul_epi32(a, b); let e = _mm_setr_epi64x(-300, 823043843622); assert_eq_m128i(r, e); } } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_mullo_epi32() { { let a = _mm_setr_epi32(1, 1, 1, 1); let b = _mm_setr_epi32(1, 2, 3, 4); let r = _mm_mullo_epi32(a, b); let e = _mm_setr_epi32(1, 2, 3, 4); assert_eq_m128i(r, e); } { let a = _mm_setr_epi32(15, -2, 1234567, 99999); let b = _mm_setr_epi32(-20, -256, 666666, -99999); let r = _mm_mullo_epi32(a, b); // Attention, most significant bit in r[2] is treated // as a sign bit: // 1234567 * 666666 = -1589877210 let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409); assert_eq_m128i(r, e); } } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_minpos_epu16() { let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3); let r = _mm_minpos_epu16(a); let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_mpsadbw_epu8() { #[rustfmt::skip] let a = _mm_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); let r = _mm_mpsadbw_epu8(a, a, 0b000); let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28); assert_eq_m128i(r, e); let r = _mm_mpsadbw_epu8(a, a, 0b001); let e = _mm_setr_epi16(16, 12, 8, 4, 
0, 4, 8, 12); assert_eq_m128i(r, e); let r = _mm_mpsadbw_epu8(a, a, 0b100); let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44); assert_eq_m128i(r, e); let r = _mm_mpsadbw_epu8(a, a, 0b101); let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28); assert_eq_m128i(r, e); let r = _mm_mpsadbw_epu8(a, a, 0b111); let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_testz_si128() { let a = _mm_set1_epi8(1); let mask = _mm_set1_epi8(0); let r = _mm_testz_si128(a, mask); assert_eq!(r, 1); let a = _mm_set1_epi8(0b101); let mask = _mm_set1_epi8(0b110); let r = _mm_testz_si128(a, mask); assert_eq!(r, 0); let a = _mm_set1_epi8(0b011); let mask = _mm_set1_epi8(0b100); let r = _mm_testz_si128(a, mask); assert_eq!(r, 1); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_testc_si128() { let a = _mm_set1_epi8(-1); let mask = _mm_set1_epi8(0); let r = _mm_testc_si128(a, mask); assert_eq!(r, 1); let a = _mm_set1_epi8(0b101); let mask = _mm_set1_epi8(0b110); let r = _mm_testc_si128(a, mask); assert_eq!(r, 0); let a = _mm_set1_epi8(0b101); let mask = _mm_set1_epi8(0b100); let r = _mm_testc_si128(a, mask); assert_eq!(r, 1); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_testnzc_si128() { let a = _mm_set1_epi8(0); let mask = _mm_set1_epi8(1); let r = _mm_testnzc_si128(a, mask); assert_eq!(r, 0); let a = _mm_set1_epi8(-1); let mask = _mm_set1_epi8(0); let r = _mm_testnzc_si128(a, mask); assert_eq!(r, 0); let a = _mm_set1_epi8(0b101); let mask = _mm_set1_epi8(0b110); let r = _mm_testnzc_si128(a, mask); assert_eq!(r, 1); let a = _mm_set1_epi8(0b101); let mask = _mm_set1_epi8(0b101); let r = _mm_testnzc_si128(a, mask); assert_eq!(r, 0); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_test_all_zeros() { let a = _mm_set1_epi8(1); let mask = _mm_set1_epi8(0); let r = _mm_test_all_zeros(a, mask); assert_eq!(r, 1); let a = _mm_set1_epi8(0b101); let mask = _mm_set1_epi8(0b110); let r = _mm_test_all_zeros(a, mask); assert_eq!(r, 0); let a = _mm_set1_epi8(0b011); let mask = _mm_set1_epi8(0b100); let r = _mm_test_all_zeros(a, mask); assert_eq!(r, 1); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_test_all_ones() { let a = _mm_set1_epi8(-1); let r = _mm_test_all_ones(a); assert_eq!(r, 1); let a = _mm_set1_epi8(0b101); let r = _mm_test_all_ones(a); assert_eq!(r, 0); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_test_mix_ones_zeros() { let a = _mm_set1_epi8(0); let mask = _mm_set1_epi8(1); let r = _mm_test_mix_ones_zeros(a, mask); assert_eq!(r, 0); let a = _mm_set1_epi8(-1); let mask = _mm_set1_epi8(0); let r = _mm_test_mix_ones_zeros(a, mask); assert_eq!(r, 0); let a = _mm_set1_epi8(0b101); let mask = _mm_set1_epi8(0b110); let r = _mm_test_mix_ones_zeros(a, mask); assert_eq!(r, 1); let a = _mm_set1_epi8(0b101); let mask = _mm_set1_epi8(0b101); let r = _mm_test_mix_ones_zeros(a, mask); assert_eq!(r, 0); } } core_arch-0.1.5/src/x86/sse42.rs010064400007650000024000000757661343447103600144350ustar0000000000000000//! Streaming SIMD Extensions 4.2 (SSE4.2) //! //! Extends SSE4.1 with STTNI (String and Text New Instructions). 
#[cfg(test)] use stdsimd_test::assert_instr; use crate::{ core_arch::{simd::*, simd_llvm::*, x86::*}, mem::transmute, }; /// String contains unsigned 8-bit characters *(Default)* #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000; /// String contains unsigned 16-bit characters #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_UWORD_OPS: i32 = 0b0000_0001; /// String contains signed 8-bit characters #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_SBYTE_OPS: i32 = 0b0000_0010; /// String contains signed 16-bit characters #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_SWORD_OPS: i32 = 0b0000_0011; /// For each character in `a`, find if it is in `b` *(Default)* #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_CMP_EQUAL_ANY: i32 = 0b0000_0000; /// For each character in `a`, determine if /// `b[0] <= c <= b[1] or b[2] <= c <= b[3]...` #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_CMP_RANGES: i32 = 0b0000_0100; /// The strings defined by `a` and `b` are equal #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_CMP_EQUAL_EACH: i32 = 0b0000_1000; /// Search for the defined substring in the target #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_CMP_EQUAL_ORDERED: i32 = 0b0000_1100; /// Do not negate results *(Default)* #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_POSITIVE_POLARITY: i32 = 0b0000_0000; /// Negates results #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_NEGATIVE_POLARITY: i32 = 0b0001_0000; /// Do not negate results before the end of the string #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_MASKED_POSITIVE_POLARITY: i32 = 0b0010_0000; /// Negates results only before the end of the string #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_MASKED_NEGATIVE_POLARITY: i32 = 0b0011_0000; /// **Index only**: return the least significant bit *(Default)* #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_LEAST_SIGNIFICANT: i32 = 0b0000_0000; /// **Index only**: return the most significant bit #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_MOST_SIGNIFICANT: i32 = 0b0100_0000; /// **Mask only**: return the bit mask #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_BIT_MASK: i32 = 0b0000_0000; /// **Mask only**: return the byte mask #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000; /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return the generated mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrm) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistrm, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { ($imm8:expr) => { pcmpistrm128(a, b, $imm8) }; } transmute(constify_imm8!(imm8, call)) } /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8` and return the generated index. Similar to /// [`_mm_cmpestri`] with the exception that [`_mm_cmpestri`] requires the /// lengths of `a` and `b` to be explicitly specified.
/// /// # Control modes /// /// The control specified by `imm8` may be one or more of the following. /// /// ## Data size and signedness /// /// - [`_SIDD_UBYTE_OPS`] - Default /// - [`_SIDD_UWORD_OPS`] /// - [`_SIDD_SBYTE_OPS`] /// - [`_SIDD_SWORD_OPS`] /// /// ## Comparison options /// - [`_SIDD_CMP_EQUAL_ANY`] - Default /// - [`_SIDD_CMP_RANGES`] /// - [`_SIDD_CMP_EQUAL_EACH`] /// - [`_SIDD_CMP_EQUAL_ORDERED`] /// /// ## Result polarity /// - [`_SIDD_POSITIVE_POLARITY`] - Default /// - [`_SIDD_NEGATIVE_POLARITY`] /// /// ## Bit returned /// - [`_SIDD_LEAST_SIGNIFICANT`] - Default /// - [`_SIDD_MOST_SIGNIFICANT`] /// /// # Examples /// /// Finds a substring using [`_SIDD_CMP_EQUAL_ORDERED`] /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// let haystack = b"This is a long string of text data\r\n\tthat extends /// multiple lines"; /// let needle = b"\r\n\t\0\0\0\0\0\0\0\0\0\0\0\0\0"; /// /// let a = _mm_loadu_si128(needle.as_ptr() as *const _); /// let hop = 16; /// let mut indexes = Vec::new(); /// /// // Chunk the haystack into 16 byte chunks and find /// // the first "\r\n\t" in the chunk. /// for (i, chunk) in haystack.chunks(hop).enumerate() { /// let b = _mm_loadu_si128(chunk.as_ptr() as *const _); /// let idx = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED); /// if idx != 16 { /// indexes.push((idx as usize) + (i * hop)); /// } /// } /// assert_eq!(indexes, vec![34]); /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// The `_mm_cmpistri` intrinsic may also be used to find the existance of /// one or more of a given set of characters in the haystack. /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// // Ensure your input is 16 byte aligned /// let password = b"hunter2\0\0\0\0\0\0\0\0\0"; /// let special_chars = b"!@#$%^&*()[]:;<>"; /// /// // Load the input /// let a = _mm_loadu_si128(special_chars.as_ptr() as *const _); /// let b = _mm_loadu_si128(password.as_ptr() as *const _); /// /// // Use _SIDD_CMP_EQUAL_ANY to find the index of any bytes in b /// let idx = _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_EQUAL_ANY); /// /// if idx < 16 { /// println!("Congrats! Your password contains a special character"); /// # panic!("{:?} does not contain a special character", password); /// } else { /// println!("Your password should contain a special character"); /// } /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// Finds the index of the first character in the haystack that is within a /// range of characters. /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// # let b = b":;<=>?@[\\]^_`abc"; /// # let b = _mm_loadu_si128(b.as_ptr() as *const _); /// /// // Specify the ranges of values to be searched for [A-Za-z0-9]. /// let a = b"AZaz09\0\0\0\0\0\0\0\0\0\0"; /// let a = _mm_loadu_si128(a.as_ptr() as *const _); /// /// // Use _SIDD_CMP_RANGES to find the index of first byte in ranges. 
/// // Which in this case will be the first alpha numeric byte found /// // in the string. /// let idx = _mm_cmpistri(a, b, _SIDD_CMP_RANGES); /// /// if idx < 16 { /// println!("Found an alpha numeric character"); /// # assert_eq!(idx, 13); /// } else { /// println!("Did not find an alpha numeric character"); /// } /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// Working with 16-bit characters. /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// # let mut some_utf16_words = [0u16; 8]; /// # let mut more_utf16_words = [0u16; 8]; /// # '❤'.encode_utf16(&mut some_utf16_words); /// # '𝕊'.encode_utf16(&mut more_utf16_words); /// // Load the input /// let a = _mm_loadu_si128(some_utf16_words.as_ptr() as *const _); /// let b = _mm_loadu_si128(more_utf16_words.as_ptr() as *const _); /// /// // Specify _SIDD_UWORD_OPS to compare words instead of bytes, and /// // use _SIDD_CMP_EQUAL_EACH to compare the two strings. /// let idx = _mm_cmpistri(a, b, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH); /// /// if idx == 0 { /// println!("16-bit unicode strings were equal!"); /// # panic!("Strings should not be equal!") /// } else { /// println!("16-bit unicode strings were not equal!"); /// } /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// [`_SIDD_UBYTE_OPS`]: constant._SIDD_UBYTE_OPS.html /// [`_SIDD_UWORD_OPS`]: constant._SIDD_UWORD_OPS.html /// [`_SIDD_SBYTE_OPS`]: constant._SIDD_SBYTE_OPS.html /// [`_SIDD_SWORD_OPS`]: constant._SIDD_SWORD_OPS.html /// [`_SIDD_CMP_EQUAL_ANY`]: constant._SIDD_CMP_EQUAL_ANY.html /// [`_SIDD_CMP_RANGES`]: constant._SIDD_CMP_RANGES.html /// [`_SIDD_CMP_EQUAL_EACH`]: constant._SIDD_CMP_EQUAL_EACH.html /// [`_SIDD_CMP_EQUAL_ORDERED`]: constant._SIDD_CMP_EQUAL_ORDERED.html /// [`_SIDD_POSITIVE_POLARITY`]: constant._SIDD_POSITIVE_POLARITY.html /// [`_SIDD_NEGATIVE_POLARITY`]: constant._SIDD_NEGATIVE_POLARITY.html /// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html /// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html /// [`_mm_cmpestri`]: fn._mm_cmpestri.html /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistri) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { ($imm8:expr) => { pcmpistri128(a, b, $imm8) }; } constify_imm8!(imm8, call) } /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return `1` if any character in `b` was null. /// and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrz) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! 
call { ($imm8:expr) => { pcmpistriz128(a, b, $imm8) }; } constify_imm8!(imm8, call) } /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return `1` if the resulting mask was non-zero, /// and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrc) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { ($imm8:expr) => { pcmpistric128(a, b, $imm8) }; } constify_imm8!(imm8, call) } /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and returns `1` if any character in `a` was null, /// and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrs) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { ($imm8:expr) => { pcmpistris128(a, b, $imm8) }; } constify_imm8!(imm8, call) } /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return bit `0` of the resulting bit mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistro) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { ($imm8:expr) => { pcmpistrio128(a, b, $imm8) }; } constify_imm8!(imm8, call) } /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `imm8`, and return `1` if `b` did not contain a null /// character and the resulting mask was zero, and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistra) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { ($imm8:expr) => { pcmpistria128(a, b, $imm8) }; } constify_imm8!(imm8, call) } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return the generated mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestrm, imm8 = 0))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestrm(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) -> __m128i { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! 
call { ($imm8:expr) => { pcmpestrm128(a, la, b, lb, $imm8) }; } transmute(constify_imm8!(imm8, call)) } /// Compares packed strings `a` and `b` with lengths `la` and `lb` using the /// control in `imm8` and return the generated index. Similar to /// [`_mm_cmpistri`] with the exception that [`_mm_cmpistri`] implicitly /// determines the length of `a` and `b`. /// /// # Control modes /// /// The control specified by `imm8` may be one or more of the following. /// /// ## Data size and signedness /// /// - [`_SIDD_UBYTE_OPS`] - Default /// - [`_SIDD_UWORD_OPS`] /// - [`_SIDD_SBYTE_OPS`] /// - [`_SIDD_SWORD_OPS`] /// /// ## Comparison options /// - [`_SIDD_CMP_EQUAL_ANY`] - Default /// - [`_SIDD_CMP_RANGES`] /// - [`_SIDD_CMP_EQUAL_EACH`] /// - [`_SIDD_CMP_EQUAL_ORDERED`] /// /// ## Result polarity /// - [`_SIDD_POSITIVE_POLARITY`] - Default /// - [`_SIDD_NEGATIVE_POLARITY`] /// /// ## Bit returned /// - [`_SIDD_LEAST_SIGNIFICANT`] - Default /// - [`_SIDD_MOST_SIGNIFICANT`] /// /// # Examples /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// /// // The string we want to find a substring in /// let haystack = b"Split \r\n\t line "; /// /// // The string we want to search for with some /// // extra bytes we do not want to search for. /// let needle = b"\r\n\t ignore this "; /// /// let a = _mm_loadu_si128(needle.as_ptr() as *const _); /// let b = _mm_loadu_si128(haystack.as_ptr() as *const _); /// /// // Note: We explicitly specify we only want to search `b` for the /// // first 3 characters of a. /// let idx = _mm_cmpestri(a, 3, b, 15, _SIDD_CMP_EQUAL_ORDERED); /// /// assert_eq!(idx, 6); /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// [`_SIDD_UBYTE_OPS`]: constant._SIDD_UBYTE_OPS.html /// [`_SIDD_UWORD_OPS`]: constant._SIDD_UWORD_OPS.html /// [`_SIDD_SBYTE_OPS`]: constant._SIDD_SBYTE_OPS.html /// [`_SIDD_SWORD_OPS`]: constant._SIDD_SWORD_OPS.html /// [`_SIDD_CMP_EQUAL_ANY`]: constant._SIDD_CMP_EQUAL_ANY.html /// [`_SIDD_CMP_RANGES`]: constant._SIDD_CMP_RANGES.html /// [`_SIDD_CMP_EQUAL_EACH`]: constant._SIDD_CMP_EQUAL_EACH.html /// [`_SIDD_CMP_EQUAL_ORDERED`]: constant._SIDD_CMP_EQUAL_ORDERED.html /// [`_SIDD_POSITIVE_POLARITY`]: constant._SIDD_POSITIVE_POLARITY.html /// [`_SIDD_NEGATIVE_POLARITY`]: constant._SIDD_NEGATIVE_POLARITY.html /// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html /// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html /// [`_mm_cmpistri`]: fn._mm_cmpistri.html /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestri) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestri(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { ($imm8:expr) => { pcmpestri128(a, la, b, lb, $imm8) }; } constify_imm8!(imm8, call) } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if any character in /// `b` was null, and `0` otherwise. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrz) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestrz(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { ($imm8:expr) => { pcmpestriz128(a, la, b, lb, $imm8) }; } constify_imm8!(imm8, call) } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if the resulting mask /// was non-zero, and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrc) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestrc(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { ($imm8:expr) => { pcmpestric128(a, la, b, lb, $imm8) }; } constify_imm8!(imm8, call) } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if any character in /// a was null, and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrs) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestrs(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { ($imm8:expr) => { pcmpestris128(a, la, b, lb, $imm8) }; } constify_imm8!(imm8, call) } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return bit `0` of the resulting /// bit mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestro) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestro(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { ($imm8:expr) => { pcmpestrio128(a, la, b, lb, $imm8) }; } constify_imm8!(imm8, call) } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `imm8`, and return `1` if `b` did not /// contain a null character and the resulting mask was zero, and `0` /// otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestra) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))] #[rustc_args_required_const(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestra(a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32) -> i32 { let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! call { ($imm8:expr) => { pcmpestria128(a, la, b, lb, $imm8) }; } constify_imm8!(imm8, call) } /// Starting with the initial value in `crc`, return the accumulated /// CRC32 value for unsigned 8-bit integer `v`. 
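///
/// # Examples
///
/// A usage sketch added for illustration (not part of the original
/// documentation): it folds the CRC over a byte string using the customary
/// CRC-32C initial value and final inversion. The quoted check value
/// `0xE306_9283` is the standard CRC-32C result for `"123456789"`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.2") {
/// # #[target_feature(enable = "sse4.2")]
/// # unsafe fn worker() {
/// let mut state = 0xFFFF_FFFFu32; // conventional CRC-32C initial value
/// for &byte in b"123456789" {
///     state = _mm_crc32_u8(state, byte);
/// }
/// let crc32c = state ^ 0xFFFF_FFFF; // conventional final inversion
/// assert_eq!(crc32c, 0xE306_9283);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```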
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u8) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 { crc32_32_8(crc, v) } /// Starting with the initial value in `crc`, return the accumulated /// CRC32 value for unsigned 16-bit integer `v`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u16) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 { crc32_32_16(crc, v) } /// Starting with the initial value in `crc`, return the accumulated /// CRC32 value for unsigned 32-bit integer `v`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u32) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 { crc32_32_32(crc, v) } /// Compares packed 64-bit integers in `a` and `b` for greater-than, /// return the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi64) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpgtq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i { transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) } #[allow(improper_ctypes)] extern "C" { // SSE 4.2 string and text comparison ops #[link_name = "llvm.x86.sse42.pcmpestrm128"] fn pcmpestrm128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> u8x16; #[link_name = "llvm.x86.sse42.pcmpestri128"] fn pcmpestri128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpestriz128"] fn pcmpestriz128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpestric128"] fn pcmpestric128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpestris128"] fn pcmpestris128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpestrio128"] fn pcmpestrio128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpestria128"] fn pcmpestria128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpistrm128"] fn pcmpistrm128(a: i8x16, b: i8x16, imm8: i8) -> i8x16; #[link_name = "llvm.x86.sse42.pcmpistri128"] fn pcmpistri128(a: i8x16, b: i8x16, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpistriz128"] fn pcmpistriz128(a: i8x16, b: i8x16, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpistric128"] fn pcmpistric128(a: i8x16, b: i8x16, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpistris128"] fn pcmpistris128(a: i8x16, b: i8x16, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpistrio128"] fn pcmpistrio128(a: i8x16, b: i8x16, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpistria128"] fn pcmpistria128(a: i8x16, b: i8x16, imm8: i8) -> i32; // SSE 4.2 CRC instructions #[link_name = "llvm.x86.sse42.crc32.32.8"] fn crc32_32_8(crc: u32, v: u8) -> u32; #[link_name = "llvm.x86.sse42.crc32.32.16"] fn crc32_32_16(crc: u32, v: u16) -> u32; #[link_name = "llvm.x86.sse42.crc32.32.32"] fn 
crc32_32_32(crc: u32, v: u32) -> u32; } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use crate::core_arch::x86::*; use std::ptr; // Currently one cannot `load` a &[u8] that is is less than 16 // in length. This makes loading strings less than 16 in length // a bit difficult. Rather than `load` and mutate the __m128i, // it is easier to memcpy the given string to a local slice with // length 16 and `load` the local slice. #[target_feature(enable = "sse4.2")] unsafe fn str_to_m128i(s: &[u8]) -> __m128i { assert!(s.len() <= 16); let slice = &mut [0u8; 16]; ptr::copy_nonoverlapping( s.get_unchecked(0) as *const u8 as *const u8, slice.get_unchecked_mut(0) as *mut u8 as *mut u8, s.len(), ); _mm_loadu_si128(slice.as_ptr() as *const _) } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistrm() { let a = str_to_m128i(b"Hello! Good-Bye!"); let b = str_to_m128i(b"hello! good-bye!"); let i = _mm_cmpistrm(a, b, _SIDD_UNIT_MASK); #[rustfmt::skip] let res = _mm_setr_epi8( 0x00, !0, !0, !0, !0, !0, !0, 0x00, !0, !0, !0, !0, 0x00, !0, !0, !0, ); assert_eq_m128i(i, res); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistri() { let a = str_to_m128i(b"Hello"); let b = str_to_m128i(b" Hello "); let i = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(3, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistrz() { let a = str_to_m128i(b""); let b = str_to_m128i(b"Hello"); let i = _mm_cmpistrz(a, b, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(1, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistrc() { let a = str_to_m128i(b" "); let b = str_to_m128i(b" ! "); let i = _mm_cmpistrc(a, b, _SIDD_UNIT_MASK); assert_eq!(1, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistrs() { let a = str_to_m128i(b"Hello"); let b = str_to_m128i(b""); let i = _mm_cmpistrs(a, b, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(1, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistro() { #[rustfmt::skip] let a_bytes = _mm_setr_epi8( 0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ); #[rustfmt::skip] let b_bytes = _mm_setr_epi8( 0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ); let a = a_bytes; let b = b_bytes; let i = _mm_cmpistro(a, b, _SIDD_UWORD_OPS | _SIDD_UNIT_MASK); assert_eq!(0, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistra() { let a = str_to_m128i(b""); let b = str_to_m128i(b"Hello!!!!!!!!!!!"); let i = _mm_cmpistra(a, b, _SIDD_UNIT_MASK); assert_eq!(1, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestrm() { let a = str_to_m128i(b"Hello!"); let b = str_to_m128i(b"Hello."); let i = _mm_cmpestrm(a, 5, b, 5, _SIDD_UNIT_MASK); #[rustfmt::skip] let r = _mm_setr_epi8( !0, !0, !0, !0, !0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ); assert_eq_m128i(i, r); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestri() { let a = str_to_m128i(b"bar - garbage"); let b = str_to_m128i(b"foobar"); let i = _mm_cmpestri(a, 3, b, 6, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(3, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestrz() { let a = str_to_m128i(b""); let b = str_to_m128i(b"Hello"); let i = _mm_cmpestrz(a, 16, b, 6, _SIDD_CMP_EQUAL_ORDERED); assert_eq!(1, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestrc() { let va = str_to_m128i(b"!!!!!!!!"); let vb = str_to_m128i(b" "); let i = _mm_cmpestrc(va, 7, vb, 7, _SIDD_UNIT_MASK); assert_eq!(0, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestrs() { 
#[rustfmt::skip] let a_bytes = _mm_setr_epi8( 0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ); let a = a_bytes; let b = _mm_set1_epi8(0x00); let i = _mm_cmpestrs(a, 8, b, 0, _SIDD_UWORD_OPS); assert_eq!(0, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestro() { let a = str_to_m128i(b"Hello"); let b = str_to_m128i(b"World"); let i = _mm_cmpestro(a, 5, b, 5, _SIDD_UBYTE_OPS); assert_eq!(0, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestra() { let a = str_to_m128i(b"Cannot match a"); let b = str_to_m128i(b"Null after 14"); let i = _mm_cmpestra(a, 14, b, 16, _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK); assert_eq!(1, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_crc32_u8() { let crc = 0x2aa1e72b; let v = 0x2a; let i = _mm_crc32_u8(crc, v); assert_eq!(i, 0xf24122e4); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_crc32_u16() { let crc = 0x8ecec3b5; let v = 0x22b; let i = _mm_crc32_u16(crc, v); assert_eq!(i, 0x13bb2fb); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_crc32_u32() { let crc = 0xae2912c8; let v = 0x845fed; let i = _mm_crc32_u32(crc, v); assert_eq!(i, 0xffae2ed1); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpgt_epi64() { let a = _mm_setr_epi64x(0, 0x2a); let b = _mm_set1_epi64x(0x00); let i = _mm_cmpgt_epi64(a, b); assert_eq_m128i(i, _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64)); } } core_arch-0.1.5/src/x86/sse4a.rs010064400007650000024000000112031343447103600144640ustar0000000000000000//! `i686`'s Streaming SIMD Extensions 4a (`SSE4a`) use crate::{ core_arch::{simd::*, x86::*}, mem::transmute, }; #[cfg(test)] use stdsimd_test::assert_instr; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sse4a.extrq"] fn extrq(x: i64x2, y: i8x16) -> i64x2; #[link_name = "llvm.x86.sse4a.insertq"] fn insertq(x: i64x2, y: i64x2) -> i64x2; #[link_name = "llvm.x86.sse4a.movnt.sd"] fn movntsd(x: *mut f64, y: __m128d); #[link_name = "llvm.x86.sse4a.movnt.ss"] fn movntss(x: *mut f32, y: __m128); } // FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ // FIXME(blocked on #248): _mm_inserti_si64(x, y, len, idx) // INSERTQ /// Extracts the bit range specified by `y` from the lower 64 bits of `x`. /// /// The `[13:8]` bits of `y` specify the index of the bit-range to extract. The /// `[5:0]` bits of `y` specify the length of the bit-range to extract. All /// other bits are ignored. /// /// If the length is zero, it is interpreted as `64`. If the length and index /// are zero, the lower 64 bits of `x` are extracted. /// /// If `length == 0 && index > 0` or `lenght + index > 64` the result is /// undefined. #[inline] #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(extrq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i { transmute(extrq(x.as_i64x2(), y.as_i8x16())) } /// Inserts the `[length:0]` bits of `y` into `x` at `index`. /// /// The bits of `y`: /// /// - `[69:64]` specify the `length`, /// - `[77:72]` specify the index. /// /// If the `length` is zero it is interpreted as `64`. If `index + length > 64` /// or `index > 0 && length == 0` the result is undefined. #[inline] #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(insertq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i { transmute(insertq(x.as_i64x2(), y.as_i64x2())) } /// Non-temporal store of `a.0` into `p`. 
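///
/// # Examples
///
/// A usage sketch added for illustration (it mirrors this crate's own test):
/// only the low `f64` lane of `a` is written through `p`; the upper lane is
/// left untouched. The `sse4a` run-time check is an assumption of this
/// example.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("sse4a") {
/// # #[target_feature(enable = "sse4a")]
/// # unsafe fn worker() {
/// let mut out = [1.0_f64, 2.0];
/// let v = _mm_setr_pd(3.0, 4.0);
/// _mm_stream_sd(out.as_mut_ptr(), v);
/// assert_eq!(out, [3.0, 2.0]); // only out[0] was overwritten
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```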
#[inline] #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(movntsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) { movntsd(p, a); } /// Non-temporal store of `a.0` into `p`. #[inline] #[target_feature(enable = "sse4a")] #[cfg_attr(test, assert_instr(movntss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) { movntss(p, a); } #[cfg(test)] mod tests { use crate::core_arch::x86::*; use stdsimd_test::simd_test; #[simd_test(enable = "sse4a")] unsafe fn test_mm_extract_si64() { let b = 0b0110_0000_0000_i64; // ^^^^ bit range extracted let x = _mm_setr_epi64x(b, 0); let v = 0b001000___00___000100_i64; // ^idx: 2^3 = 8 ^length = 2^2 = 4 let y = _mm_setr_epi64x(v, 0); let e = _mm_setr_epi64x(0b0110_i64, 0); let r = _mm_extract_si64(x, y); assert_eq_m128i(r, e); } #[simd_test(enable = "sse4a")] unsafe fn test_mm_insert_si64() { let i = 0b0110_i64; // ^^^^ bit range inserted let z = 0b1010_1010_1010i64; // ^^^^ bit range replaced let e = 0b0110_1010_1010i64; // ^^^^ replaced 1010 with 0110 let x = _mm_setr_epi64x(z, 0); let expected = _mm_setr_epi64x(e, 0); let v = 0b001000___00___000100_i64; // ^idx: 2^3 = 8 ^length = 2^2 = 4 let y = _mm_setr_epi64x(i, v); let r = _mm_insert_si64(x, y); assert_eq_m128i(r, expected); } #[repr(align(16))] struct MemoryF64 { data: [f64; 2], } #[simd_test(enable = "sse4a")] unsafe fn test_mm_stream_sd() { let mut mem = MemoryF64 { data: [1.0_f64, 2.0], }; { let vals = &mut mem.data; let d = vals.as_mut_ptr(); let x = _mm_setr_pd(3.0, 4.0); _mm_stream_sd(d, x); } assert_eq!(mem.data[0], 3.0); assert_eq!(mem.data[1], 2.0); } #[repr(align(16))] struct MemoryF32 { data: [f32; 4], } #[simd_test(enable = "sse4a")] unsafe fn test_mm_stream_ss() { let mut mem = MemoryF32 { data: [1.0_f32, 2.0, 3.0, 4.0], }; { let vals = &mut mem.data; let d = vals.as_mut_ptr(); let x = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); _mm_stream_ss(d, x); } assert_eq!(mem.data[0], 5.0); assert_eq!(mem.data[1], 2.0); assert_eq!(mem.data[2], 3.0); assert_eq!(mem.data[3], 4.0); } } core_arch-0.1.5/src/x86/ssse3.rs010064400007650000024000000736021343447103600145200ustar0000000000000000//! Supplemental Streaming SIMD Extensions 3 (SSSE3) use crate::{ core_arch::{simd::*, simd_llvm::*, x86::*}, mem::transmute, }; #[cfg(test)] use stdsimd_test::assert_instr; /// Computes the absolute value of packed 8-bit signed integers in `a` and /// return the unsigned results. 
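///
/// # Examples
///
/// A minimal sketch added for illustration (it mirrors this crate's own test
/// and checks the result with plain SSE2 compare/movemask helpers):
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("ssse3") {
/// # #[target_feature(enable = "ssse3")]
/// # unsafe fn worker() {
/// let r = _mm_abs_epi8(_mm_set1_epi8(-5));
/// // Every byte lane of `r` is now 5.
/// assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi8(r, _mm_set1_epi8(5))), 0xFFFF);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```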
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pabsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i { transmute(pabsb128(a.as_i8x16())) } /// Computes the absolute value of each of the packed 16-bit signed integers in /// `a` and /// return the 16-bit unsigned integer /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pabsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i { transmute(pabsw128(a.as_i16x8())) } /// Computes the absolute value of each of the packed 32-bit signed integers in /// `a` and /// return the 32-bit unsigned integer /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi32) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pabsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i { transmute(pabsd128(a.as_i32x4())) } /// Shuffles bytes from `a` according to the content of `b`. /// /// The last 4 bits of each byte of `b` are used as addresses /// into the 16 bytes of `a`. /// /// In addition, if the highest significant bit of a byte of `b` /// is set, the respective destination byte is set to 0. /// /// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is /// logically equivalent to: /// /// ``` /// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] { /// let mut r = [0u8; 16]; /// for i in 0..16 { /// // if the most significant bit of b is set, /// // then the destination byte is set to 0. /// if b[i] & 0x80 == 0u8 { /// r[i] = a[(b[i] % 16) as usize]; /// } /// } /// r /// } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi8) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pshufb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i { transmute(pshufb128(a.as_u8x16(), b.as_u8x16())) } /// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result, /// shift the result right by `n` bytes, and returns the low 16 bytes. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(palignr, n = 15))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i { let n = n as u32; // If palignr is shifting the pair of vectors more than the size of two // lanes, emit zero. if n > 32 { return _mm_set1_epi8(0); } // If palignr is shifting the pair of input vectors more than one lane, // but less than two lanes, convert to shifting in zeroes. let (a, b, n) = if n > 16 { (_mm_set1_epi8(0), a, n - 16) } else { (a, b, n) }; let a = a.as_i8x16(); let b = b.as_i8x16(); macro_rules! 
shuffle { ($shift:expr) => { simd_shuffle16( b, a, [ 0 + $shift, 1 + $shift, 2 + $shift, 3 + $shift, 4 + $shift, 5 + $shift, 6 + $shift, 7 + $shift, 8 + $shift, 9 + $shift, 10 + $shift, 11 + $shift, 12 + $shift, 13 + $shift, 14 + $shift, 15 + $shift, ], ) }; } let r: i8x16 = match n { 0 => shuffle!(0), 1 => shuffle!(1), 2 => shuffle!(2), 3 => shuffle!(3), 4 => shuffle!(4), 5 => shuffle!(5), 6 => shuffle!(6), 7 => shuffle!(7), 8 => shuffle!(8), 9 => shuffle!(9), 10 => shuffle!(10), 11 => shuffle!(11), 12 => shuffle!(12), 13 => shuffle!(13), 14 => shuffle!(14), 15 => shuffle!(15), _ => shuffle!(16), }; transmute(r) } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of `[8 x i16]`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phaddw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(phaddw128(a.as_i16x8(), b.as_i16x8())) } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phaddsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(phaddsw128(a.as_i16x8(), b.as_i16x8())) } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of `[4 x i32]`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi32) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phaddd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i { transmute(phaddd128(a.as_i32x4(), b.as_i32x4())) } /// Horizontally subtract the adjacent pairs of values contained in 2 /// packed 128-bit vectors of `[8 x i16]`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phsubw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(phsubw128(a.as_i16x8(), b.as_i16x8())) } /// Horizontally subtract the adjacent pairs of values contained in 2 /// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than /// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are /// saturated to 8000h. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phsubsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(phsubsw128(a.as_i16x8(), b.as_i16x8())) } /// Horizontally subtract the adjacent pairs of values contained in 2 /// packed 128-bit vectors of `[4 x i32]`. 
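///
/// # Examples
///
/// A minimal sketch added for illustration (values mirror this crate's own
/// test): the result lanes are `[a0-a1, a2-a3, b0-b1, b2-b3]`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("ssse3") {
/// # #[target_feature(enable = "ssse3")]
/// # unsafe fn worker() {
/// let a = _mm_setr_epi32(1, 2, 3, 4);
/// let b = _mm_setr_epi32(4, 128, 4, 3);
/// let r = _mm_hsub_epi32(a, b);
/// let expected = _mm_setr_epi32(-1, -1, -124, 1);
/// assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, expected)), 0xFFFF);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```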
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi32) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(phsubd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i { transmute(phsubd128(a.as_i32x4(), b.as_i32x4())) } /// Multiplies corresponding pairs of packed 8-bit unsigned integer /// values contained in the first source operand and packed 8-bit signed /// integer values contained in the second source operand, add pairs of /// contiguous products with signed saturation, and writes the 16-bit sums to /// the corresponding bits in the destination. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pmaddubsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16())) } /// Multiplies packed 16-bit signed integer values, truncate the 32-bit /// product to the 18 most significant bits by right-shifting, round the /// truncated value by adding 1, and write bits `[16:1]` to the destination. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(pmulhrsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8())) } /// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit /// integer in `b` is negative, and returns the result. /// Elements in result are zeroed out when the corresponding element in `b` /// is zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi8) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(psignb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i { transmute(psignb128(a.as_i8x16(), b.as_i8x16())) } /// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit /// integer in `b` is negative, and returns the results. /// Elements in result are zeroed out when the corresponding element in `b` /// is zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi16) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(psignw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i { transmute(psignw128(a.as_i16x8(), b.as_i16x8())) } /// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit /// integer in `b` is negative, and returns the results. /// Element in result are zeroed out when the corresponding element in `b` /// is zero. 
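///
/// # Examples
///
/// A minimal sketch added for illustration (values mirror this crate's own
/// test): lanes of `a` are kept, negated or zeroed depending on the sign of
/// the corresponding lane of `b`.
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # if is_x86_feature_detected!("ssse3") {
/// # #[target_feature(enable = "ssse3")]
/// # unsafe fn worker() {
/// let a = _mm_setr_epi32(-1, 2, 3, 4);
/// let b = _mm_setr_epi32(1, -1, 1, 0); // positive, negative, positive, zero
/// let r = _mm_sign_epi32(a, b);
/// let expected = _mm_setr_epi32(-1, -2, 3, 0);
/// assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, expected)), 0xFFFF);
/// # }
/// # unsafe { worker(); }
/// # }
/// # }
/// ```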
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi32) #[inline] #[target_feature(enable = "ssse3")] #[cfg_attr(test, assert_instr(psignd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i { transmute(psignd128(a.as_i32x4(), b.as_i32x4())) } /// Computes the absolute value of packed 8-bit integers in `a` and /// return the unsigned results. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pabsb))] pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 { pabsb(a) } /// Computes the absolute value of packed 8-bit integers in `a`, and returns the /// unsigned results. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pabsw))] pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 { pabsw(a) } /// Computes the absolute value of packed 32-bit integers in `a`, and returns the /// unsigned results. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pabsd))] pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 { pabsd(a) } /// Shuffles packed 8-bit integers in `a` according to shuffle control mask in /// the corresponding 8-bit element of `b`, and returns the results #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pshufb))] pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 { pshufb(a, b) } /// Concatenates the two 64-bit integer vector operands, and right-shifts /// the result by the number of bytes specified in the immediate operand. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(palignr, n = 15))] #[rustc_args_required_const(2)] pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 { macro_rules! call { ($imm8:expr) => { palignrb(a, b, $imm8) }; } constify_imm8!(n, call) } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of `[4 x i16]`. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(phaddw))] pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 { phaddw(a, b) } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of `[2 x i32]`. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(phaddd))] pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 { phaddd(a, b) } /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 64-bit vectors of `[4 x i16]`. Positive sums greater than 7FFFh are /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(phaddsw))] pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 { phaddsw(a, b) } /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 64-bit vectors of `[4 x i16]`. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(phsubw))] pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 { phsubw(a, b) } /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 64-bit vectors of `[2 x i32]`. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(phsubd))] pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 { phsubd(a, b) } /// Horizontally subtracts the adjacent pairs of values contained in 2 /// packed 64-bit vectors of `[4 x i16]`. Positive differences greater than /// 7FFFh are saturated to 7FFFh. 
Negative differences less than 8000h are /// saturated to 8000h. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(phsubsw))] pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 { phsubsw(a, b) } /// Multiplies corresponding pairs of packed 8-bit unsigned integer /// values contained in the first source operand and packed 8-bit signed /// integer values contained in the second source operand, adds pairs of /// contiguous products with signed saturation, and writes the 16-bit sums to /// the corresponding bits in the destination. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pmaddubsw))] pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 { pmaddubsw(a, b) } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit /// products to the 18 most significant bits by right-shifting, rounds the /// truncated value by adding 1, and writes bits `[16:1]` to the destination. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(pmulhrsw))] pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 { pmulhrsw(a, b) } /// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit /// integer in `b` is negative, and returns the results. /// Element in result are zeroed out when the corresponding element in `b` is /// zero. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(psignb))] pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 { psignb(a, b) } /// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit /// integer in `b` is negative, and returns the results. /// Element in result are zeroed out when the corresponding element in `b` is /// zero. #[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(psignw))] pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 { psignw(a, b) } /// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit /// integer in `b` is negative, and returns the results. /// Element in result are zeroed out when the corresponding element in `b` is /// zero. 
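///
/// # Examples
///
/// A scalar model of the per-lane behaviour, added for illustration only (it
/// is not the intrinsic itself):
///
/// ```
/// fn sign(a: i32, b: i32) -> i32 {
///     if b < 0 { a.wrapping_neg() } else if b == 0 { 0 } else { a }
/// }
/// // Mirrors this crate's own test: a = (-1, 2), b = (1, 0) -> (-1, 0).
/// assert_eq!([sign(-1, 1), sign(2, 0)], [-1, 0]);
/// ```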
#[inline] #[target_feature(enable = "ssse3,mmx")] #[cfg_attr(test, assert_instr(psignd))] pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 { psignd(a, b) } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.ssse3.pabs.b.128"] fn pabsb128(a: i8x16) -> u8x16; #[link_name = "llvm.x86.ssse3.pabs.w.128"] fn pabsw128(a: i16x8) -> u16x8; #[link_name = "llvm.x86.ssse3.pabs.d.128"] fn pabsd128(a: i32x4) -> u32x4; #[link_name = "llvm.x86.ssse3.pshuf.b.128"] fn pshufb128(a: u8x16, b: u8x16) -> u8x16; #[link_name = "llvm.x86.ssse3.phadd.w.128"] fn phaddw128(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.x86.ssse3.phadd.sw.128"] fn phaddsw128(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.x86.ssse3.phadd.d.128"] fn phaddd128(a: i32x4, b: i32x4) -> i32x4; #[link_name = "llvm.x86.ssse3.phsub.w.128"] fn phsubw128(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.x86.ssse3.phsub.sw.128"] fn phsubsw128(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.x86.ssse3.phsub.d.128"] fn phsubd128(a: i32x4, b: i32x4) -> i32x4; #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"] fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8; #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"] fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.x86.ssse3.psign.b.128"] fn psignb128(a: i8x16, b: i8x16) -> i8x16; #[link_name = "llvm.x86.ssse3.psign.w.128"] fn psignw128(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.x86.ssse3.psign.d.128"] fn psignd128(a: i32x4, b: i32x4) -> i32x4; #[link_name = "llvm.x86.ssse3.pabs.b"] fn pabsb(a: __m64) -> __m64; #[link_name = "llvm.x86.ssse3.pabs.w"] fn pabsw(a: __m64) -> __m64; #[link_name = "llvm.x86.ssse3.pabs.d"] fn pabsd(a: __m64) -> __m64; #[link_name = "llvm.x86.ssse3.pshuf.b"] fn pshufb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.palignr.b"] fn palignrb(a: __m64, b: __m64, n: u8) -> __m64; #[link_name = "llvm.x86.ssse3.phadd.w"] fn phaddw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.ssse3.phadd.d"] fn phaddd(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.ssse3.phadd.sw"] fn phaddsw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.ssse3.phsub.w"] fn phsubw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.ssse3.phsub.d"] fn phsubd(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.ssse3.phsub.sw"] fn phsubsw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.ssse3.pmadd.ub.sw"] fn pmaddubsw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.ssse3.pmul.hr.sw"] fn pmulhrsw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.ssse3.psign.b"] fn psignb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.ssse3.psign.w"] fn psignw(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.ssse3.psign.d"] fn psignd(a: __m64, b: __m64) -> __m64; } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use crate::core_arch::x86::*; #[simd_test(enable = "ssse3")] unsafe fn test_mm_abs_epi8() { let r = _mm_abs_epi8(_mm_set1_epi8(-5)); assert_eq_m128i(r, _mm_set1_epi8(5)); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_abs_epi16() { let r = _mm_abs_epi16(_mm_set1_epi16(-5)); assert_eq_m128i(r, _mm_set1_epi16(5)); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_abs_epi32() { let r = _mm_abs_epi32(_mm_set1_epi32(-5)); assert_eq_m128i(r, _mm_set1_epi32(5)); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_shuffle_epi8() { #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); #[rustfmt::skip] let b = _mm_setr_epi8( 4, 128_u8 as i8, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0, ); let 
expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1); let r = _mm_shuffle_epi8(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_alignr_epi8() { #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); #[rustfmt::skip] let b = _mm_setr_epi8( 4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0, ); let r = _mm_alignr_epi8(a, b, 33); assert_eq_m128i(r, _mm_set1_epi8(0)); let r = _mm_alignr_epi8(a, b, 17); #[rustfmt::skip] let expected = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, ); assert_eq_m128i(r, expected); let r = _mm_alignr_epi8(a, b, 16); assert_eq_m128i(r, a); let r = _mm_alignr_epi8(a, b, 15); #[rustfmt::skip] let expected = _mm_setr_epi8( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); assert_eq_m128i(r, expected); let r = _mm_alignr_epi8(a, b, 0); assert_eq_m128i(r, b); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_hadd_epi16() { let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19); let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25); let r = _mm_hadd_epi16(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_hadds_epi16() { let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1); let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768); let r = _mm_hadds_epi16(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_hadd_epi32() { let a = _mm_setr_epi32(1, 2, 3, 4); let b = _mm_setr_epi32(4, 128, 4, 3); let expected = _mm_setr_epi32(3, 7, 132, 7); let r = _mm_hadd_epi32(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_hsub_epi16() { let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19); let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13); let r = _mm_hsub_epi16(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_hsubs_epi16() { let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1); let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768); let r = _mm_hsubs_epi16(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_hsub_epi32() { let a = _mm_setr_epi32(1, 2, 3, 4); let b = _mm_setr_epi32(4, 128, 4, 3); let expected = _mm_setr_epi32(-1, -1, -124, 1); let r = _mm_hsub_epi32(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_maddubs_epi16() { #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ); #[rustfmt::skip] let b = _mm_setr_epi8( 4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0, ); let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120); let r = _mm_maddubs_epi16(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_mulhrs_epi16() { let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1); let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0); let r = _mm_mulhrs_epi16(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_sign_epi8() { #[rustfmt::skip] let a = _mm_setr_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -14, -15, 16, ); #[rustfmt::skip] let b = _mm_setr_epi8( 4, 63, -4, 3, 24, 12, -6, -19, 12, 5, -5, 10, 
4, 1, -8, 0, ); #[rustfmt::skip] let expected = _mm_setr_epi8( 1, 2, -3, 4, 5, 6, -7, -8, 9, 10, -11, 12, 13, -14, 15, 0, ); let r = _mm_sign_epi8(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_sign_epi16() { let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8); let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1); let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8); let r = _mm_sign_epi16(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3")] unsafe fn test_mm_sign_epi32() { let a = _mm_setr_epi32(-1, 2, 3, 4); let b = _mm_setr_epi32(1, -1, 1, 0); let expected = _mm_setr_epi32(-1, -2, 3, 0); let r = _mm_sign_epi32(a, b); assert_eq_m128i(r, expected); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_abs_pi8() { let r = _mm_abs_pi8(_mm_set1_pi8(-5)); assert_eq_m64(r, _mm_set1_pi8(5)); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_abs_pi16() { let r = _mm_abs_pi16(_mm_set1_pi16(-5)); assert_eq_m64(r, _mm_set1_pi16(5)); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_abs_pi32() { let r = _mm_abs_pi32(_mm_set1_pi32(-5)); assert_eq_m64(r, _mm_set1_pi32(5)); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_shuffle_pi8() { let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19); let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4); let r = _mm_shuffle_pi8(a, b); assert_eq_m64(r, expected); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_alignr_pi8() { let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32); let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32); let r = _mm_alignr_pi8(a, b, 4); assert_eq_m64(r, transmute(0x89abcdefffddeecc_u64)); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_hadd_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let b = _mm_setr_pi16(4, 128, 4, 3); let expected = _mm_setr_pi16(3, 7, 132, 7); let r = _mm_hadd_pi16(a, b); assert_eq_m64(r, expected); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_hadd_pi32() { let a = _mm_setr_pi32(1, 2); let b = _mm_setr_pi32(4, 128); let expected = _mm_setr_pi32(3, 132); let r = _mm_hadd_pi32(a, b); assert_eq_m64(r, expected); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_hadds_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let b = _mm_setr_pi16(32767, 1, -32768, -1); let expected = _mm_setr_pi16(3, 7, 32767, -32768); let r = _mm_hadds_pi16(a, b); assert_eq_m64(r, expected); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_hsub_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let b = _mm_setr_pi16(4, 128, 4, 3); let expected = _mm_setr_pi16(-1, -1, -124, 1); let r = _mm_hsub_pi16(a, b); assert_eq_m64(r, expected); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_hsub_pi32() { let a = _mm_setr_pi32(1, 2); let b = _mm_setr_pi32(4, 128); let expected = _mm_setr_pi32(-1, -124); let r = _mm_hsub_pi32(a, b); assert_eq_m64(r, expected); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_hsubs_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); let b = _mm_setr_pi16(4, 128, 4, 3); let expected = _mm_setr_pi16(-1, -1, -124, 1); let r = _mm_hsubs_pi16(a, b); assert_eq_m64(r, expected); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_maddubs_pi16() { let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19); let expected = _mm_setr_pi16(130, 24, 192, 194); let r = _mm_maddubs_pi16(a, b); assert_eq_m64(r, expected); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_mulhrs_pi16() { let a = _mm_setr_pi16(1, 2, 3, 4); 
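// Per 16-bit lane, pmulhrsw computes (a * b + 0x4000) >> 15, i.e. a
// rounded high-half multiply; the expected values below follow from that.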
let b = _mm_setr_pi16(4, 32767, -1, -32768); let expected = _mm_setr_pi16(0, 2, 0, -4); let r = _mm_mulhrs_pi16(a, b); assert_eq_m64(r, expected); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_sign_pi8() { let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8); let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1); let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8); let r = _mm_sign_pi8(a, b); assert_eq_m64(r, expected); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_sign_pi16() { let a = _mm_setr_pi16(-1, 2, 3, 4); let b = _mm_setr_pi16(1, -1, 1, 0); let expected = _mm_setr_pi16(-1, -2, 3, 0); let r = _mm_sign_pi16(a, b); assert_eq_m64(r, expected); } #[simd_test(enable = "ssse3,mmx")] unsafe fn test_mm_sign_pi32() { let a = _mm_setr_pi32(-1, 2); let b = _mm_setr_pi32(1, 0); let expected = _mm_setr_pi32(-1, 0); let r = _mm_sign_pi32(a, b); assert_eq_m64(r, expected); } } core_arch-0.1.5/src/x86/tbm.rs010064400007650000024000000342521343447103600142400ustar0000000000000000//! Trailing Bit Manipulation (TBM) instruction set. //! //! The reference is [AMD64 Architecture Programmer's Manual, Volume 3: //! General-Purpose and System Instructions][amd64_ref]. //! //! [Wikipedia][wikipedia_bmi] provides a quick overview of the available //! instructions. //! //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf //! [wikipedia_bmi]: //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] use stdsimd_test::assert_instr; // FIXME(blocked on #248) // TODO: LLVM-CODEGEN ERROR: LLVM ERROR: Cannot select: // intrinsic %llvm.x86.tbm.bextri.u32 /* #[allow(dead_code)] extern "C" { #[link_name="llvm.x86.tbm.bextri.u32"] fn x86_tbm_bextri_u32(a: u32, y: u32) -> u32; #[link_name="llvm.x86.tbm.bextri.u64"] fn x86_tbm_bextri_u64(x: u64, y: u64) -> u64; } /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. #[inline] #[target_feature(enable = "tbm")] pub fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 { _bextr2_u32(a, (start & 0xffu32) | ((len & 0xffu32) << 8u32)) } /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. #[inline] #[target_feature(enable = "tbm")] pub fn _bextr_u64(a: u64, start: u64, len: u64) -> u64 { _bextr2_u64(a, (start & 0xffu64) | ((len & 0xffu64) << 8u64)) } /// Extracts bits of `a` specified by `control` into /// the least significant bits of the result. /// /// Bits `[7,0]` of `control` specify the index to the first bit in the range to /// be extracted, and bits `[15,8]` specify the length of the range. #[inline] #[target_feature(enable = "tbm")] pub fn _bextr2_u32(a: u32, control: u32) -> u32 { unsafe { x86_tbm_bextri_u32(a, control) } } /// Extracts bits of `a` specified by `control` into /// the least significant bits of the result. /// /// Bits `[7,0]` of `control` specify the index to the first bit in the range to /// be extracted, and bits `[15,8]` specify the length of the range. #[inline] #[target_feature(enable = "tbm")] pub fn _bextr2_u64(a: u64, control: u64) -> u64 { unsafe { x86_tbm_bextri_u64(a, control) } } */ /// Clears all bits below the least significant zero bit of `x`. /// /// If there is no zero bit in `x`, it returns zero. 
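///
/// # Examples
///
/// A plain-Rust sketch of the equivalent bit trick, added for illustration
/// (values mirror this crate's own tests):
///
/// ```
/// fn blcfill(x: u32) -> u32 { x & x.wrapping_add(1) }
/// assert_eq!(blcfill(0b0101_0111), 0b0101_0000);
/// // Everything below the lowest zero bit (bit 8) is cleared.
/// assert_eq!(blcfill(0b1111_1111), 0);
/// ```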
#[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcfill))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcfill_u32(x: u32) -> u32 { x & (x.wrapping_add(1)) } /// Clears all bits below the least significant zero bit of `x`. /// /// If there is no zero bit in `x`, it returns zero. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcfill))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcfill_u64(x: u64) -> u64 { x & (x.wrapping_add(1)) } /// Sets all bits of `x` to 1 except for the least significant zero bit. /// /// If there is no zero bit in `x`, it sets all bits. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blci))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blci_u32(x: u32) -> u32 { x | !(x.wrapping_add(1)) } /// Sets all bits of `x` to 1 except for the least significant zero bit. /// /// If there is no zero bit in `x`, it sets all bits. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blci))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blci_u64(x: u64) -> u64 { x | !(x.wrapping_add(1)) } /// Sets the least significant zero bit of `x` and clears all other bits. /// /// If there is no zero bit in `x`, it returns zero. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcic))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcic_u32(x: u32) -> u32 { !x & (x.wrapping_add(1)) } /// Sets the least significant zero bit of `x` and clears all other bits. /// /// If there is no zero bit in `x`, it returns zero. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcic))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcic_u64(x: u64) -> u64 { !x & (x.wrapping_add(1)) } /// Sets the least significant zero bit of `x` and clears all bits above /// that bit. /// /// If there is no zero bit in `x`, it sets all the bits. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcmsk))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcmsk_u32(x: u32) -> u32 { x ^ (x.wrapping_add(1)) } /// Sets the least significant zero bit of `x` and clears all bits above /// that bit. /// /// If there is no zero bit in `x`, it sets all the bits. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcmsk))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcmsk_u64(x: u64) -> u64 { x ^ (x.wrapping_add(1)) } /// Sets the least significant zero bit of `x`. /// /// If there is no zero bit in `x`, it returns `x`. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcs))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcs_u32(x: u32) -> u32 { x | (x.wrapping_add(1)) } /// Sets the least significant zero bit of `x`. /// /// If there is no zero bit in `x`, it returns `x`. 
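///
/// # Examples
///
/// A plain-Rust sketch of the equivalent bit trick, added for illustration
/// (the value mirrors this crate's own test):
///
/// ```
/// fn blcs(x: u64) -> u64 { x | x.wrapping_add(1) }
/// // Bit 1, the least significant zero bit, gets set.
/// assert_eq!(blcs(0b0101_0001), 0b0101_0011);
/// ```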
#[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blcs))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blcs_u64(x: u64) -> u64 { x | x.wrapping_add(1) } /// Sets all bits of `x` below the least significant one. /// /// If there is no set bit in `x`, it sets all the bits. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blsfill))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsfill_u32(x: u32) -> u32 { x | (x.wrapping_sub(1)) } /// Sets all bits of `x` below the least significant one. /// /// If there is no set bit in `x`, it sets all the bits. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blsfill))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsfill_u64(x: u64) -> u64 { x | (x.wrapping_sub(1)) } /// Clears least significant bit and sets all other bits. /// /// If there is no set bit in `x`, it sets all the bits. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blsic))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsic_u32(x: u32) -> u32 { !x | (x.wrapping_sub(1)) } /// Clears least significant bit and sets all other bits. /// /// If there is no set bit in `x`, it sets all the bits. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(blsic))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsic_u64(x: u64) -> u64 { !x | (x.wrapping_sub(1)) } /// Clears all bits below the least significant zero of `x` and sets all other /// bits. /// /// If the least significant bit of `x` is `0`, it sets all bits. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(t1mskc))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _t1mskc_u32(x: u32) -> u32 { !x | (x.wrapping_add(1)) } /// Clears all bits below the least significant zero of `x` and sets all other /// bits. /// /// If the least significant bit of `x` is `0`, it sets all bits. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(t1mskc))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _t1mskc_u64(x: u64) -> u64 { !x | (x.wrapping_add(1)) } /// Sets all bits below the least significant one of `x` and clears all other /// bits. /// /// If the least significant bit of `x` is 1, it returns zero. #[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(tzmsk))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _tzmsk_u32(x: u32) -> u32 { !x & (x.wrapping_sub(1)) } /// Sets all bits below the least significant one of `x` and clears all other /// bits. /// /// If the least significant bit of `x` is 1, it returns zero. 
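///
/// # Examples
///
/// A plain-Rust sketch of the equivalent bit trick, added for illustration
/// (values mirror this crate's own tests):
///
/// ```
/// fn tzmsk(x: u64) -> u64 { !x & x.wrapping_sub(1) }
/// // Three trailing zeros produce a three-bit mask.
/// assert_eq!(tzmsk(0b0101_1000), 0b0000_0111);
/// // The least significant bit is already set, so the mask is empty.
/// assert_eq!(tzmsk(0b0101_1001), 0);
/// ```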
#[inline] #[target_feature(enable = "tbm")] #[cfg_attr(test, assert_instr(tzmsk))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _tzmsk_u64(x: u64) -> u64 { !x & (x.wrapping_sub(1)) } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use crate::core_arch::x86::*; /* #[simd_test(enable = "tbm")] unsafe fn test_bextr_u32() { assert_eq!(_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32); } #[simd_test(enable = "tbm")] unsafe fn test_bextr_u64() { assert_eq!(_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64); } */ #[simd_test(enable = "tbm")] unsafe fn test_blcfill_u32() { assert_eq!(_blcfill_u32(0b0101_0111u32), 0b0101_0000u32); assert_eq!(_blcfill_u32(0b1111_1111u32), 0u32); } #[simd_test(enable = "tbm")] #[cfg(not(target_arch = "x86"))] unsafe fn test_blcfill_u64() { assert_eq!(_blcfill_u64(0b0101_0111u64), 0b0101_0000u64); assert_eq!(_blcfill_u64(0b1111_1111u64), 0u64); } #[simd_test(enable = "tbm")] unsafe fn test_blci_u32() { assert_eq!( _blci_u32(0b0101_0000u32), 0b1111_1111_1111_1111_1111_1111_1111_1110u32 ); assert_eq!( _blci_u32(0b1111_1111u32), 0b1111_1111_1111_1111_1111_1110_1111_1111u32 ); } #[simd_test(enable = "tbm")] #[cfg(not(target_arch = "x86"))] #[rustfmt::skip] unsafe fn test_blci_u64() { assert_eq!( _blci_u64(0b0101_0000u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64 ); assert_eq!( _blci_u64(0b1111_1111u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64 ); } #[simd_test(enable = "tbm")] unsafe fn test_blcic_u32() { assert_eq!(_blcic_u32(0b0101_0001u32), 0b0000_0010u32); assert_eq!(_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32); } #[simd_test(enable = "tbm")] #[cfg(not(target_arch = "x86"))] unsafe fn test_blcic_u64() { assert_eq!(_blcic_u64(0b0101_0001u64), 0b0000_0010u64); assert_eq!(_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64); } #[simd_test(enable = "tbm")] unsafe fn test_blcmsk_u32() { assert_eq!(_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32); assert_eq!(_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32); } #[simd_test(enable = "tbm")] #[cfg(not(target_arch = "x86"))] unsafe fn test_blcmsk_u64() { assert_eq!(_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64); assert_eq!(_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64); } #[simd_test(enable = "tbm")] unsafe fn test_blcs_u32() { assert_eq!(_blcs_u32(0b0101_0001u32), 0b0101_0011u32); assert_eq!(_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32); } #[simd_test(enable = "tbm")] #[cfg(not(target_arch = "x86"))] unsafe fn test_blcs_u64() { assert_eq!(_blcs_u64(0b0101_0001u64), 0b0101_0011u64); assert_eq!(_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64); } #[simd_test(enable = "tbm")] unsafe fn test_blsfill_u32() { assert_eq!(_blsfill_u32(0b0101_0100u32), 0b0101_0111u32); assert_eq!( _blsfill_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32 ); } #[simd_test(enable = "tbm")] #[cfg(not(target_arch = "x86"))] #[rustfmt::skip] unsafe fn test_blsfill_u64() { assert_eq!(_blsfill_u64(0b0101_0100u64), 0b0101_0111u64); assert_eq!( _blsfill_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 ); } #[simd_test(enable = "tbm")] unsafe fn test_blsic_u32() { assert_eq!( _blsic_u32(0b0101_0100u32), 0b1111_1111_1111_1111_1111_1111_1111_1011u32 ); assert_eq!( _blsic_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32 ); } #[simd_test(enable = "tbm")] #[cfg(not(target_arch = "x86"))] #[rustfmt::skip] unsafe fn test_blsic_u64() { assert_eq!( 
_blsic_u64(0b0101_0100u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64 ); assert_eq!( _blsic_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 ); } #[simd_test(enable = "tbm")] unsafe fn test_t1mskc_u32() { assert_eq!( _t1mskc_u32(0b0101_0111u32), 0b1111_1111_1111_1111_1111_1111_1111_1000u32 ); assert_eq!( _t1mskc_u32(0u32), 0b1111_1111_1111_1111_1111_1111_1111_1111u32 ); } #[simd_test(enable = "tbm")] #[cfg(not(target_arch = "x86"))] #[rustfmt::skip] unsafe fn test_t1mksc_u64() { assert_eq!( _t1mskc_u64(0b0101_0111u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64 ); assert_eq!( _t1mskc_u64(0u64), 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64 ); } #[simd_test(enable = "tbm")] unsafe fn test_tzmsk_u32() { assert_eq!(_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32); assert_eq!(_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32); } #[simd_test(enable = "tbm")] #[cfg(not(target_arch = "x86"))] unsafe fn test_tzmsk_u64() { assert_eq!(_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64); assert_eq!(_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64); } } core_arch-0.1.5/src/x86/test.rs010064400007650000024000000066331343447103600144370ustar0000000000000000//! Utilities used in testing the x86 intrinsics use crate::core_arch::x86::*; #[target_feature(enable = "mmx")] pub unsafe fn assert_eq_m64(a: __m64, b: __m64) { union A { a: __m64, b: u64, } assert_eq!(A { a }.b, A { a: b }.b) } #[target_feature(enable = "sse2")] pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) { union A { a: __m128i, b: [u64; 2], } assert_eq!(A { a }.b, A { a: b }.b) } #[target_feature(enable = "sse2")] pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) { if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 { panic!("{:?} != {:?}", a, b); } } #[target_feature(enable = "sse2")] pub unsafe fn get_m128d(a: __m128d, idx: usize) -> f64 { union A { a: __m128d, b: [f64; 2], }; A { a }.b[idx] } #[target_feature(enable = "sse")] pub unsafe fn assert_eq_m128(a: __m128, b: __m128) { let r = _mm_cmpeq_ps(a, b); if _mm_movemask_ps(r) != 0b1111 { panic!("{:?} != {:?}", a, b); } } #[target_feature(enable = "sse")] pub unsafe fn get_m128(a: __m128, idx: usize) -> f32 { union A { a: __m128, b: [f32; 4], }; A { a }.b[idx] } // not actually an intrinsic but useful in various tests as we proted from // `i64x2::new` which is backwards from `_mm_set_epi64x` #[target_feature(enable = "sse2")] pub unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i { _mm_set_epi64x(b, a) } #[target_feature(enable = "avx")] pub unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) { union A { a: __m256i, b: [u64; 4], } assert_eq!(A { a }.b, A { a: b }.b) } #[target_feature(enable = "avx")] pub unsafe fn assert_eq_m256d(a: __m256d, b: __m256d) { let cmp = _mm256_cmp_pd(a, b, _CMP_EQ_OQ); if _mm256_movemask_pd(cmp) != 0b1111 { panic!("{:?} != {:?}", a, b); } } #[target_feature(enable = "avx")] pub unsafe fn get_m256d(a: __m256d, idx: usize) -> f64 { union A { a: __m256d, b: [f64; 4], }; A { a }.b[idx] } #[target_feature(enable = "avx")] pub unsafe fn assert_eq_m256(a: __m256, b: __m256) { let cmp = _mm256_cmp_ps(a, b, _CMP_EQ_OQ); if _mm256_movemask_ps(cmp) != 0b11111111 { panic!("{:?} != {:?}", a, b); } } #[target_feature(enable = "avx")] pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 { union A { a: __m256, b: [f32; 8], }; A { a }.b[idx] } // These intrinsics doesn't exist on x86 b/c it requires a 64-bit register, // which 
doesn't exist on x86! #[cfg(target_arch = "x86")] mod x86_polyfill { use crate::core_arch::x86::*; pub unsafe fn _mm_insert_epi64(a: __m128i, val: i64, idx: i32) -> __m128i { union A { a: __m128i, b: [i64; 2], }; let mut a = A { a }; a.b[idx as usize] = val; a.a } #[target_feature(enable = "avx2")] pub unsafe fn _mm256_insert_epi64(a: __m256i, val: i64, idx: i32) -> __m256i { union A { a: __m256i, b: [i64; 4], }; let mut a = A { a }; a.b[idx as usize] = val; a.a } } #[cfg(target_arch = "x86_64")] mod x86_polyfill { pub use crate::core_arch::x86_64::{_mm256_insert_epi64, _mm_insert_epi64}; } pub use self::x86_polyfill::*; pub unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) { union A { a: __m512i, b: [i32; 16], } assert_eq!(A { a }.b, A { a: b }.b) } core_arch-0.1.5/src/x86/xsave.rs010064400007650000024000000233321345561510300145770ustar0000000000000000//! `i586`'s `xsave` and `xsaveopt` target feature intrinsics #![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.xsave"] fn xsave(p: *mut u8, hi: u32, lo: u32) -> (); #[link_name = "llvm.x86.xrstor"] fn xrstor(p: *const u8, hi: u32, lo: u32) -> (); #[link_name = "llvm.x86.xsetbv"] fn xsetbv(v: u32, hi: u32, lo: u32) -> (); #[link_name = "llvm.x86.xsaveopt"] fn xsaveopt(p: *mut u8, hi: u32, lo: u32) -> (); #[link_name = "llvm.x86.xsavec"] fn xsavec(p: *mut u8, hi: u32, lo: u32) -> (); #[link_name = "llvm.x86.xsaves"] fn xsaves(p: *mut u8, hi: u32, lo: u32) -> (); #[link_name = "llvm.x86.xrstors"] fn xrstors(p: *const u8, hi: u32, lo: u32) -> (); } /// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr`. /// /// State is saved based on bits `[62:0]` in `save_mask` and XCR0. /// `mem_addr` must be aligned on a 64-byte boundary. /// /// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of /// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsave) #[inline] #[target_feature(enable = "xsave")] #[cfg_attr(test, assert_instr(xsave))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) { xsave(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } /// Performs a full or partial restore of the enabled processor states using /// the state information stored in memory at `mem_addr`. /// /// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and /// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte /// boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstor) #[inline] #[target_feature(enable = "xsave")] #[cfg_attr(test, assert_instr(xrstor))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) { xrstor(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); } /// `XFEATURE_ENABLED_MASK` for `XCR` /// /// This intrinsic maps to `XSETBV` instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub const _XCR_XFEATURE_ENABLED_MASK: u32 = 0; /// Copies 64-bits from `val` to the extended control register (`XCR`) specified /// by `a`. /// /// Currently only `XFEATURE_ENABLED_MASK` `XCR` is supported. 
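/// # Examples
///
/// A minimal sketch only; `XSETBV` is a privileged instruction, so actually
/// executing the write below requires running in ring 0:
///
/// ```ignore
/// unsafe {
///     // Read XCR0 and write the same value back (privileged).
///     let xcr0 = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
///     _xsetbv(_XCR_XFEATURE_ENABLED_MASK, xcr0);
/// }
/// ```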
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsetbv) #[inline] #[target_feature(enable = "xsave")] #[cfg_attr(test, assert_instr(xsetbv))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsetbv(a: u32, val: u64) { xsetbv(a, (val >> 32) as u32, val as u32); } /// Reads the contents of the extended control register `XCR` /// specified in `xcr_no`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xgetbv) #[inline] #[target_feature(enable = "xsave")] #[cfg_attr(test, assert_instr(xgetbv))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xgetbv(xcr_no: u32) -> u64 { let eax: u32; let edx: u32; asm!("xgetbv" : "={eax}"(eax), "={edx}"(edx) : "{ecx}"(xcr_no)); ((edx as u64) << 32) | (eax as u64) } /// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr`. /// /// State is saved based on bits `[62:0]` in `save_mask` and `XCR0`. /// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize /// the manner in which data is saved. The performance of this instruction will /// be equal to or better than using the `XSAVE` instruction. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaveopt) #[inline] #[target_feature(enable = "xsave,xsaveopt")] #[cfg_attr(test, assert_instr(xsaveopt))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) { xsaveopt(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } /// Performs a full or partial save of the enabled processor states to memory /// at `mem_addr`. /// /// `xsavec` differs from `xsave` in that it uses compaction and that it may /// use init optimization. State is saved based on bits `[62:0]` in `save_mask` /// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsavec) #[inline] #[target_feature(enable = "xsave,xsavec")] #[cfg_attr(test, assert_instr(xsavec))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) { xsavec(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } /// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr` /// /// `xsaves` differs from xsave in that it can save state components /// corresponding to bits set in `IA32_XSS` `MSR` and that it may use the /// modified optimization. State is saved based on bits `[62:0]` in `save_mask` /// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaves) #[inline] #[target_feature(enable = "xsave,xsaves")] #[cfg_attr(test, assert_instr(xsaves))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) { xsaves(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } /// Performs a full or partial restore of the enabled processor states using the /// state information stored in memory at `mem_addr`. /// /// `xrstors` differs from `xrstor` in that it can restore state components /// corresponding to bits set in the `IA32_XSS` `MSR`; `xrstors` cannot restore /// from an `xsave` area in which the extended region is in the standard form. /// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and /// `mem_addr.HEADER.XSTATE_BV`. 
`mem_addr` must be aligned on a 64-byte /// boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstors) #[inline] #[target_feature(enable = "xsave,xsaves")] #[cfg_attr(test, assert_instr(xrstors))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xrstors(mem_addr: *const u8, rs_mask: u64) { xrstors(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); } #[cfg(test)] mod tests { use std::fmt; use std::prelude::v1::*; use crate::core_arch::x86::*; use stdsimd_test::simd_test; #[repr(align(64))] struct XsaveArea { // max size for 256-bit registers is 800 bytes: // see https://software.intel.com/en-us/node/682996 // max size for 512-bit registers is 2560 bytes: // FIXME: add source data: [u8; 2560], } impl XsaveArea { fn new() -> XsaveArea { XsaveArea { data: [0; 2560] } } fn ptr(&mut self) -> *mut u8 { &mut self.data[0] as *mut _ as *mut u8 } } impl PartialEq for XsaveArea { fn eq(&self, other: &XsaveArea) -> bool { for i in 0..self.data.len() { if self.data[i] != other.data[i] { return false; } } true } } impl fmt::Debug for XsaveArea { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "[")?; for i in 0..self.data.len() { write!(f, "{}", self.data[i])?; if i != self.data.len() - 1 { write!(f, ", ")?; } } write!(f, "]") } } // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 /* #[simd_test(enable = "xsave")] unsafe fn xsave() { let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); _xsave(a.ptr(), m); _xrstor(a.ptr(), m); _xsave(b.ptr(), m); assert_eq!(a, b); } */ #[simd_test(enable = "xsave")] unsafe fn xgetbv_xsetbv() { let xcr_n: u32 = _XCR_XFEATURE_ENABLED_MASK; let xcr: u64 = _xgetbv(xcr_n); // FIXME: XSETBV is a privileged instruction we should only test this // when running in privileged mode: // // _xsetbv(xcr_n, xcr); let xcr_cpy: u64 = _xgetbv(xcr_n); assert_eq!(xcr, xcr_cpy); } // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 /* #[simd_test(enable = "xsave,xsaveopt")] unsafe fn xsaveopt() { let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); _xsaveopt(a.ptr(), m); _xrstor(a.ptr(), m); _xsaveopt(b.ptr(), m); assert_eq!(a, b); } */ // FIXME: this looks like a bug in Intel's SDE: #[cfg(not(stdsimd_intel_sde))] #[simd_test(enable = "xsave,xsavec")] unsafe fn xsavec() { let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); _xsavec(a.ptr(), m); _xrstor(a.ptr(), m); _xsavec(b.ptr(), m); assert_eq!(a, b); } // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 /* #[simd_test(enable = "xsave,xsaves")] unsafe fn xsaves() { let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); _xsaves(a.ptr(), m); _xrstors(a.ptr(), m); _xsaves(b.ptr(), m); assert_eq!(a, b); } */ } core_arch-0.1.5/src/x86_64/abm.rs010064400007650000024000000037651343447103600145330ustar0000000000000000//! Advanced Bit Manipulation (ABM) instructions //! //! The POPCNT and LZCNT have their own CPUID bits to indicate support. //! //! The references are: //! //! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: //! Instruction Set Reference, A-Z][intel64_ref]. //! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and //! System Instructions][amd64_ref]. //! //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions //! 
available. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf //! [wikipedia_bmi]: //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] use stdsimd_test::assert_instr; /// Counts the leading most significant zero bits. /// /// When the operand is zero, it returns its size in bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u64) #[inline] #[target_feature(enable = "lzcnt")] #[cfg_attr(test, assert_instr(lzcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _lzcnt_u64(x: u64) -> u64 { x.leading_zeros() as u64 } /// Counts the bits that are set. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt64) #[inline] #[target_feature(enable = "popcnt")] #[cfg_attr(test, assert_instr(popcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _popcnt64(x: i64) -> i32 { x.count_ones() as i32 } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use crate::core_arch::arch::x86_64::*; #[simd_test(enable = "lzcnt")] unsafe fn test_lzcnt_u64() { assert_eq!(_lzcnt_u64(0b0101_1010), 57); } #[simd_test(enable = "popcnt")] unsafe fn test_popcnt64() { assert_eq!(_popcnt64(0b0101_1010), 4); } } core_arch-0.1.5/src/x86_64/adx.rs010064400007650000024000000104651345562034300145430ustar0000000000000000#[cfg(test)] use stdsimd_test::assert_instr; #[allow(improper_ctypes)] extern "unadjusted" { #[link_name = "llvm.x86.addcarry.64"] fn llvm_addcarry_u64(a: u8, b: u64, c: u64) -> (u8, u64); #[link_name = "llvm.x86.addcarryx.u64"] fn llvm_addcarryx_u64(a: u8, b: u64, c: u64, d: *mut u8) -> u8; #[link_name = "llvm.x86.subborrow.64"] fn llvm_subborrow_u64(a: u8, b: u64, c: u64) -> (u8, u64); } /// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` /// (carry flag), and store the unsigned 64-bit result in `out`, and the carry-out /// is returned (carry or overflow flag). #[inline] #[cfg_attr(test, assert_instr(adc))] #[stable(feature = "simd_x86_adx", since = "1.33.0")] pub unsafe fn _addcarry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { let (a, b) = llvm_addcarry_u64(c_in, a, b); *out = b; a } /// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in` /// (carry or overflow flag), and store the unsigned 64-bit result in `out`, and /// the carry-out is returned (carry or overflow flag). #[inline] #[target_feature(enable = "adx")] #[cfg_attr(test, assert_instr(adc))] #[stable(feature = "simd_x86_adx", since = "1.33.0")] pub unsafe fn _addcarryx_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { llvm_addcarryx_u64(c_in, a, b, out as *mut _ as *mut u8) } /// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in`. /// (carry or overflow flag), and store the unsigned 64-bit result in `out`, and /// the carry-out is returned (carry or overflow flag). 
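/// In practice this maps to the `sbb` instruction and computes `a - b - c_in`,
/// returning the borrow-out. A minimal sketch, mirroring the values used in the
/// tests below:
///
/// ```ignore
/// unsafe {
///     let mut out = 0u64;
///     // 7 - 3 with no incoming borrow: result 4, no borrow-out.
///     assert_eq!(_subborrow_u64(0, 7, 3, &mut out), 0);
///     assert_eq!(out, 4);
///     // 0 - 1 underflows: the borrow-out is 1 and the result wraps around.
///     assert_eq!(_subborrow_u64(0, 0, 1, &mut out), 1);
///     assert_eq!(out, u64::max_value());
/// }
/// ```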
#[inline] #[cfg_attr(test, assert_instr(sbb))] #[stable(feature = "simd_x86_adx", since = "1.33.0")] pub unsafe fn _subborrow_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 { let (a, b) = llvm_subborrow_u64(c_in, a, b); *out = b; a } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use crate::core_arch::x86_64::*; #[test] fn test_addcarry_u64() { unsafe { let a = u64::max_value(); let mut out = 0; let r = _addcarry_u64(0, a, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, 0); let r = _addcarry_u64(0, a, 0, &mut out); assert_eq!(r, 0); assert_eq!(out, a); let r = _addcarry_u64(1, a, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, 1); let r = _addcarry_u64(1, a, 0, &mut out); assert_eq!(r, 1); assert_eq!(out, 0); let r = _addcarry_u64(0, 3, 4, &mut out); assert_eq!(r, 0); assert_eq!(out, 7); let r = _addcarry_u64(1, 3, 4, &mut out); assert_eq!(r, 0); assert_eq!(out, 8); } } #[simd_test(enable = "adx")] unsafe fn test_addcarryx_u64() { let a = u64::max_value(); let mut out = 0; let r = _addcarry_u64(0, a, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, 0); let r = _addcarry_u64(0, a, 0, &mut out); assert_eq!(r, 0); assert_eq!(out, a); let r = _addcarry_u64(1, a, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, 1); let r = _addcarry_u64(1, a, 0, &mut out); assert_eq!(r, 1); assert_eq!(out, 0); let r = _addcarry_u64(0, 3, 4, &mut out); assert_eq!(r, 0); assert_eq!(out, 7); let r = _addcarry_u64(1, 3, 4, &mut out); assert_eq!(r, 0); assert_eq!(out, 8); } #[test] fn test_subborrow_u64() { unsafe { let a = u64::max_value(); let mut out = 0; let r = _subborrow_u64(0, 0, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, a); let r = _subborrow_u64(0, 0, 0, &mut out); assert_eq!(r, 0); assert_eq!(out, 0); let r = _subborrow_u64(1, 0, 1, &mut out); assert_eq!(r, 1); assert_eq!(out, a - 1); let r = _subborrow_u64(1, 0, 0, &mut out); assert_eq!(r, 1); assert_eq!(out, a); let r = _subborrow_u64(0, 7, 3, &mut out); assert_eq!(r, 0); assert_eq!(out, 4); let r = _subborrow_u64(1, 7, 3, &mut out); assert_eq!(r, 0); assert_eq!(out, 3); } } } core_arch-0.1.5/src/x86_64/avx.rs010064400007650000024000000032231343447103600145570ustar0000000000000000//! Advanced Vector Extensions (AVX) //! //! The references are: //! //! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: //! Instruction Set Reference, A-Z][intel64_ref]. - [AMD64 Architecture //! Programmer's Manual, Volume 3: General-Purpose and System //! Instructions][amd64_ref]. //! //! [Wikipedia][wiki] provides a quick overview of the instructions available. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf //! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions use crate::{ core_arch::{simd_llvm::*, x86::*}, mem::transmute, }; /// Copies `a` to result, and insert the 64-bit integer `i` into result /// at the location specified by `index`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi64) #[inline] #[rustc_args_required_const(2)] #[target_feature(enable = "avx")] // This intrinsic has no corresponding instruction. 
#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i { transmute(simd_insert(a.as_i64x4(), (index as u32) & 3, i)) } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use crate::core_arch::x86::*; #[simd_test(enable = "avx")] unsafe fn test_mm256_insert_epi64() { let a = _mm256_setr_epi64x(1, 2, 3, 4); let r = _mm256_insert_epi64(a, 0, 3); let e = _mm256_setr_epi64x(1, 2, 3, 0); assert_eq_m256i(r, e); } } core_arch-0.1.5/src/x86_64/avx2.rs010064400007650000024000000034401343447103600146420ustar0000000000000000//! Advanced Vector Extensions 2 (AVX) //! //! AVX2 expands most AVX commands to 256-bit wide vector registers and //! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate). //! //! The references are: //! //! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: //! Instruction Set Reference, A-Z][intel64_ref]. //! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and //! System Instructions][amd64_ref]. //! //! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick //! overview of the instructions available. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf //! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf //! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions //! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate use crate::core_arch::{simd_llvm::*, x86::*}; /// Extracts a 64-bit integer from `a`, selected with `imm8`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi64) #[inline] #[target_feature(enable = "avx2")] #[rustc_args_required_const(1)] // This intrinsic has no corresponding instruction. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 { let imm8 = (imm8 & 3) as u32; simd_extract(a.as_i64x4(), imm8) } #[cfg(test)] mod tests { use crate::core_arch::arch::x86_64::*; use stdsimd_test::simd_test; #[simd_test(enable = "avx2")] unsafe fn test_mm256_extract_epi64() { let a = _mm256_setr_epi64x(0, 1, 2, 3); let r = _mm256_extract_epi64(a, 3); assert_eq!(r, 3); } } core_arch-0.1.5/src/x86_64/bmi.rs010064400007650000024000000140651343447103600145360ustar0000000000000000//! Bit Manipulation Instruction (BMI) Set 1.0. //! //! The reference is [Intel 64 and IA-32 Architectures Software Developer's //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. //! //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions //! available. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf //! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] use stdsimd_test::assert_instr; /// Extracts bits in range [`start`, `start` + `length`) from `a` into /// the least significant bits of the result. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(bextr))] #[cfg(not(target_arch = "x86"))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 { _bextr2_u64(a, ((start & 0xff) | ((len & 0xff) << 8)) as u64) } /// Extracts bits of `a` specified by `control` into /// the least significant bits of the result. /// /// Bits `[7,0]` of `control` specify the index to the first bit in the range /// to be extracted, and bits `[15,8]` specify the length of the range. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(bextr))] #[cfg(not(target_arch = "x86"))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bextr2_u64(a: u64, control: u64) -> u64 { x86_bmi_bextr_64(a, control) } /// Bitwise logical `AND` of inverted `a` with `b`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_andn_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(andn))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 { !a & b } /// Extracts lowest set isolated bit. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsi))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsi_u64(x: u64) -> u64 { x & x.wrapping_neg() } /// Gets mask up to lowest set bit. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsmsk))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsmsk_u64(x: u64) -> u64 { x ^ (x.wrapping_sub(1_u64)) } /// Resets the lowest set bit of `x`. /// /// If `x` is sets CF. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsr_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(blsr))] #[cfg(not(target_arch = "x86"))] // generates lots of instructions #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _blsr_u64(x: u64) -> u64 { x & (x.wrapping_sub(1)) } /// Counts the number of trailing least significant zero bits. /// /// When the source operand is `0`, it returns its size in bits. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(tzcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _tzcnt_u64(x: u64) -> u64 { x.trailing_zeros() as u64 } /// Counts the number of trailing least significant zero bits. /// /// When the source operand is `0`, it returns its size in bits. 
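/// # Examples
///
/// A minimal sketch, assuming a CPU with the `bmi1` feature:
///
/// ```ignore
/// unsafe {
///     assert_eq!(_mm_tzcnt_64(0b1001_0000), 4);
///     // A zero operand yields the operand size in bits.
///     assert_eq!(_mm_tzcnt_64(0), 64);
/// }
/// ```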
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_64) #[inline] #[target_feature(enable = "bmi1")] #[cfg_attr(test, assert_instr(tzcnt))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_tzcnt_64(x: u64) -> i64 { x.trailing_zeros() as i64 } extern "C" { #[link_name = "llvm.x86.bmi.bextr.64"] fn x86_bmi_bextr_64(x: u64, y: u64) -> u64; } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use crate::core_arch::{x86::*, x86_64::*}; #[simd_test(enable = "bmi1")] unsafe fn test_bextr_u64() { let r = _bextr_u64(0b0101_0000u64, 4, 4); assert_eq!(r, 0b0000_0101u64); } #[simd_test(enable = "bmi1")] unsafe fn test_andn_u64() { assert_eq!(_andn_u64(0, 0), 0); assert_eq!(_andn_u64(0, 1), 1); assert_eq!(_andn_u64(1, 0), 0); assert_eq!(_andn_u64(1, 1), 0); let r = _andn_u64(0b0000_0000u64, 0b0000_0000u64); assert_eq!(r, 0b0000_0000u64); let r = _andn_u64(0b0000_0000u64, 0b1111_1111u64); assert_eq!(r, 0b1111_1111u64); let r = _andn_u64(0b1111_1111u64, 0b0000_0000u64); assert_eq!(r, 0b0000_0000u64); let r = _andn_u64(0b1111_1111u64, 0b1111_1111u64); assert_eq!(r, 0b0000_0000u64); let r = _andn_u64(0b0100_0000u64, 0b0101_1101u64); assert_eq!(r, 0b0001_1101u64); } #[simd_test(enable = "bmi1")] unsafe fn test_blsi_u64() { assert_eq!(_blsi_u64(0b1101_0000u64), 0b0001_0000u64); } #[simd_test(enable = "bmi1")] unsafe fn test_blsmsk_u64() { let r = _blsmsk_u64(0b0011_0000u64); assert_eq!(r, 0b0001_1111u64); } #[simd_test(enable = "bmi1")] unsafe fn test_blsr_u64() { // TODO: test the behavior when the input is `0`. let r = _blsr_u64(0b0011_0000u64); assert_eq!(r, 0b0010_0000u64); } #[simd_test(enable = "bmi1")] unsafe fn test_tzcnt_u64() { assert_eq!(_tzcnt_u64(0b0000_0001u64), 0u64); assert_eq!(_tzcnt_u64(0b0000_0000u64), 64u64); assert_eq!(_tzcnt_u64(0b1001_0000u64), 4u64); } } core_arch-0.1.5/src/x86_64/bmi2.rs010064400007650000024000000107271343447103600146210ustar0000000000000000//! Bit Manipulation Instruction (BMI) Set 2.0. //! //! The reference is [Intel 64 and IA-32 Architectures Software Developer's //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. //! //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions //! available. //! //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf //! [wikipedia_bmi]: //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 #[cfg(test)] use stdsimd_test::assert_instr; /// Unsigned multiply without affecting flags. /// /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with /// the low half and the high half of the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u64) #[inline] #[cfg_attr(test, assert_instr(mul))] #[target_feature(enable = "bmi2")] #[cfg(not(target_arch = "x86"))] // calls an intrinsic #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 { let result: u128 = (a as u128) * (b as u128); *hi = (result >> 64) as u64; result as u64 } /// Zeroes higher bits of `a` >= `index`. 
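/// # Examples
///
/// A minimal sketch, assuming a CPU with the `bmi2` feature (values taken from
/// the test in this module):
///
/// ```ignore
/// unsafe {
///     // Clear every bit at position 5 and above.
///     assert_eq!(_bzhi_u64(0b1111_0010, 5), 0b0001_0010);
/// }
/// ```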
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u64) #[inline] #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(bzhi))] #[cfg(not(target_arch = "x86"))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bzhi_u64(a: u64, index: u32) -> u64 { x86_bmi2_bzhi_64(a, index as u64) } /// Scatter contiguous low order bits of `a` to the result at the positions /// specified by the `mask`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u64) #[inline] #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(pdep))] #[cfg(not(target_arch = "x86"))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _pdep_u64(a: u64, mask: u64) -> u64 { x86_bmi2_pdep_64(a, mask) } /// Gathers the bits of `x` specified by the `mask` into the contiguous low /// order bit positions of the result. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u64) #[inline] #[target_feature(enable = "bmi2")] #[cfg_attr(test, assert_instr(pext))] #[cfg(not(target_arch = "x86"))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _pext_u64(a: u64, mask: u64) -> u64 { x86_bmi2_pext_64(a, mask) } extern "C" { #[link_name = "llvm.x86.bmi.bzhi.64"] fn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64; #[link_name = "llvm.x86.bmi.pdep.64"] fn x86_bmi2_pdep_64(x: u64, y: u64) -> u64; #[link_name = "llvm.x86.bmi.pext.64"] fn x86_bmi2_pext_64(x: u64, y: u64) -> u64; } #[cfg(test)] mod tests { use stdsimd_test::simd_test; use crate::core_arch::x86_64::*; #[simd_test(enable = "bmi2")] unsafe fn test_pext_u64() { let n = 0b1011_1110_1001_0011u64; let m0 = 0b0110_0011_1000_0101u64; let s0 = 0b0000_0000_0011_0101u64; let m1 = 0b1110_1011_1110_1111u64; let s1 = 0b0001_0111_0100_0011u64; assert_eq!(_pext_u64(n, m0), s0); assert_eq!(_pext_u64(n, m1), s1); } #[simd_test(enable = "bmi2")] unsafe fn test_pdep_u64() { let n = 0b1011_1110_1001_0011u64; let m0 = 0b0110_0011_1000_0101u64; let s0 = 0b0000_0010_0000_0101u64; let m1 = 0b1110_1011_1110_1111u64; let s1 = 0b1110_1001_0010_0011u64; assert_eq!(_pdep_u64(n, m0), s0); assert_eq!(_pdep_u64(n, m1), s1); } #[simd_test(enable = "bmi2")] unsafe fn test_bzhi_u64() { let n = 0b1111_0010u64; let s = 0b0001_0010u64; assert_eq!(_bzhi_u64(n, 5), s); } #[simd_test(enable = "bmi2")] #[rustfmt::skip] unsafe fn test_mulx_u64() { let a: u64 = 9_223_372_036_854_775_800; let b: u64 = 100; let mut hi = 0; let lo = _mulx_u64(a, b, &mut hi); /* result = 922337203685477580000 = 0b00110001_1111111111111111_1111111111111111_1111111111111111_1111110011100000 ^~hi~~~~ ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ assert_eq!( lo, 0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64 ); assert_eq!(hi, 0b00110001u64); } } core_arch-0.1.5/src/x86_64/bswap.rs010064400007650000024000000014601345561510300150740ustar0000000000000000//! Byte swap intrinsics. 
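//!
//! A minimal usage sketch (`_bswap64` needs no extra target feature on
//! `x86_64`); the values mirror the test at the bottom of this module:
//!
//! ```ignore
//! unsafe {
//!     assert_eq!(_bswap64(0x0EADBEEFFADECA0E), 0x0ECADEFAEFBEAD0E);
//! }
//! ```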
#![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; /// Returns an integer with the reversed byte order of x /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap64) #[inline] #[cfg_attr(test, assert_instr(bswap))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _bswap64(x: i64) -> i64 { bswap_i64(x) } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.bswap.i64"] fn bswap_i64(x: i64) -> i64; } #[cfg(test)] mod tests { use super::*; #[test] fn test_bswap64() { unsafe { assert_eq!(_bswap64(0x0EADBEEFFADECA0E), 0x0ECADEFAEFBEAD0E); assert_eq!(_bswap64(0x0000000000000000), 0x0000000000000000); } } } core_arch-0.1.5/src/x86_64/bt.rs010064400007650000024000000060131344736322300143700ustar0000000000000000#[cfg(test)] use stdsimd_test::assert_instr; /// Returns the bit in position `b` of the memory addressed by `p`. #[inline] #[cfg_attr(test, assert_instr(bt))] #[unstable(feature = "simd_x86_bittest", issue = "59414")] pub unsafe fn _bittest64(p: *const i64, b: i64) -> u8 { let r: u8; asm!("btq $2, $1\n\tsetc ${0:b}" : "=r"(r) : "*m"(p), "r"(b) : "cc", "memory"); r } /// Returns the bit in position `b` of the memory addressed by `p`, then sets the bit to `1`. #[inline] #[cfg_attr(test, assert_instr(bts))] #[unstable(feature = "simd_x86_bittest", issue = "59414")] pub unsafe fn _bittestandset64(p: *mut i64, b: i64) -> u8 { let r: u8; asm!("btsq $2, $1\n\tsetc ${0:b}" : "=r"(r), "+*m"(p) : "r"(b) : "cc", "memory"); r } /// Returns the bit in position `b` of the memory addressed by `p`, then resets that bit to `0`. #[inline] #[cfg_attr(test, assert_instr(btr))] #[unstable(feature = "simd_x86_bittest", issue = "59414")] pub unsafe fn _bittestandreset64(p: *mut i64, b: i64) -> u8 { let r: u8; asm!("btrq $2, $1\n\tsetc ${0:b}" : "=r"(r), "+*m"(p) : "r"(b) : "cc", "memory"); r } /// Returns the bit in position `b` of the memory addressed by `p`, then inverts that bit. 
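/// # Examples
///
/// A minimal sketch; this intrinsic is still unstable (`simd_x86_bittest`), so
/// the example assumes a nightly toolchain:
///
/// ```ignore
/// unsafe {
///     let mut a = 0b0101_0000i64;
///     // Bit 4 is set: the call returns 1 and flips it to 0 ...
///     assert_eq!(_bittestandcomplement64(&mut a as *mut i64, 4), 1);
///     // ... so the next call returns 0 and flips it back to 1.
///     assert_eq!(_bittestandcomplement64(&mut a as *mut i64, 4), 0);
/// }
/// ```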
#[inline] #[cfg_attr(test, assert_instr(btc))] #[unstable(feature = "simd_x86_bittest", issue = "59414")] pub unsafe fn _bittestandcomplement64(p: *mut i64, b: i64) -> u8 { let r: u8; asm!("btcq $2, $1\n\tsetc ${0:b}" : "=r"(r), "+*m"(p) : "r"(b) : "cc", "memory"); r } #[cfg(test)] mod tests { use crate::core_arch::x86_64::*; #[test] fn test_bittest64() { unsafe { let a = 0b0101_0000i64; assert_eq!(_bittest64(&a as _, 4), 1); assert_eq!(_bittest64(&a as _, 5), 0); } } #[test] fn test_bittestandset64() { unsafe { let mut a = 0b0101_0000i64; assert_eq!(_bittestandset64(&mut a as _, 4), 1); assert_eq!(_bittestandset64(&mut a as _, 4), 1); assert_eq!(_bittestandset64(&mut a as _, 5), 0); assert_eq!(_bittestandset64(&mut a as _, 5), 1); } } #[test] fn test_bittestandreset64() { unsafe { let mut a = 0b0101_0000i64; assert_eq!(_bittestandreset64(&mut a as _, 4), 1); assert_eq!(_bittestandreset64(&mut a as _, 4), 0); assert_eq!(_bittestandreset64(&mut a as _, 5), 0); assert_eq!(_bittestandreset64(&mut a as _, 5), 0); } } #[test] fn test_bittestandcomplement64() { unsafe { let mut a = 0b0101_0000i64; assert_eq!(_bittestandcomplement64(&mut a as _, 4), 1); assert_eq!(_bittestandcomplement64(&mut a as _, 4), 0); assert_eq!(_bittestandcomplement64(&mut a as _, 4), 1); assert_eq!(_bittestandcomplement64(&mut a as _, 5), 0); assert_eq!(_bittestandcomplement64(&mut a as _, 5), 1); } } } core_arch-0.1.5/src/x86_64/cmpxchg16b.rs010064400007650000024000000062011345562034300157220ustar0000000000000000use crate::sync::atomic::Ordering; #[cfg(test)] use stdsimd_test::assert_instr; /// Compares and exchange 16 bytes (128 bits) of data atomically. /// /// This intrinsic corresponds to the `cmpxchg16b` instruction on `x86_64` /// processors. It performs an atomic compare-and-swap, updating the `ptr` /// memory location to `val` if the current value in memory equals `old`. /// /// # Return value /// /// This function returns the previous value at the memory location. If it is /// equal to `old` then the memory was updated to `new`. /// /// # Memory Orderings /// /// This atomic operations has the same semantics of memory orderings as /// `AtomicUsize::compare_exchange` does, only operating on 16 bytes of memory /// instead of just a pointer. /// /// For more information on memory orderings here see the `compare_exchange` /// documentation for other `Atomic*` types in the standard library. /// /// # Unsafety /// /// This method is unsafe because it takes a raw pointer and will attempt to /// read and possibly write the memory at the pointer. The pointer must also be /// aligned on a 16-byte boundary. /// /// This method also requires the `cmpxchg16b` CPU feature to be available at /// runtime to work correctly. If the CPU running the binary does not actually /// support `cmpxchg16b` and the program enters an execution path that /// eventually would reach this function the behavior is undefined. /// /// The `success` ordering must also be stronger or equal to `failure`, or this /// function call is undefined. See the `Atomic*` documentation's /// `compare_exchange` function for more information. When `compare_exchange` /// panics, this is undefined behavior. Currently this function aborts the /// process with an undefined instruction. 
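/// # Examples
///
/// A minimal sketch, assuming the CPU supports `cmpxchg16b`; the wrapper type
/// is only there to guarantee the required 16-byte alignment:
///
/// ```ignore
/// use std::sync::atomic::Ordering;
///
/// #[repr(align(16))]
/// struct Aligned(u128);
///
/// let mut slot = Aligned(10);
/// unsafe {
///     // The current value matches `old`, so the swap succeeds.
///     let prev = cmpxchg16b(&mut slot.0, 10, 42, Ordering::SeqCst, Ordering::SeqCst);
///     assert_eq!(prev, 10);
///     assert_eq!(slot.0, 42);
/// }
/// ```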
#[inline] #[cfg_attr(test, assert_instr(cmpxchg16b, success = Ordering::SeqCst, failure = Ordering::SeqCst))] #[target_feature(enable = "cmpxchg16b")] pub unsafe fn cmpxchg16b( dst: *mut u128, old: u128, new: u128, success: Ordering, failure: Ordering, ) -> u128 { use crate::intrinsics; use crate::sync::atomic::Ordering::*; debug_assert!(dst as usize % 16 == 0); let (val, _ok) = match (success, failure) { (Acquire, Acquire) => intrinsics::atomic_cxchg_acq(dst, old, new), (Release, Relaxed) => intrinsics::atomic_cxchg_rel(dst, old, new), (AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel(dst, old, new), (Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed(dst, old, new), (SeqCst, SeqCst) => intrinsics::atomic_cxchg(dst, old, new), (Acquire, Relaxed) => intrinsics::atomic_cxchg_acq_failrelaxed(dst, old, new), (AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_failrelaxed(dst, old, new), (SeqCst, Relaxed) => intrinsics::atomic_cxchg_failrelaxed(dst, old, new), (SeqCst, Acquire) => intrinsics::atomic_cxchg_failacq(dst, old, new), // The above block is all copied from libcore, and this statement is // also copied from libcore except that it's a panic in libcore and we // have a little bit more of a lightweight panic here. _ => crate::core_arch::x86::ud2(), }; val } core_arch-0.1.5/src/x86_64/fxsr.rs010064400007650000024000000063721343447103600147530ustar0000000000000000//! FXSR floating-point context fast save and restor. #[cfg(test)] use stdsimd_test::assert_instr; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.fxsave64"] fn fxsave64(p: *mut u8) -> (); #[link_name = "llvm.x86.fxrstor64"] fn fxrstor64(p: *const u8) -> (); } /// Saves the `x87` FPU, `MMX` technology, `XMM`, and `MXCSR` registers to the /// 512-byte-long 16-byte-aligned memory region `mem_addr`. /// /// A misaligned destination operand raises a general-protection (#GP) or an /// alignment check exception (#AC). /// /// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor]. /// /// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html /// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxsave64) #[inline] #[target_feature(enable = "fxsr")] #[cfg_attr(test, assert_instr(fxsave64))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _fxsave64(mem_addr: *mut u8) { fxsave64(mem_addr) } /// Restores the `XMM`, `MMX`, `MXCSR`, and `x87` FPU registers from the /// 512-byte-long 16-byte-aligned memory region `mem_addr`. /// /// The contents of this memory region should have been written to by a /// previous /// `_fxsave` or `_fxsave64` intrinsic. /// /// A misaligned destination operand raises a general-protection (#GP) or an /// alignment check exception (#AC). /// /// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor]. 
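/// # Examples
///
/// A minimal round-trip sketch, assuming a CPU with the `fxsr` feature; the
/// wrapper struct only exists to provide the required 16-byte alignment:
///
/// ```ignore
/// #[repr(align(16))]
/// struct FxsaveArea([u8; 512]);
///
/// let mut area = FxsaveArea([0; 512]);
/// unsafe {
///     // Save the FPU/SSE state, then restore it from the same buffer.
///     _fxsave64(area.0.as_mut_ptr());
///     _fxrstor64(area.0.as_ptr());
/// }
/// ```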
/// /// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html /// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxrstor64) #[inline] #[target_feature(enable = "fxsr")] #[cfg_attr(test, assert_instr(fxrstor64))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _fxrstor64(mem_addr: *const u8) { fxrstor64(mem_addr) } #[cfg(test)] mod tests { use crate::core_arch::x86_64::*; use std::{cmp::PartialEq, fmt}; use stdsimd_test::simd_test; #[repr(align(16))] struct FxsaveArea { data: [u8; 512], // 512 bytes } impl FxsaveArea { fn new() -> FxsaveArea { FxsaveArea { data: [0; 512] } } fn ptr(&mut self) -> *mut u8 { &mut self.data[0] as *mut _ as *mut u8 } } impl PartialEq for FxsaveArea { fn eq(&self, other: &FxsaveArea) -> bool { for i in 0..self.data.len() { if self.data[i] != other.data[i] { return false; } } true } } impl fmt::Debug for FxsaveArea { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "[")?; for i in 0..self.data.len() { write!(f, "{}", self.data[i])?; if i != self.data.len() - 1 { write!(f, ", ")?; } } write!(f, "]") } } #[simd_test(enable = "fxsr")] unsafe fn fxsave64() { let mut a = FxsaveArea::new(); let mut b = FxsaveArea::new(); fxsr::_fxsave64(a.ptr()); fxsr::_fxrstor64(a.ptr()); fxsr::_fxsave64(b.ptr()); assert_eq!(a, b); } } core_arch-0.1.5/src/x86_64/mod.rs010064400007650000024000000011021344736322300145340ustar0000000000000000//! `x86_64` intrinsics mod fxsr; pub use self::fxsr::*; mod sse; pub use self::sse::*; mod sse2; pub use self::sse2::*; mod sse41; pub use self::sse41::*; mod sse42; pub use self::sse42::*; mod xsave; pub use self::xsave::*; mod abm; pub use self::abm::*; mod avx; pub use self::avx::*; mod bmi; pub use self::bmi::*; mod bmi2; pub use self::bmi2::*; mod avx2; pub use self::avx2::*; mod bswap; pub use self::bswap::*; mod rdrand; pub use self::rdrand::*; mod cmpxchg16b; pub use self::cmpxchg16b::*; mod adx; pub use self::adx::*; mod bt; pub use self::bt::*; core_arch-0.1.5/src/x86_64/rdrand.rs010064400007650000024000000027621345561510300152400ustar0000000000000000//! RDRAND and RDSEED instructions for returning random numbers from an Intel //! on-chip hardware random number generator which has been seeded by an //! on-chip entropy source. #![allow(clippy::module_name_repetitions)] #[allow(improper_ctypes)] extern "unadjusted" { #[link_name = "llvm.x86.rdrand.64"] fn x86_rdrand64_step() -> (u64, i32); #[link_name = "llvm.x86.rdseed.64"] fn x86_rdseed64_step() -> (u64, i32); } #[cfg(test)] use stdsimd_test::assert_instr; /// Read a hardware generated 64-bit random value and store the result in val. /// Returns 1 if a random value was generated, and 0 otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand64_step) #[inline] #[target_feature(enable = "rdrand")] #[cfg_attr(test, assert_instr(rdrand))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 { let (v, flag) = x86_rdrand64_step(); *val = v; flag } /// Read a 64-bit NIST SP800-90B and SP800-90C compliant random value and store /// in val. Return 1 if a random value was generated, and 0 otherwise. 
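/// # Examples
///
/// A minimal sketch, assuming a CPU with the `rdseed` feature:
///
/// ```ignore
/// let mut seed = 0u64;
/// let ok = unsafe { _rdseed64_step(&mut seed) };
/// if ok == 1 {
///     // `seed` now holds a hardware-generated value.
/// } else {
///     // No value was available; callers typically retry.
/// }
/// ```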
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed64_step) #[inline] #[target_feature(enable = "rdseed")] #[cfg_attr(test, assert_instr(rdseed))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _rdseed64_step(val: &mut u64) -> i32 { let (v, flag) = x86_rdseed64_step(); *val = v; flag } core_arch-0.1.5/src/x86_64/sse.rs010064400007650000024000000114511343447103600145550ustar0000000000000000//! `x86_64` Streaming SIMD Extensions (SSE) use crate::core_arch::x86::*; #[cfg(test)] use stdsimd_test::assert_instr; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sse.cvtss2si64"] fn cvtss2si64(a: __m128) -> i64; #[link_name = "llvm.x86.sse.cvttss2si64"] fn cvttss2si64(a: __m128) -> i64; #[link_name = "llvm.x86.sse.cvtsi642ss"] fn cvtsi642ss(a: __m128, b: i64) -> __m128; } /// Converts the lowest 32 bit float in the input vector to a 64 bit integer. /// /// The result is rounded according to the current rounding mode. If the result /// cannot be represented as a 64 bit integer the result will be /// `0x8000_0000_0000_0000` (`std::i64::MIN`) or trigger an invalid operation /// floating point exception if unmasked (see /// [`_mm_setcsr`](fn._mm_setcsr.html)). /// /// This corresponds to the `CVTSS2SI` instruction (with 64 bit output). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si64) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtss2si))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 { cvtss2si64(a) } /// Converts the lowest 32 bit float in the input vector to a 64 bit integer /// with truncation. /// /// The result is rounded always using truncation (round towards zero). If the /// result cannot be represented as a 64 bit integer the result will be /// `0x8000_0000_0000_0000` (`std::i64::MIN`) or an invalid operation floating /// point exception if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)). /// /// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si64) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvttss2si))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 { cvttss2si64(a) } /// Converts a 64 bit integer to a 32 bit float. The result vector is the input /// vector `a` with the lowest 32 bit float replaced by the converted integer. /// /// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit /// input). 
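/// # Examples
///
/// A minimal sketch, assuming a CPU with the `sse` feature (values mirror the
/// test in this module):
///
/// ```ignore
/// unsafe {
///     let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
///     let r = _mm_cvtsi64_ss(a, 4555);
///     // Only the lowest lane is replaced by the converted integer.
///     assert_eq!(_mm_cvtss_f32(r), 4555.0);
/// }
/// ```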
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_ss) #[inline] #[target_feature(enable = "sse")] #[cfg_attr(test, assert_instr(cvtsi2ss))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 { cvtsi642ss(a, b) } #[cfg(test)] mod tests { use crate::core_arch::arch::x86_64::*; use std::{f32::NAN, i64::MIN}; use stdsimd_test::simd_test; #[simd_test(enable = "sse")] unsafe fn test_mm_cvtss_si64() { let inputs = &[ (42.0f32, 42i64), (-31.4, -31), (-33.5, -34), (-34.5, -34), (4.0e10, 40_000_000_000), (4.0e-10, 0), (NAN, MIN), (2147483500.1, 2147483520), (9.223371e18, 9223370937343148032), ]; for i in 0..inputs.len() { let (xi, e) = inputs[i]; let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); let r = _mm_cvtss_si64(x); assert_eq!( e, r, "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}", i, x, r, e ); } } #[simd_test(enable = "sse")] unsafe fn test_mm_cvttss_si64() { let inputs = &[ (42.0f32, 42i64), (-31.4, -31), (-33.5, -33), (-34.5, -34), (10.999, 10), (-5.99, -5), (4.0e10, 40_000_000_000), (4.0e-10, 0), (NAN, MIN), (2147483500.1, 2147483520), (9.223371e18, 9223370937343148032), (9.223372e18, MIN), ]; for i in 0..inputs.len() { let (xi, e) = inputs[i]; let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); let r = _mm_cvttss_si64(x); assert_eq!( e, r, "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}", i, x, r, e ); } } #[simd_test(enable = "sse")] pub unsafe fn test_mm_cvtsi64_ss() { let inputs = &[ (4555i64, 4555.0f32), (322223333, 322223330.0), (-432, -432.0), (-322223333, -322223330.0), (9223372036854775807, 9.223372e18), (-9223372036854775808, -9.223372e18), ]; for i in 0..inputs.len() { let (x, f) = inputs[i]; let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); let r = _mm_cvtsi64_ss(a, x); let e = _mm_setr_ps(f, 6.0, 7.0, 8.0); assert_eq_m128(e, r); } } } core_arch-0.1.5/src/x86_64/sse2.rs010064400007650000024000000152111343447103600146350ustar0000000000000000//! `x86_64`'s Streaming SIMD Extensions 2 (SSE2) use crate::{ core_arch::{simd_llvm::*, x86::*}, intrinsics, }; #[cfg(test)] use stdsimd_test::assert_instr; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sse2.cvtsd2si64"] fn cvtsd2si64(a: __m128d) -> i64; #[link_name = "llvm.x86.sse2.cvttsd2si64"] fn cvttsd2si64(a: __m128d) -> i64; } /// Converts the lower double-precision (64-bit) floating-point element in a to /// a 64-bit integer. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsd2si))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsd_si64(a: __m128d) -> i64 { cvtsd2si64(a) } /// Alias for `_mm_cvtsd_si64` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64x) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsd2si))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsd_si64x(a: __m128d) -> i64 { _mm_cvtsd_si64(a) } /// Converts the lower double-precision (64-bit) floating-point element in `a` /// to a 64-bit integer with truncation. 
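/// # Examples
///
/// A minimal sketch, assuming a CPU with the `sse2` feature:
///
/// ```ignore
/// unsafe {
///     let a = _mm_setr_pd(-1.1, 2.2);
///     // Truncation rounds towards zero.
///     assert_eq!(_mm_cvttsd_si64(a), -1);
/// }
/// ```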
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttsd2si))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttsd_si64(a: __m128d) -> i64 { cvttsd2si64(a) } /// Alias for `_mm_cvttsd_si64` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64x) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvttsd2si))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 { _mm_cvttsd_si64(a) } /// Stores a 64-bit integer value in the specified memory location. /// To minimize caching, the data is flagged as non-temporal (unlikely to be /// used again soon). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(movnti))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) { intrinsics::nontemporal_store(mem_addr, a); } /// Returns a vector whose lowest element is `a` and all higher elements are /// `0`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(windows)), assert_instr(movq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi64_si128(a: i64) -> __m128i { _mm_set_epi64x(0, a) } /// Returns a vector whose lowest element is `a` and all higher elements are /// `0`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_si128) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(windows)), assert_instr(movq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i { _mm_cvtsi64_si128(a) } /// Returns the lowest element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(windows)), assert_instr(movq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 { simd_extract(a.as_i64x2(), 0) } /// Returns the lowest element of `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(all(test, not(windows)), assert_instr(movq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 { _mm_cvtsi128_si64(a) } /// Returns `a` with its lower element replaced by `b` after converting it to /// an `f64`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsi2sd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d { simd_insert(a, 0, b as f64) } /// Returns `a` with its lower element replaced by `b` after converting it to /// an `f64`. 
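/// # Examples
///
/// A minimal sketch, assuming a CPU with the `sse2` feature:
///
/// ```ignore
/// unsafe {
///     let a = _mm_set1_pd(3.5);
///     let r = _mm_cvtsi64x_sd(a, 5);
///     // The lower lane becomes 5.0; the upper lane keeps its value.
///     assert_eq!(_mm_cvtsd_f64(r), 5.0);
/// }
/// ```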
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_sd) #[inline] #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(cvtsi2sd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d { _mm_cvtsi64_sd(a, b) } #[cfg(test)] mod tests { use std::{f64, i64}; use stdsimd_test::simd_test; use crate::core_arch::arch::x86_64::*; #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtsd_si64() { let r = _mm_cvtsd_si64(_mm_setr_pd(-2.0, 5.0)); assert_eq!(r, -2_i64); let r = _mm_cvtsd_si64(_mm_setr_pd(f64::MAX, f64::MIN)); assert_eq!(r, i64::MIN); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtsd_si64x() { let r = _mm_cvtsd_si64x(_mm_setr_pd(f64::NAN, f64::NAN)); assert_eq!(r, i64::MIN); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvttsd_si64() { let a = _mm_setr_pd(-1.1, 2.2); let r = _mm_cvttsd_si64(a); assert_eq!(r, -1_i64); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvttsd_si64x() { let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); let r = _mm_cvttsd_si64x(a); assert_eq!(r, i64::MIN); } #[simd_test(enable = "sse2")] unsafe fn test_mm_stream_si64() { let a: i64 = 7; let mut mem = ::std::boxed::Box::::new(-1); _mm_stream_si64(&mut *mem as *mut i64, a); assert_eq!(a, *mem); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtsi64_si128() { let r = _mm_cvtsi64_si128(5); assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtsi128_si64() { let r = _mm_cvtsi128_si64(_mm_setr_epi64x(5, 0)); assert_eq!(r, 5); } #[simd_test(enable = "sse2")] unsafe fn test_mm_cvtsi64_sd() { let a = _mm_set1_pd(3.5); let r = _mm_cvtsi64_sd(a, 5); assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5)); } } core_arch-0.1.5/src/x86_64/sse41.rs010064400007650000024000000035731343447103600147300ustar0000000000000000//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1) use crate::{ core_arch::{simd_llvm::*, x86::*}, mem::transmute, }; #[cfg(test)] use stdsimd_test::assert_instr; /// Extracts an 64-bit integer from `a` selected with `imm8` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(pextrq, imm8 = 1))] #[rustc_args_required_const(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 { let imm8 = (imm8 & 1) as u32; simd_extract(a.as_i64x2(), imm8) } /// Returns a copy of `a` with the 64-bit integer from `i` inserted at a /// location specified by `imm8`. 
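// Illustrative sketch, not part of the crate: `_mm_extract_epi64` only looks at
// bit 0 of `imm8` (the implementation masks it with `imm8 & 1`), so any odd
// index selects lane 1 and any even index selects lane 0, which is why the
// crate's own test passes an index of 3 and still reads lane 1. `imm8` must
// also be a compile-time constant because of `#[rustc_args_required_const(1)]`.
// Names below are illustrative.
#[cfg(test)]
mod extract_lane_example {
    use crate::core_arch::arch::x86_64::*;
    use stdsimd_test::simd_test;

    #[simd_test(enable = "sse4.1")]
    unsafe fn index_is_masked_to_one_bit() {
        let a = _mm_setr_epi64x(10, 20);
        // Lane 1 directly...
        assert_eq!(_mm_extract_epi64(a, 1), 20);
        // ...and via an out-of-range index whose low bit is 1.
        assert_eq!(_mm_extract_epi64(a, 3), 20);
        // Even indices select lane 0.
        assert_eq!(_mm_extract_epi64(a, 0), 10);
        assert_eq!(_mm_extract_epi64(a, 2), 10);
    }
}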
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi64) #[inline] #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(pinsrq, imm8 = 0))] #[rustc_args_required_const(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i { transmute(simd_insert(a.as_i64x2(), (imm8 & 1) as u32, i)) } #[cfg(test)] mod tests { use crate::core_arch::arch::x86_64::*; use stdsimd_test::simd_test; #[simd_test(enable = "sse4.1")] unsafe fn test_mm_extract_epi64() { let a = _mm_setr_epi64x(0, 1); let r = _mm_extract_epi64(a, 1); assert_eq!(r, 1); let r = _mm_extract_epi64(a, 3); assert_eq!(r, 1); } #[simd_test(enable = "sse4.1")] unsafe fn test_mm_insert_epi64() { let a = _mm_set1_epi64x(0); let e = _mm_setr_epi64x(0, 32); let r = _mm_insert_epi64(a, 32, 1); assert_eq_m128i(r, e); let r = _mm_insert_epi64(a, 32, 3); assert_eq_m128i(r, e); } } core_arch-0.1.5/src/x86_64/sse42.rs010064400007650000024000000017611343447103600147260ustar0000000000000000//! `x86_64`'s Streaming SIMD Extensions 4.2 (SSE4.2) #[cfg(test)] use stdsimd_test::assert_instr; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sse42.crc32.64.64"] fn crc32_64_64(crc: u64, v: u64) -> u64; } /// Starting with the initial value in `crc`, return the accumulated /// CRC32 value for unsigned 64-bit integer `v`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u64) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 { crc32_64_64(crc, v) } #[cfg(test)] mod tests { use crate::core_arch::arch::x86_64::*; use stdsimd_test::simd_test; #[simd_test(enable = "sse4.2")] unsafe fn test_mm_crc32_u64() { let crc = 0x7819dccd3e824; let v = 0x2a22b845fed; let i = _mm_crc32_u64(crc, v); assert_eq!(i, 0xbb6cdc6c); } } core_arch-0.1.5/src/x86_64/xsave.rs010064400007650000024000000201051345561510300151030ustar0000000000000000//! `x86_64`'s `xsave` and `xsaveopt` target feature intrinsics #![allow(clippy::module_name_repetitions)] #[cfg(test)] use stdsimd_test::assert_instr; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.xsave64"] fn xsave64(p: *mut u8, hi: u32, lo: u32) -> (); #[link_name = "llvm.x86.xrstor64"] fn xrstor64(p: *const u8, hi: u32, lo: u32) -> (); #[link_name = "llvm.x86.xsaveopt64"] fn xsaveopt64(p: *mut u8, hi: u32, lo: u32) -> (); #[link_name = "llvm.x86.xsavec64"] fn xsavec64(p: *mut u8, hi: u32, lo: u32) -> (); #[link_name = "llvm.x86.xsaves64"] fn xsaves64(p: *mut u8, hi: u32, lo: u32) -> (); #[link_name = "llvm.x86.xrstors64"] fn xrstors64(p: *const u8, hi: u32, lo: u32) -> (); } /// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr`. /// /// State is saved based on bits `[62:0]` in `save_mask` and XCR0. /// `mem_addr` must be aligned on a 64-byte boundary. /// /// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of /// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1. 
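// Illustrative sketch, not part of the crate: `_mm_crc32_u64` folds one u64 at
// a time into a running checksum (the SSE4.2 CRC32 instruction uses the
// CRC32-C / Castagnoli polynomial), so a buffer is hashed by threading the
// accumulator through successive calls. Real CRC-32C implementations commonly
// start from !0 and invert the result at the end; this sketch starts from 0
// purely for illustration. Names below are illustrative.
#[cfg(test)]
mod crc32_example {
    use crate::core_arch::arch::x86_64::*;
    use stdsimd_test::simd_test;

    #[simd_test(enable = "sse4.2")]
    unsafe fn crc32_over_a_buffer() {
        let words: [u64; 4] = [1, 2, 3, 4];
        // Thread the accumulator through each 64-bit chunk.
        let mut crc: u64 = 0;
        for &w in words.iter() {
            crc = _mm_crc32_u64(crc, w);
        }
        // The instruction produces a 32-bit CRC zero-extended to 64 bits.
        assert_eq!(crc, crc & 0xFFFF_FFFF);
    }
}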
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsave64) #[inline] #[target_feature(enable = "xsave")] #[cfg_attr(test, assert_instr(xsave64))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) { xsave64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } /// Performs a full or partial restore of the enabled processor states using /// the state information stored in memory at `mem_addr`. /// /// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and /// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte /// boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstor64) #[inline] #[target_feature(enable = "xsave")] #[cfg_attr(test, assert_instr(xrstor64))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) { xrstor64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); } /// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr`. /// /// State is saved based on bits `[62:0]` in `save_mask` and `XCR0`. /// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize /// the manner in which data is saved. The performance of this instruction will /// be equal to or better than using the `XSAVE64` instruction. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaveopt64) #[inline] #[target_feature(enable = "xsave,xsaveopt")] #[cfg_attr(test, assert_instr(xsaveopt64))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) { xsaveopt64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } /// Performs a full or partial save of the enabled processor states to memory /// at `mem_addr`. /// /// `xsavec` differs from `xsave` in that it uses compaction and that it may /// use init optimization. State is saved based on bits `[62:0]` in `save_mask` /// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsavec64) #[inline] #[target_feature(enable = "xsave,xsavec")] #[cfg_attr(test, assert_instr(xsavec64))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) { xsavec64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } /// Performs a full or partial save of the enabled processor states to memory at /// `mem_addr` /// /// `xsaves` differs from xsave in that it can save state components /// corresponding to bits set in `IA32_XSS` `MSR` and that it may use the /// modified optimization. State is saved based on bits `[62:0]` in `save_mask` /// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaves64) #[inline] #[target_feature(enable = "xsave,xsaves")] #[cfg_attr(test, assert_instr(xsaves64))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) { xsaves64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); } /// Performs a full or partial restore of the enabled processor states using the /// state information stored in memory at `mem_addr`. 
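// Illustrative sketch, not part of the crate: a minimal `_xsave64`/`_xrstor64`
// round trip, mirroring the test code that is commented out further below
// (those tests are disabled because they fail under Intel SDE). The save area
// must be 64-byte aligned; the 2560-byte size is taken from those disabled
// tests and is an assumption here, not a documented minimum. Names below are
// illustrative.
#[cfg(test)]
mod xsave_example {
    use crate::core_arch::arch::x86_64::*;
    use stdsimd_test::simd_test;

    // 64-byte aligned XSAVE area; size assumed from the disabled tests below.
    #[repr(align(64))]
    struct Area {
        data: [u8; 2560],
    }

    #[simd_test(enable = "xsave")]
    unsafe fn save_then_restore() {
        let mut area = Area { data: [0; 2560] };
        // An all-ones mask requests every state component enabled in XCR0.
        let mask = !0u64;
        _xsave64(area.data.as_mut_ptr(), mask);
        _xrstor64(area.data.as_ptr(), mask);
    }
}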
/// /// `xrstors` differs from `xrstor` in that it can restore state components /// corresponding to bits set in the `IA32_XSS` `MSR`; `xrstors` cannot restore /// from an `xsave` area in which the extended region is in the standard form. /// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and /// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte /// boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstors64) #[inline] #[target_feature(enable = "xsave,xsaves")] #[cfg_attr(test, assert_instr(xrstors64))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) { xrstors64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); } // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 // All these tests fail with Intel SDE. /* #[cfg(test)] mod tests { use crate::core_arch::x86::x86_64::xsave; use stdsimd_test::simd_test; use std::fmt; // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/209 #[repr(align(64))] struct XsaveArea { // max size for 256-bit registers is 800 bytes: // see https://software.intel.com/en-us/node/682996 // max size for 512-bit registers is 2560 bytes: // FIXME: add source data: [u8; 2560], } impl XsaveArea { fn new() -> XsaveArea { XsaveArea { data: [0; 2560] } } fn ptr(&mut self) -> *mut u8 { &mut self.data[0] as *mut _ as *mut u8 } } impl PartialEq for XsaveArea { fn eq(&self, other: &XsaveArea) -> bool { for i in 0..self.data.len() { if self.data[i] != other.data[i] { return false; } } true } } impl fmt::Debug for XsaveArea { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "[")?; for i in 0..self.data.len() { write!(f, "{}", self.data[i])?; if i != self.data.len() - 1 { write!(f, ", ")?; } } write!(f, "]") } } #[simd_test(enable = "xsave")] unsafe fn xsave64() { let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); xsave::_xsave64(a.ptr(), m); xsave::_xrstor64(a.ptr(), m); xsave::_xsave64(b.ptr(), m); assert_eq!(a, b); } #[simd_test(enable = "xsave,xsaveopt")] unsafe fn xsaveopt64() { let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); xsave::_xsaveopt64(a.ptr(), m); xsave::_xrstor64(a.ptr(), m); xsave::_xsaveopt64(b.ptr(), m); assert_eq!(a, b); } #[simd_test(enable = "xsave,xsavec")] unsafe fn xsavec64() { let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); xsave::_xsavec64(a.ptr(), m); xsave::_xrstor64(a.ptr(), m); xsave::_xsavec64(b.ptr(), m); assert_eq!(a, b); } #[simd_test(enable = "xsave,xsaves")] unsafe fn xsaves64() { let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers let mut a = XsaveArea::new(); let mut b = XsaveArea::new(); xsave::_xsaves64(a.ptr(), m); xsave::_xrstors64(a.ptr(), m); xsave::_xsaves64(b.ptr(), m); assert_eq!(a, b); } } */ core_arch-0.1.5/tests/cpu-detection.rs010064400007650000024000000045221345561510300161420ustar0000000000000000#![feature(stdsimd)] #![cfg_attr(stdsimd_strict, deny(warnings))] #![allow(clippy::option_unwrap_used, clippy::print_stdout, clippy::use_debug)] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[macro_use] extern crate std_detect; #[test] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] fn x86_all() { println!("sse: {:?}", is_x86_feature_detected!("sse")); println!("sse2: {:?}", is_x86_feature_detected!("sse2")); println!("sse3: {:?}", 
is_x86_feature_detected!("sse3")); println!("ssse3: {:?}", is_x86_feature_detected!("ssse3")); println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1")); println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2")); println!("sse4a: {:?}", is_x86_feature_detected!("sse4a")); println!("avx: {:?}", is_x86_feature_detected!("avx")); println!("avx2: {:?}", is_x86_feature_detected!("avx2")); println!("avx512f {:?}", is_x86_feature_detected!("avx512f")); println!("avx512cd {:?}", is_x86_feature_detected!("avx512cd")); println!("avx512er {:?}", is_x86_feature_detected!("avx512er")); println!("avx512pf {:?}", is_x86_feature_detected!("avx512pf")); println!("avx512bw {:?}", is_x86_feature_detected!("avx512bw")); println!("avx512dq {:?}", is_x86_feature_detected!("avx512dq")); println!("avx512vl {:?}", is_x86_feature_detected!("avx512vl")); println!("avx512_ifma {:?}", is_x86_feature_detected!("avx512ifma")); println!("avx512_vbmi {:?}", is_x86_feature_detected!("avx512vbmi")); println!( "avx512_vpopcntdq {:?}", is_x86_feature_detected!("avx512vpopcntdq") ); println!("fma: {:?}", is_x86_feature_detected!("fma")); println!("abm: {:?}", is_x86_feature_detected!("abm")); println!("bmi: {:?}", is_x86_feature_detected!("bmi1")); println!("bmi2: {:?}", is_x86_feature_detected!("bmi2")); println!("tbm: {:?}", is_x86_feature_detected!("tbm")); println!("popcnt: {:?}", is_x86_feature_detected!("popcnt")); println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt")); println!("fxsr: {:?}", is_x86_feature_detected!("fxsr")); println!("xsave: {:?}", is_x86_feature_detected!("xsave")); println!("xsaveopt: {:?}", is_x86_feature_detected!("xsaveopt")); println!("xsaves: {:?}", is_x86_feature_detected!("xsaves")); println!("xsavec: {:?}", is_x86_feature_detected!("xsavec")); } core_arch-0.1.5/.cargo_vcs_info.json0000644000000001120000000000000130150ustar00{ "git": { "sha1": "996300954ded4b4e18a51c368c67ac2547dc0de9" } }
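// Illustrative sketch, not part of the crate: the `is_x86_feature_detected!`
// macro exercised by tests/cpu-detection.rs is how callers gate these
// intrinsics at runtime. A `#[target_feature]`-annotated function is only
// sound to call once the matching feature has been detected. This is a
// standalone program sketch using the stable `std::arch` re-exports; the
// `crc32_of` helper and its values are hypothetical names for illustration.
#[cfg(target_arch = "x86_64")]
fn main() {
    if is_x86_feature_detected!("sse4.2") {
        // Safe to call only because the feature was verified at runtime.
        let crc = unsafe { crc32_of(&[1, 2, 3]) };
        println!("crc32c: {:#x}", crc);
    } else {
        println!("sse4.2 not detected; a portable fallback would go here");
    }
}

#[cfg(not(target_arch = "x86_64"))]
fn main() {}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn crc32_of(words: &[u64]) -> u64 {
    use std::arch::x86_64::_mm_crc32_u64;
    // Accumulate the CRC across each 64-bit word.
    let mut crc = 0;
    for &w in words {
        crc = _mm_crc32_u64(crc, w);
    }
    crc
}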