packed_simd-0.3.3/.appveyor.yml010064400007650000024000000033621342460246600146600ustar0000000000000000matrix: allow_failures: # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/72 - TARGET: i686-pc-windows-msvc - TARGET: i686-pc-windows-gnu - TARGET: x86_64-pc-windows-gnu fast_finish: true environment: matrix: - TARGET: x86_64-pc-windows-msvc MSYSTEM: MINGW64 NOVERIFY: "1" - TARGET: x86_64-pc-windows-msvc MSYSTEM: MINGW64 RUSTFLAGS: "-C target-feature=+sse4.2" NOVERIFY: "1" - TARGET: x86_64-pc-windows-msvc MSYSTEM: MINGW64 RUSTFLAGS: "-C target-feature=+avx" NOVERIFY: "1" - TARGET: x86_64-pc-windows-msvc MSYSTEM: MINGW64 RUSTFLAGS: "-C target-feature=+avx2" NOVERIFY: "1" - TARGET: i686-pc-windows-msvc MSYSTEM: MINGW32 NOVERIFY: "1" - TARGET: i686-pc-windows-msvc MSYSTEM: MINGW32 RUSTFLAGS: "-C target-feature=+sse4.2" NOVERIFY: "1" - TARGET: i686-pc-windows-msvc MSYSTEM: MINGW32 RUSTFLAGS: "-C target-feature=+avx" NOVERIFY: "1" - TARGET: i686-pc-windows-msvc MSYSTEM: MINGW32 RUSTFLAGS: "-C target-feature=+avx2" NOVERIFY: "1" - TARGET: x86_64-pc-windows-gnu MSYSTEM: MINGW64 - TARGET: i686-pc-windows-gnu MSYSTEM: MINGW32 install: - ps: if (ls -r . -fi "*.rs" | sls "`t") { throw "Found tab character" } - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-nightly-${env:TARGET}.exe" -FileName "rust-install.exe" - ps: .\rust-install.exe /VERYSILENT /NORESTART /DIR="C:\rust" | Out-Null - ps: $env:PATH="$env:PATH;C:\rust\bin" - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% - rustc -vV - cargo -vV build: false test_script: bash -c "ci/run.sh" packed_simd-0.3.3/.gitignore010064400007650000024000000010511342460246600141730ustar0000000000000000Cargo.lock target/ # llvm-ir and assembly *.ll *.d # png files output by benchmarks *.png # -*- mode: gitignore; -*- *~ \#*\# /.emacs.desktop /.emacs.desktop.lock *.elc auto-save-list tramp .\#* # Org-mode .org-id-locations *_archive # flymake-mode *_flymake.* # eshell files /eshell/history /eshell/lastdir # elpa packages /elpa/ # reftex files *.rel # AUCTeX auto folder /auto/ # cask packages .cask/ dist/ # Flycheck flycheck_*.el # server auth directory /server/ # projectiles files .projectile # directory configuration .dir-locals.elpacked_simd-0.3.3/.travis.yml010064400007650000024000000271301342636765500143320ustar0000000000000000language: rust sudo: false rust: nightly stages: - tools - linux-tier1 - osx-tier1 - osx-tier2 - linux-tier2 - android matrix: fast_finish: true include: # Android: - env: TARGET=x86_64-linux-android NOVERIFY=1 name: "x86_64-unknown-linux-android + SSE2" stage: android - env: TARGET=arm-linux-androideabi name: "arm-linux-androideabi" stage: android - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon" name: "arm-linux-androideabi + NEON" stage: android - env: TARGET=aarch64-linux-android name: "aarch64-unknown-linux-android" stage: android - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon" name: "aarch64-unknown-linux-android + NEON" stage: android - env: TARGET="thumbv7neon-linux-androideabi" name: "thumbv7neon-linux-androideabi" stage: android # Linux: - env: TARGET=i586-unknown-linux-gnu name: "i586-unknown-linux-gnu" stage: linux-tier2 - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse" name: "i586-unknown-linux-gnu + SSE" stage: linux-tier2 - env: TARGET=i586-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse2" name: "i586-unknown-linux-gnu + SSE2" stage: linux-tier2 - env:
TARGET=i686-unknown-linux-gnu name: "i686-unknown-linux-gnu + SSE2" stage: linux-tier1 - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2" name: "i686-unknown-linux-gnu + SSE4.2" stage: linux-tier1 - env: TARGET=i686-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2" name: "i686-unknown-linux-gnu + AVX2" stage: linux-tier1 - env: TARGET=x86_64-unknown-linux-gnu name: "x86_64-unknown-linux-gnu + SSE2" install: rustup component add rustfmt-preview stage: linux-tier1 - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+sse4.2" name: "x86_64-unknown-linux-gnu + SSE4.2" install: rustup component add rustfmt-preview stage: linux-tier1 - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx" name: "x86_64-unknown-linux-gnu + AVX" install: rustup component add rustfmt-preview stage: linux-tier1 - env: TARGET=x86_64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+avx2" name: "x86_64-unknown-linux-gnu + AVX2" install: rustup component add rustfmt-preview stage: linux-tier1 - env: TARGET=x86_64-unknown-linux-gnu-emulated name: "Intel SDE + SSE2" install: true stage: linux-tier1 - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+sse4.2" name: "Intel SDE + SSE4.2" install: true stage: linux-tier1 - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx" name: "Intel SDE + AVX" install: true stage: linux-tier1 - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx2" name: "Intel SDE + AVX2" install: true stage: linux-tier1 - env: TARGET=x86_64-unknown-linux-gnu-emulated RUSTFLAGS="-C target-feature=+avx-512f" name: "Intel SDE + AVX-512" install: true stage: linux-tier1 - env: TARGET=arm-unknown-linux-gnueabi name: "arm-unknown-linux-gnueabi" stage: linux-tier2 - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon" name: "arm-unknown-linux-gnueabi + NEON" stage: linux-tier2 - env: TARGET=arm-unknown-linux-gnueabihf name: "arm-unknown-linux-gnueabihf" stage: linux-tier2 - env: TARGET=arm-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+v7,+neon" name: "arm-unknown-linux-gnueabihf + NEON" stage: linux-tier2 - env: TARGET=armv7-unknown-linux-gnueabihf name: "armv7-unknown-linux-gnueabihf" stage: linux-tier2 - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon" name: "armv7-unknown-linux-gnueabihf + NEON" stage: linux-tier2 - env: TARGET="thumbv7neon-unknown-linux-gnueabihf" name: "thumbv7neon-unknown-linux-gnueabihf" stage: linux-tier2 - env: TARGET=aarch64-unknown-linux-gnu name: "aarch64-unknown-linux-gnu" stage: linux-tier2 - env: TARGET=aarch64-unknown-linux-gnu RUSTFLAGS="-C target-feature=+neon" name: "aarch64-unknown-linux-gnu + NEON" stage: linux-tier2 - env: TARGET=mips-unknown-linux-gnu name: "mips-unknown-linux-gnu" stage: linux-tier2 - env: TARGET=mipsel-unknown-linux-musl name: "mipsel-unknown-linux-musl" stage: linux-tier2 - env: TARGET=mips64-unknown-linux-gnuabi64 name: "mips64-unknown-linux-gnuabi64" stage: linux-tier2 - env: TARGET=mips64el-unknown-linux-gnuabi64 name: "mips64el-unknown-linux-gnuabi64" stage: linux-tier2 # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/18 # env: TARGET=mips64el-unknown-linux-gnuabi64 RUSTFLAGS="-C target-feature=+msa -C target-cpu=mips64r6" - env: TARGET=powerpc-unknown-linux-gnu name: "powerpc-unknown-linux-gnu" stage: linux-tier2 - env: TARGET=powerpc64-unknown-linux-gnu name: "powerpc64-unknown-linux-gnu" stage: linux-tier2 - env: 
TARGET=powerpc64le-unknown-linux-gnu name: "powerpc64le-unknown-linux-gnu" stage: linux-tier2 - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec" name: "powerpc64le-unknown-linux-gnu + ALTIVEC" stage: linux-tier2 - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx" name: "powerpc64le-unknown-linux-gnu + VSX" stage: linux-tier2 - env: TARGET=s390x-unknown-linux-gnu name: "s390x-unknown-linux-gnu" stage: linux-tier2 - env: TARGET=sparc64-unknown-linux-gnu name: "sparc64-unknown-linux-gnu" stage: linux-tier2 # WebAssembly: - env: TARGET=wasm32-unknown-unknown name: "wasm32-unknown-unknown" stage: osx-tier1 # For now # MacOSX: - os: osx env: TARGET=i686-apple-darwin name: "i686-apple-darwin + SSE2" script: ci/run.sh osx_image: xcode10 stage: osx-tier1 - os: osx env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2" name: "i686-apple-darwin + SSE4.2" script: ci/run.sh osx_image: xcode10 stage: osx-tier1 # Travis-CI OSX build bots do not support AVX2: - os: osx env: TARGET=i686-apple-darwin RUSTFLAGS="-C target-feature=+avx" name: "i686-apple-darwin + AVX" script: ci/run.sh osx_image: xcode10 stage: osx-tier1 - os: osx env: TARGET=x86_64-apple-darwin name: "x86_64-apple-darwin + SSE2" install: true script: ci/run.sh osx_image: xcode10 stage: osx-tier1 - os: osx env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+sse4.2" name: "x86_64-apple-darwin + SSE4.2" install: true script: ci/run.sh osx_image: xcode10 stage: osx-tier1 # Travis-CI OSX build bots do not support AVX2: - os: osx env: TARGET=x86_64-apple-darwin RUSTFLAGS="-C target-feature=+avx" name: "x86_64-apple-darwin + AVX" install: true script: ci/run.sh osx_image: xcode10 stage: osx-tier1 # *BSDs: #- env: TARGET=i686-unknown-freebsd NORUN=1 # script: ci/run.sh #- env: TARGET=x86_64-unknown-freebsd NORUN=1 # script: ci/run.sh #- env: TARGET=x86_64-unknown-netbsd NORUN=1 # script: ci/run.sh # Solaris: #- env: TARGET=x86_64-sun-solaris NORUN=1 # script: ci/run.sh # iOS: - os: osx env: TARGET=i386-apple-ios name: "i386-apple-ios" script: ci/run.sh osx_image: xcode9.4 stage: osx-tier2 - os: osx env: TARGET=x86_64-apple-ios name: "x86_64-apple-ios + SSE2" script: ci/run.sh osx_image: xcode9.4 stage: osx-tier2 - os: osx env: TARGET=armv7-apple-ios NORUN=1 name: "armv7-apple-ios [Build only]" script: ci/run.sh osx_image: xcode9.4 stage: osx-tier2 - os: osx env: TARGET=aarch64-apple-ios NORUN=1 name: "aarch64-apple-ios [Build only]" script: ci/run.sh osx_image: xcode9.4 stage: osx-tier2 # BENCHMARKS: - name: "Benchmarks - x86_64-unknown-linux-gnu" install: TARGET=x86_64-unknown-linux-gnu ./ci/setup_benchmarks.sh script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh stage: tools - name: "Benchmarks - x86_64-apple-darwin" install: TARGET=x86_64-apple-darwin ./ci/setup_benchmarks.sh script: PATH=$(pwd):$PATH NORUN=1 VERIFY=1 FEATURES=core_arch,ispc,sleef-sys ci/benchmark.sh os: osx osx_image: xcode9.4 stage: tools # TOOLS: - name: "Documentation" install: cargo install mdbook script: ci/dox.sh stage: tools - name: "rustfmt" install: true before_script: rustup component add rustfmt-preview script: ci/all.sh check_fmt || true stage: tools - name: "clippy" install: true before_script: rustup component add clippy-preview script: ci/all.sh clippy stage: tools allow_failures: # FIXME: ISPC cannot be found? 
- name: "Benchmarks - x86_64-apple-darwin" # FIXME: TBD - env: TARGET=powerpc-unknown-linux-gnu - env: TARGET=powerpc64-unknown-linux-gnu - env: TARGET=powerpc64le-unknown-linux-gnu - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+altivec" - env: TARGET=powerpc64le-unknown-linux-gnu RUSTFLAGS="-C target-feature=+vsx" #- env: TARGET=i686-unknown-freebsd NORUN=1 #- env: TARGET=x86_64-unknown-freebsd NORUN=1 #- env: TARGET=x86_64-unknown-netbsd NORUN=1 #- env: TARGET=x86_64-sun-solaris NORUN=1 # FIXME: TBD - env: TARGET=arm-linux-androideabi - env: TARGET=arm-linux-androideabi RUSTFLAGS="-C target-feature=+v7,+neon" - env: TARGET=aarch64-linux-android - env: TARGET=aarch64-linux-android RUSTFLAGS="-C target-feature=+neon" # FIXME: iOS # https://github.com/rust-lang-nursery/packed_simd/issues/26 - env: TARGET=i386-apple-ios - env: TARGET=x86_64-apple-ios # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/182 - env: TARGET=arm-unknown-linux-gnueabi RUSTFLAGS="-C target-feature=+v7,+neon" - env: TARGET=arm-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+v7,+neon" - env: TARGET=armv7-unknown-linux-gnueabihf RUSTFLAGS="-C target-feature=+neon" # FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/183 - env: TARGET=wasm32-unknown-unknown install: travis_retry rustup target add $TARGET before_script: cargo generate-lockfile script: travis_wait 50 ci/run-docker.sh after_script: sleep 5 env: global: secure: "lPHv7s6+AxQYNaFncycVFQt++Y1asQmMhOikQU1ztlP8CK7+hn2m98cg/euOJyzIOb2iJ3ZX4cGZkzw4lc59MQBByb1GtDbazQoUOzVDbVfe9BDD2f8JVoIFh1CMfjPKQ7Gg/rJqWlwrUlSd5GNxPCutKjY7qZhJuR6SQbJjlWaGN2Vd4fVCzKXz8fHRXgMEZS+d+CR4Nsrkb83J3Z4s5kSdJmhYxJ61AWjuzJVwUh4l3/HEYlSL5XXpuh5R2i7W16h1PlNdaTUgkZli1lHzO8+6Q8LzX9+XiLIEVX9lw3A2NdIKGz8E/+7Qs5oYOkwYhjROsDQxIK7xkSM30bQuN7cwMBybAVIyOPJkqXQ1dQyp83KSdsOj7JMyDDRvcEDLI6ehRlm5EcdH7YrReuboN81iUo0Sa7VsuUmgj5hjERCt9r30f9aWuitABai7vKRtjglg7Sp5CrEVPA4PQs6PqKCCRogoggbXJ/Z5Dyw/RZaXPeNR9+qIKN1Vjm9Gew1sRN2JK/3+vXTKtyJXH/uBxgJt4jQlbuShOJuF+BSfTF88sMe67a/357SSOIb4JkaCyd0flDCWYE8576kaHPlVVMT2peXee0LeRXm1e13nG3Na0t3LS/orJLPHOShNQGoDj7qAP5aEKggRya896JGwtvlaBHHTmSQh65G7cyNErZo=" branches: only: - staging # bors r+ - trying # bors try - master notifications: email: on_success: never packed_simd-0.3.3/bors.toml010064400007650000024000000000701332536563700140530ustar0000000000000000status = [ "continuous-integration/travis-ci/push" ]packed_simd-0.3.3/build.rs010064400007650000024000000003141342636765500136630ustar0000000000000000fn main() { let target = std::env::var("TARGET") .expect("TARGET environment variable not defined"); if target.contains("neon") { println!("cargo:rustc-cfg=libcore_neon"); } } packed_simd-0.3.3/Cargo.toml.orig010064400007650000024000000025141342637036700151030ustar0000000000000000[package] name = "packed_simd" version = "0.3.3" authors = ["Gonzalo Brito Gadeschi "] description = "Portable Packed SIMD vectors" documentation = "https://docs.rs/crate/packed_simd/" homepage = "https://github.com/rust-lang-nursery/packed_simd" repository = "https://github.com/rust-lang-nursery/packed_simd" keywords = ["simd", "vector", "portability"] categories = ["hardware-support", "concurrency", "no-std", "data-structures"] license = "MIT/Apache-2.0" build = "build.rs" edition = "2018" [badges] appveyor = { repository = "rust-lang-nursery/packed_simd" } travis-ci = { repository = "rust-lang-nursery/packed_simd" } codecov = { repository = "rust-lang-nursery/packed_simd" } is-it-maintained-issue-resolution = { repository = 
"rust-lang-nursery/packed_simd" } is-it-maintained-open-issues = { repository = "rust-lang-nursery/packed_simd" } maintenance = { status = "experimental" } [dependencies] cfg-if = "^0.1" core_arch = { version = "^0.1.3", optional = true } [features] default = [] into_bits = [] libcore_neon = [] [dev-dependencies] paste = "^0.1.3" arrayvec = { version = "^0.4", default-features = false } [target.'cfg(target_arch = "x86_64")'.dependencies.sleef-sys] version = "^0.1.2" optional = true [target.wasm32-unknown-unknown.dev-dependencies] wasm-bindgen = "=0.2.19" wasm-bindgen-test = "=0.2.19"packed_simd-0.3.3/Cargo.toml0000644000000037140000000000000113440ustar00# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g. crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "packed_simd" version = "0.3.3" authors = ["Gonzalo Brito Gadeschi "] build = "build.rs" description = "Portable Packed SIMD vectors" homepage = "https://github.com/rust-lang-nursery/packed_simd" documentation = "https://docs.rs/crate/packed_simd/" keywords = ["simd", "vector", "portability"] categories = ["hardware-support", "concurrency", "no-std", "data-structures"] license = "MIT/Apache-2.0" repository = "https://github.com/rust-lang-nursery/packed_simd" [dependencies.cfg-if] version = "^0.1" [dependencies.core_arch] version = "^0.1.3" optional = true [dev-dependencies.arrayvec] version = "^0.4" default-features = false [dev-dependencies.paste] version = "^0.1.3" [features] default = [] into_bits = [] libcore_neon = [] [target."cfg(target_arch = \"x86_64\")".dependencies.sleef-sys] version = "^0.1.2" optional = true [target.wasm32-unknown-unknown.dev-dependencies.wasm-bindgen] version = "=0.2.19" [target.wasm32-unknown-unknown.dev-dependencies.wasm-bindgen-test] version = "=0.2.19" [badges.appveyor] repository = "rust-lang-nursery/packed_simd" [badges.codecov] repository = "rust-lang-nursery/packed_simd" [badges.is-it-maintained-issue-resolution] repository = "rust-lang-nursery/packed_simd" [badges.is-it-maintained-open-issues] repository = "rust-lang-nursery/packed_simd" [badges.maintenance] status = "experimental" [badges.travis-ci] repository = "rust-lang-nursery/packed_simd" packed_simd-0.3.3/Cargo.toml.orig0000644000000025140000000000000123000ustar00[package] name = "packed_simd" version = "0.3.3" authors = ["Gonzalo Brito Gadeschi "] description = "Portable Packed SIMD vectors" documentation = "https://docs.rs/crate/packed_simd/" homepage = "https://github.com/rust-lang-nursery/packed_simd" repository = "https://github.com/rust-lang-nursery/packed_simd" keywords = ["simd", "vector", "portability"] categories = ["hardware-support", "concurrency", "no-std", "data-structures"] license = "MIT/Apache-2.0" build = "build.rs" edition = "2018" [badges] appveyor = { repository = "rust-lang-nursery/packed_simd" } travis-ci = { repository = "rust-lang-nursery/packed_simd" } codecov = { repository = "rust-lang-nursery/packed_simd" } is-it-maintained-issue-resolution = { repository = "rust-lang-nursery/packed_simd" } is-it-maintained-open-issues = { repository = "rust-lang-nursery/packed_simd" } 
maintenance = { status = "experimental" } [dependencies] cfg-if = "^0.1" core_arch = { version = "^0.1.3", optional = true } [features] default = [] into_bits = [] libcore_neon = [] [dev-dependencies] paste = "^0.1.3" arrayvec = { version = "^0.4", default-features = false } [target.'cfg(target_arch = "x86_64")'.dependencies.sleef-sys] version = "^0.1.2" optional = true [target.wasm32-unknown-unknown.dev-dependencies] wasm-bindgen = "=0.2.19" wasm-bindgen-test = "=0.2.19"packed_simd-0.3.3/contributing.md010064400007650000024000000056151342460246600152460ustar0000000000000000# Contributing to `packed_simd` Welcome! If you are reading this document, it means you are interested in contributing to the `packed_simd` crate. ## Reporting issues All issues with this crate are tracked using GitHub's [Issue Tracker]. You can use issues to bring bugs to the attention of the maintainers, to discuss certain problems encountered with the crate, or to request new features (although feature requests should be limited to things mentioned in the [RFC]). One thing to keep in mind is to always use the **latest** nightly toolchain when working on this crate. Due to the nature of this project, we use a lot of unstable features, meaning breakage happens often. [Issue Tracker]: https://github.com/rust-lang-nursery/packed_simd/issues [RFC]: https://github.com/rust-lang/rfcs/pull/2366 ### LLVM issues The Rust compiler relies on [LLVM](https://llvm.org/) for machine code generation, and quite a few LLVM bugs have been discovered during the development of this project. If you encounter issues with incorrect/suboptimal codegen, which you do not encounter when using the [SIMD vendor intrinsics](https://doc.rust-lang.org/nightly/std/arch/), it is likely the issue is with LLVM, or this crate's interaction with it. You should first open an issue **in this repo** to help us track the problem, and we will help determine what is the exact cause of the problem. If LLVM is indeed the cause, the issue will be reported upstream to the [LLVM bugtracker](https://bugs.llvm.org/). ## Submitting Pull Requests New code is submitted to the crate using GitHub's [pull request] mechanism. You should first fork this repository, make your changes (preferrably in a new branch), then use GitHub's web UI to create a new PR. [pull request]: https://help.github.com/articles/about-pull-requests/ ### Examples The `examples` directory contains code showcasing SIMD code written with this crate, usually in comparison to scalar or ISPC code. If you have a project / idea which uses SIMD, we'd love to add it to the examples list. Every example should include a small `README`, describing the example code's purpose. If your example could potentially work as a benchmark, then add a `benchmark.sh` script to allow running the example benchmark code in CI. See an existing example's [`benchmark.sh`](examples/aobench/benchmark.sh) for a sample. Don't forget to update the crate's top-level `README` with a link to your example. ### Perf guide The objective of the [performance guide][perf-guide] is to be a comprehensive resource detailing the process of optimizing Rust code with SIMD support. If you believe a certain section could be reworded, or if you have any tips & tricks related to SIMD which you'd like to share, please open a PR. [mdBook] is used to manage the formatting of the guide as a book. 
[perf-guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/ [mdBook]: https://github.com/rust-lang-nursery/mdBook packed_simd-0.3.3/LICENSE-APACHE010064400007650000024000000251371332536563700141500ustar0000000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. 
Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. packed_simd-0.3.3/LICENSE-MIT010064400007650000024000000020571332536563700136540ustar0000000000000000Copyright (c) 2014 The Rust Project Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. packed_simd-0.3.3/perf-guide/.gitignore010064400007650000024000000000061342460246600162210ustar0000000000000000/book packed_simd-0.3.3/perf-guide/book.toml010064400007650000024000000004501342460246600160630ustar0000000000000000[book] authors = ["Gonzalo Brito Gadeschi", "Gabriel Majeri"] multilingual = false src = "src" title = "Rust SIMD Performance Guide" description = "This book describes how to write performant SIMD code in Rust." [build] create-missing = false [output.html] additional-css = ["./src/ascii.css"] packed_simd-0.3.3/perf-guide/src/ascii.css010064400007650000024000000002061342460246600166240ustar0000000000000000code { /* "Source Code Pro" breaks ASCII art */ font-family: Consolas, "Ubuntu Mono", Menlo, "DejaVu Sans Mono", monospace; } packed_simd-0.3.3/perf-guide/src/bound_checks.md010064400007650000024000000016171342460246600200020ustar0000000000000000# Bounds checking Reading and writing packed vectors to/from slices is checked by default. Independently of the configuration options used, the safe functions: * `Simd<[T; N]>::from_slice_aligned(& s[..])` * `Simd<[T; N]>::write_to_slice_aligned(&mut s[..])` always check that: * the slice is big enough to hold the vector * the slice is suitably aligned to perform an aligned load/store for a `Simd<[T; N]>` (this alignment is often much larger than that of `T`). There are `_unaligned` versions that use unaligned loads and stores, as well as `unsafe` `_unchecked` versions that do not perform any checks iff `debug-assertions = false` / `debug = false`. That is, the `_unchecked` methods do still assert size and alignment in debug builds and could also do so in release builds depending on the configuration options. These assertions often significantly impact performance and you should be aware of them.
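As a concrete illustration, the sketch below processes a slice in `f32x4` chunks through the checked slice API. It is a minimal example, not code from this crate: `double` is a hypothetical function, and the `_unaligned` variants are used because heap-allocated `f32` data is generally not aligned to 16 bytes:

```rust
use packed_simd::f32x4;

/// Doubles every element of `x` in place, four lanes at a time.
fn double(x: &mut [f32]) {
    assert!(x.len() % 4 == 0);
    for i in (0..x.len()).step_by(4) {
        // Checked load: panics if `x[i..]` holds fewer than 4 elements.
        let v = f32x4::from_slice_unaligned(&x[i..]);
        // Checked store: performs the same length check.
        (v * 2.).write_to_slice_unaligned(&mut x[i..]);
    }
}
```

If surrounding code already guarantees the bounds, the `unsafe` `_unchecked` variants avoid paying for these checks in release builds.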
packed_simd-0.3.3/perf-guide/src/float-math/approx.md010064400007650000024000000004111342460246600207100ustar0000000000000000# Approximate functions packed_simd-0.3.3/perf-guide/src/float-math/fma.md010064400007650000024000000002721342460246600201460ustar0000000000000000# Fused Multiply Add packed_simd-0.3.3/perf-guide/src/float-math/fp.md010064400007650000024000000001541342460246600200070ustar0000000000000000# Floating-point math This chapter contains information pertaining to working with floating-point numbers. packed_simd-0.3.3/perf-guide/src/float-math/svml.md010064400007650000024000000002451342460246600203640ustar0000000000000000# Short Vector Math Library packed_simd-0.3.3/perf-guide/src/introduction.md010064400007650000024000000011761342460246600200740ustar0000000000000000# Introduction ## What is SIMD ## History of SIMD in Rust ## Discover packed_simd Writing fast and portable SIMD algorithms using `packed_simd` is, unfortunately, not trivial. There are many pitfalls that one should be aware of, and some idioms that help avoid those pitfalls. This book attempts to document these best practices and provides practical examples on how to apply the tips to _your_ code. packed_simd-0.3.3/perf-guide/src/prof/linux.md010064400007650000024000000066241342460246600174610ustar0000000000000000# Performance profiling on Linux ## Using `perf` [perf](https://perf.wiki.kernel.org/) is the most powerful performance profiler for Linux, featuring support for various hardware Performance Monitoring Units, as well as integration with the kernel's performance events framework. We will only look at how the `perf` command can be used to profile SIMD code. Full system profiling is outside of the scope of this book. ### Recording The first step is to record a program's execution during an average workload. It helps if you can isolate the parts of your program which have performance issues, and set up a benchmark which can be easily (re)run. Build the benchmark binary in release mode, after having enabled debug info: ```sh $ cargo build --release Finished release [optimized + debuginfo] target(s) in 0.02s ``` Then use the `perf record` subcommand: ```sh $ perf record --call-graph=dwarf ./target/release/my-program [ perf record: Woken up 10 times to write data ] [ perf record: Captured and wrote 2,356 MB perf.data (292 samples) ] ``` Instead of using `--call-graph=dwarf`, which can become pretty slow, you can use `--call-graph=lbr` if you have a processor with support for Last Branch Record (i.e. Intel Haswell and newer). `perf` will, by default, record the count of CPU cycles it takes to execute various parts of your program. You can use the `-e` command line option to enable other performance events, such as `cache-misses`. Use `perf list` to get a list of all hardware counters supported by your CPU. ### Viewing the report The next step is getting a bird's eye view of the program's execution. `perf` provides a `ncurses`-based interface which will get you started. Use `perf report` to open a visualization of your program's performance: ```sh perf report --hierarchy -M intel ``` `--hierarchy` will display a tree-like structure of where your program spent most of its time. `-M intel` enables disassembly output with Intel syntax, which is subjectively more readable than the default AT&T syntax. Here is the output from profiling the `nbody` benchmark: ``` - 100,00% nbody - 94,18% nbody + 93,48% [.] nbody_lib::simd::advance + 0,70% [.]
nbody_lib::run + 5,06% libc-2.28.so ``` If you move with the arrow keys to any node in the tree, you can then press `a` to have `perf` _annotate_ that node. This means it will: - disassemble the function - associate every instruction with the percentage of time which was spent executing it - interleave the disassembly with the source code, assuming it found the debug symbols (you can use `s` to toggle this behaviour) `perf` will, by default, open the instruction which it identified as being the hottest spot in the function: ``` 0,76 │ movapd xmm2,xmm0 0,38 │ movhlps xmm2,xmm0 │ addpd xmm2,xmm0 │ unpcklpd xmm1,xmm2 12,50 │ sqrtpd xmm0,xmm1 1,52 │ mulpd xmm0,xmm1 ``` In this case, `sqrtpd` will be highlighted in red, since that's the instruction which the CPU spends most of its time executing. ## Using Valgrind Valgrind is a set of tools which initially helped C/C++ programmers find unsafe memory accesses in their code. Nowadays the project also has - a heap profiler called `massif` - a cache utilization profiler called `cachegrind` - a call-graph performance profiler called `callgrind` packed_simd-0.3.3/perf-guide/src/prof/mca.md010064400007650000024000000070551342460246600170630ustar0000000000000000# Machine code analysis tools ## The microarchitecture of modern CPUs While you might have heard of Instruction Set Architectures, such as `x86` or `arm` or `mips`, the term _microarchitecture_ (also written here as _µ-arch_) refers to the internal details of an actual family of CPUs, such as Intel's _Haswell_ or AMD's _Jaguar_. Replacing scalar code with SIMD code will improve performance on all CPUs supporting the required vector extensions. However, due to microarchitectural differences, the actual speed-up at runtime might vary. **Example**: a simple example arises when optimizing for AMD K8 CPUs. The assembly generated for an empty function should look like this: ```asm nop ret ``` The `nop` is used to align the `ret` instruction for better performance. However, the compiler will actually generate the following code: ```asm repz ret ``` The `repz` instruction will repeat the following instruction until a certain condition is met. Of course, in this situation, the function will simply immediately return, and the `ret` instruction is still aligned. However, AMD K8's branch predictor performs better with the latter code. If you are looking to absolutely maximize performance for a certain target µ-arch, you will have to read some CPU manuals, or ask the compiler to do it for you with `-C target-cpu`. ### Summary of CPU internals Modern processors are able to execute instructions out-of-order for better performance, by utilizing tricks such as [branch prediction], [instruction pipelining], or [superscalar execution]. [branch prediction]: https://en.wikipedia.org/wiki/Branch_predictor [instruction pipelining]: https://en.wikipedia.org/wiki/Instruction_pipelining [superscalar execution]: https://en.wikipedia.org/wiki/Superscalar_processor SIMD instructions are also subject to these optimizations, meaning it can get pretty difficult to determine where the slowdown happens.
For example, if the profiler reports a store operation is slow, one of two things could be happening: - the store is limited by the CPU's memory bandwidth, which is actually an ideal scenario, all things considered; - memory bandwidth is nowhere near its peak, but the value to be stored is at the end of a long chain of operations, and this store is where the profiler encountered the pipeline stall; Since most profilers are simple tools which don't understand the subtleties of instruction scheduling, you will often need a machine code analyzer, like the tools described below, to tell these scenarios apart. ## Analyzing the machine code Certain tools have knowledge of internal CPU microarchitecture, i.e. they know - how many physical [register files] a CPU actually has - what is the latency / throughput of an instruction - what [µ-ops] are generated for a set of instructions and many other architectural details. [register files]: https://en.wikipedia.org/wiki/Register_file [µ-ops]: https://en.wikipedia.org/wiki/Micro-operation These tools are therefore able to provide accurate information as to why some instructions are inefficient, and where the bottleneck is. The disadvantage is that the output of these tools requires advanced knowledge of the target architecture to understand, i.e. they **cannot** point out what the cause of the issue is explicitly. ## Intel's Architecture Code Analyzer (IACA) [IACA] is a free tool offered by Intel for analyzing the performance of various computational kernels. Being a proprietary, closed source tool, it _only_ supports Intel's µ-arches. [IACA]: https://software.intel.com/en-us/articles/intel-architecture-code-analyzer ## llvm-mca packed_simd-0.3.3/perf-guide/src/prof/profiling.md010064400007650000024000000013421342460246600203050ustar0000000000000000# Performance profiling While the rest of the book provides practical advice on how to improve the performance of SIMD code, this chapter is dedicated to [**performance profiling**][profiling]. Profiling consists of recording a program's execution in order to identify program hotspots. **Important**: most profilers require debug information in order to accurately link the program hotspots back to the corresponding source code lines. Rust will disable debug info generation by default for optimized builds, but you can change that [in your `Cargo.toml`][cargo-ref].
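Concretely, for a Cargo project this is a one-line change; the snippet below is standard Cargo configuration (a minimal sketch, assuming the default release profile):

```toml
# Keep optimizations, but also emit debug info so the profiler
# can map hotspots back to source lines.
[profile.release]
debug = true
```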
[profiling]: https://en.wikipedia.org/wiki/Profiling_(computer_programming) [cargo-ref]: https://doc.rust-lang.org/cargo/reference/manifest.html#the-profile-sections packed_simd-0.3.3/perf-guide/src/SUMMARY.md010064400007650000024000000014321342460246600165030ustar0000000000000000# Summary [Introduction](./introduction.md) - [Floating-point Math](./float-math/fp.md) - [Short-vector Math Library](./float-math/svml.md) - [Approximate functions](./float-math/approx.md) - [Fused multiply-accumulate](./float-math/fma.md) - [Target features](./target-feature/features.md) - [Using `RUSTFLAGS`](./target-feature/rustflags.md) - [Using the `target_feature` attribute](./target-feature/attribute.md) - [Interaction with inlining](./target-feature/inlining.md) - [Detecting features at runtime](./target-feature/runtime.md) - [Bounds checking](./bound_checks.md) - [Vertical and horizontal operations](./vert-hor-ops.md) - [Performance profiling](./prof/profiling.md) - [Profiling on Linux](./prof/linux.md) - [Using machine code analyzers](./prof/mca.md) packed_simd-0.3.3/perf-guide/src/target-feature/attribute.md010064400007650000024000000001331342460246600222650ustar0000000000000000# The `target_feature` attribute packed_simd-0.3.3/perf-guide/src/target-feature/features.md010064400007650000024000000013511342460246600221030ustar0000000000000000# Enabling target features Not all processors of a certain architecture will have SIMD processing units, and using a SIMD instruction which is not supported will trigger undefined behavior. To allow building safe, portable programs, the Rust compiler will **not**, by default, generate any sort of vector instructions, unless it can statically determine they are supported. For example, on AMD64, SSE2 support is architecturally guaranteed. The `x86_64-apple-darwin` target enables up to SSSE3. To get a definitive list of which features are enabled by default on various platforms, refer to the target specifications [in the compiler's source code][targets]. [targets]: https://github.com/rust-lang/rust/tree/master/src/librustc_target/spec packed_simd-0.3.3/perf-guide/src/target-feature/inlining.md010064400007650000024000000001411342460246600220900ustar0000000000000000# Inlining packed_simd-0.3.3/perf-guide/src/target-feature/practice.md010064400007650000024000000027161342460246600220650ustar0000000000000000# Target features in practice Using `RUSTFLAGS` will allow the crate being compiled, as well as all its transitive dependencies, to use certain target features. A technique used to avoid undefined behavior at runtime is to compile and ship multiple binaries, each compiled with a certain set of features. This might not be feasible in some cases, and can quickly get out of hand as more and more vector extensions are added to an architecture. Rust can be more flexible: you can build a single binary/library which automatically picks the best supported vector instructions depending on the host machine. The trick consists of monomorphizing parts of the code during building, and then using run-time feature detection to select the right code path when running (see the sketch at the end of this section). **NOTE** (x86 specific): because the AVX (256-bit) registers extend the existing SSE (128-bit) registers, mixing SSE and AVX instructions in a program can cause performance issues. The solution is to compile all code, even the code written with 128-bit vectors, with the AVX target feature enabled. This will cause the compiler to prefix the generated instructions with the [VEX] prefix.
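Below is a minimal sketch of that monomorphization + run-time detection trick, assuming an `x86_64` target; `sum_avx2` and `sum_generic` are illustrative helpers invented for this example, not APIs of this crate:

```rust
#[inline(always)]
fn sum_generic(x: &[f32]) -> f32 {
    x.iter().sum()
}

// Monomorphized copy compiled with AVX2 enabled, regardless of RUSTFLAGS.
// It is only sound to call this on CPUs that actually support AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn sum_avx2(x: &[f32]) -> f32 {
    sum_generic(x) // inlined here, so this copy gets AVX2 codegen
}

pub fn sum(x: &[f32]) -> f32 {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // Safe: the run-time check above guarantees AVX2 support.
            return unsafe { sum_avx2(x) };
        }
    }
    sum_generic(x) // portable fallback
}
```

The feature check runs once per call site, so in practice the dispatch cost is negligible next to the vectorized work it selects.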
[VEX]: https://en.wikipedia.org/wiki/VEX_prefix packed_simd-0.3.3/perf-guide/src/target-feature/runtime.md010064400007650000024000000001221342460246600217430ustar0000000000000000# Detecting host features at runtime packed_simd-0.3.3/perf-guide/src/target-feature/rustflags.md010064400007650000024000000051041342460246600222770ustar0000000000000000# Using RUSTFLAGS One of the easiest ways to benefit from SIMD is to allow the compiler to generate code using certain vector instruction extensions. The environment variable `RUSTFLAGS` can be used to pass options for code generation to the Rust compiler. These flags will affect **all** compiled crates. There are two flags which can be used to enable specific vector extensions: ## target-feature - Syntax: `-C target-feature=<features>` - Provides the compiler with a comma-separated set of instruction extensions to enable. **Example**: Use `-C target-feature=+sse3,+avx` to enable generating instructions for [Streaming SIMD Extensions 3](https://en.wikipedia.org/wiki/SSE3) and [Advanced Vector Extensions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions). - To list target triples for all targets supported by Rust, use: ```sh rustc --print target-list ``` - To list all supported target features for a certain target triple, use: ```sh rustc --target=${TRIPLE} --print target-features ``` - Note that all CPU features are independent, and will have to be enabled individually. **Example**: Setting `-C target-feature=+avx2` will _not_ enable `fma`, even though all CPUs which support AVX2 also support FMA. To enable both, one has to use `-C target-feature=+avx2,+fma` - Some features also depend on other features, which need to be enabled for the target instructions to be generated. **Example**: Unless `v7` is specified as the target CPU (see below), to enable NEON on ARM it is necessary to use `-C target-feature=+v7,+neon`. ## target-cpu - Syntax: `-C target-cpu=<id>` - Sets the identifier of a CPU family / model for which to build and optimize the code. **Example**: `RUSTFLAGS='-C target-cpu=cortex-a75'` - To list all supported target CPUs for a certain target triple, use: ```sh rustc --target=${TRIPLE} --print target-cpus ``` **Example**: ```sh rustc --target=i686-pc-windows-msvc --print target-cpus ``` - The compiler will translate this into a list of target features. Therefore, individual feature checks (`#[cfg(target_feature = "...")]`) will still work properly. - It will cause the code generator to optimize the generated code for that specific CPU model. - Using `native` as the CPU model will cause Rust to generate and optimize code for the CPU running the compiler. It is useful when building programs which you plan to only use locally. This should never be used when the generated programs are meant to be run on other computers, such as when packaging for distribution or cross-compiling. packed_simd-0.3.3/perf-guide/src/vert-hor-ops.md010064400007650000024000000052371342460246600177200ustar0000000000000000# Vertical and horizontal operations In SIMD terminology, each vector has a certain "width" (number of lanes).
A vector processor is able to perform two kinds of operations on a vector: - Vertical operations: operate on two vectors of the same width, result has same width **Example**: vertical addition of two `f32x4` vectors %0 == | 2 | -3.5 | 0 | 7 | + + + + %1 == | 4 | 1.5 | -1 | 0 | = = = = %0 + %1 == | 6 | -2 | -1 | 7 | - Horizontal operations: reduce the elements of two vectors in some way, the result's elements combine information from the two original ones **Example**: horizontal addition of two `u64x2` vectors %0 == | 1 | 3 | └─+───┘ └───────┐ │ %1 == | 4 | -1 | │ └─+──┘ │ └───┐ │ │ │ ┌─────│───┘ ▼ ▼ %0 + %1 == | 4 | 3 | ## Performance consideration of horizontal operations The result of vertical operations, like vector negation: `-a`, for a given lane, does not depend on the result of the operation for the other lanes. The result of horizontal operations, like the vector `sum` reduction: `a.sum()`, depends on the value of all vector lanes. On virtually all architectures vertical operations are fast, while horizontal operations are, by comparison, very slow. Consider the following two functions for computing the sum of all `f32` values in a slice: ```rust fn fast_sum(x: &[f32]) -> f32 { assert!(x.len() % 4 == 0); let mut sum = f32x4::splat(0.); // [0., 0., 0., 0.] for i in (0..x.len()).step_by(4) { sum += f32x4::from_slice_unaligned(&x[i..]); } sum.sum() } fn slow_sum(x: &[f32]) -> f32 { assert!(x.len() % 4 == 0); let mut sum: f32 = 0.; for i in (0..x.len()).step_by(4) { sum += f32x4::from_slice_unaligned(&x[i..]).sum(); } sum } ``` The inner loop over the slice is where the bulk of the work actually happens. There, the `fast_sum` function performs vertical operations into a vector, doing a single horizontal reduction at the end, while the `slow_sum` function performs horizontal vector operations inside of the loop. On all widely-used architectures, `fast_sum` is a large constant factor faster than `slow_sum`. You can run the [slice_sum]() example and see for yourself. On the particular machine tested, the algorithm using the horizontal vector addition is 2.7x slower than the one using vertical vector operations! packed_simd-0.3.3/readme.md010064400007650000024000000214021342637030000137560ustar0000000000000000# `Simd<[T; N]>` ## Implementation of [Rust RFC #2366: `std::simd`][rfc2366] [![Travis-CI Status]][travis] [![Appveyor Status]][appveyor] [![Latest Version]][crates.io] [![docs]][master_docs] > This aims to be a 100% conforming implementation of Rust RFC 2366 for stabilization. **WARNING**: this crate only supports the most recent nightly Rust toolchain. ## Documentation * [API docs (`master` branch)][master_docs] * [Performance guide][perf_guide] * [API docs (`docs.rs`)][docs.rs]: **CURRENTLY DOWN** due to https://github.com/rust-lang-nursery/packed_simd/issues/110 * [RFC2366 `std::simd`][rfc2366]: - contains motivation, design rationale, discussion, etc. ## Examples Most of the examples come with both a scalar and a vectorized implementation.
* [`aobench`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/aobench) * [`fannkuch_redux`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/fannkuch_redux) * [`matrix inverse`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/matrix_inverse) * [`mandelbrot`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/mandelbrot) * [`n-body`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/nbody) * [`options_pricing`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/options_pricing) * [`spectral_norm`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/spectral_norm) * [`triangle transform`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/triangle_xform) * [`stencil`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/stencil) * [`vector dot product`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/dot_product) ## Cargo features * `into_bits` (default: disabled): enables `FromBits`/`IntoBits` trait implementations for the vector types. These allow reinterpreting the bits of a vector type as those of another vector type safely by just using the `.into_bits()` method. * `core_arch` (default: disabled): enable this feature to recompile `core::arch` for the enabled target features. `packed_simd` includes optimizations for some target feature combinations that are enabled by this feature. Note, however, that this is an unstable dependency that rustc might break at any time. * `sleef-sys` (default: disabled - `x86_64` only): internally uses the [SLEEF] short-vector math library when profitable via the [`sleef-sys`][sleef_sys] crate. [SLEEF] is licensed under the [Boost Software License v1.0][boost_license], an extremely permissive license, and can be statically linked without issues. ## Performance The following [ISPC] examples are also part of `packed_simd`'s [`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples/) directory, where `packed_simd`+[`rayon`][rayon] are used to emulate [ISPC]'s Single-Program-Multiple-Data (SPMD) programming model. The performance results on different hardware are shown in the `readme.md` of each example. The following table summarizes the performance ranges, where `+` means speed-up and `-` slowdown: * `aobench`: `[-1.02x, +1.53x]`, * `stencil`: `[+1.06x, +1.72x]`, * `mandelbrot`: `[-1.74x, +1.2x]`, * `options_pricing`: * `black_scholes`: `+1.0x` * `binomial_put`: `+1.4x` While SPMD is not the intended use case for `packed_simd`, it is possible to combine the library with [`rayon`][rayon] to poorly emulate [ISPC]'s SPMD programming model in Rust. Writing performant code is not as straightforward as with [ISPC], but with some care (e.g. see the [Performance Guide][perf_guide]) one can easily match and often out-perform [ISPC]'s "default performance". ## Platform support The following table describes the supported platforms: `build` shows whether the library compiles without issues for a given target, while `run` shows whether the full testsuite passes on the target.
| Linux targets: | build | run | |-----------------------------------|-----------|---------| | `i586-unknown-linux-gnu` | ✓ | ✓ | | `i686-unknown-linux-gnu` | ✓ | ✓ | | `x86_64-unknown-linux-gnu` | ✓ | ✓ | | `arm-unknown-linux-gnueabi` | ✗ | ✗ | | `arm-unknown-linux-gnueabihf` | ✓ | ✓ | | `armv7-unknown-linux-gnueabi` | ✓ | ✓ | | `aarch64-unknown-linux-gnu` | ✓ | ✓ | | `mips-unknown-linux-gnu` | ✓ | ✓ | | `mipsel-unknown-linux-musl` | ✓ | ✓ | | `mips64-unknown-linux-gnuabi64` | ✓ | ✓ | | `mips64el-unknown-linux-gnuabi64` | ✓ | ✓ | | `powerpc-unknown-linux-gnu` | ✗ | ✗ | | `powerpc64-unknown-linux-gnu` | ✗ | ✗ | | `powerpc64le-unknown-linux-gnu` | ✗ | ✗ | | `s390x-unknown-linux-gnu` | ✓ | ✓* | | `sparc64-unknown-linux-gnu` | ✓ | ✓* | | **MacOSX targets:** | **build** | **run** | | `x86_64-apple-darwin` | ✓ | ✓ | | `i686-apple-darwin` | ✓ | ✓ | | **Windows targets:** | **build** | **run** | | `x86_64-pc-windows-msvc` | ✓ | ✓ | | `i686-pc-windows-msvc` | ✓ | ✓ | | `x86_64-pc-windows-gnu` | ✗ | ✗ | | `i686-pc-windows-gnu` | ✗ | ✗ | | **WebAssembly targets:** | **build** | **run** | | `wasm32-unknown-unknown` | ✓ | ✓ | | **Android targets:** | **build** | **run** | | `x86_64-linux-android` | ✓ | ✓ | | `arm-linux-androideabi` | ✓ | ✓ | | `aarch64-linux-android` | ✓ | ✗ | | **iOS targets:** | **build** | **run** | | `i386-apple-ios` | ✓ | ✗ | | `x86_64-apple-ios` | ✓ | ✗ | | `armv7-apple-ios` | ✓ | ✗** | | `aarch64-apple-ios` | ✓ | ✗** | | **xBSD targets:** | **build** | **run** | | `i686-unknown-freebsd` | ✗ | ✗** | | `x86_64-unknown-freebsd` | ✗ | ✗** | | `x86_64-unknown-netbsd` | ✗ | ✗** | | **Solaris targets:** | **build** | **run** | | `x86_64-sun-solaris` | ✗ | ✗** | [*] most of the test suite passes correctly on these platforms, but there are correctness bugs open in the issue tracker. [**] it is currently not easily possible to run these platforms on CI. ## Machine code verification The [`verify/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/verify) crate's tests disassemble the portable packed vector APIs at run-time and compare the generated machine code against the desired one to make sure that this crate remains efficient. ## License This project is licensed under either of * [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) ([LICENSE-APACHE](LICENSE-APACHE)) * [MIT License](http://opensource.org/licenses/MIT) ([LICENSE-MIT](LICENSE-MIT)) at your option. ## Contributing We welcome all people who want to contribute. Please see the [contributing instructions] for more information. Contributions in any form (issues, pull requests, etc.) to this project must adhere to Rust's [Code of Conduct]. Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in `packed_simd` by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
[travis]: https://travis-ci.org/rust-lang-nursery/packed_simd
[Travis-CI Status]: https://travis-ci.org/rust-lang-nursery/packed_simd.svg?branch=master
[appveyor]: https://ci.appveyor.com/project/gnzlbg/packed-simd
[Appveyor Status]: https://ci.appveyor.com/api/projects/status/hd7v9dvr442hgdix?svg=true
[Latest Version]: https://img.shields.io/crates/v/packed_simd.svg
[crates.io]: https://crates.io/crates/packed_simd
[docs]: https://docs.rs/packed_simd/badge.svg
[docs.rs]: https://docs.rs/packed_simd/
[master_docs]: https://rust-lang-nursery.github.io/packed_simd/packed_simd/
[perf_guide]: https://rust-lang-nursery.github.io/packed_simd/perf-guide/
[rfc2366]: https://github.com/rust-lang/rfcs/pull/2366
[ISPC]: https://ispc.github.io/
[rayon]: https://crates.io/crates/rayon
[boost_license]: https://www.boost.org/LICENSE_1_0.txt
[SLEEF]: https://sleef.org/
[sleef_sys]: https://crates.io/crates/sleef-sys
[contributing instructions]: contributing.md
[Code of Conduct]: https://www.rust-lang.org/en-US/conduct.html
packed_simd-0.3.3/rust-toolchain010064400007650000024000000000071342460246600151010ustar0000000000000000nightly
packed_simd-0.3.3/rustfmt.toml010064400007650000024000000002411342460246600146040ustar0000000000000000max_width = 79
use_small_heuristics = "Max"
wrap_comments = true
comment_width = 79
fn_args_density = "Compressed"
edition = "2018"
error_on_line_overflow = true
packed_simd-0.3.3/src/api.rs010064400007650000024000000323401342636765500141300ustar0000000000000000//! Implements the Simd<[T; N]> APIs

#[macro_use]
mod bitmask;
crate mod cast;
#[macro_use]
mod cmp;
#[macro_use]
mod default;
#[macro_use]
mod fmt;
#[macro_use]
mod from;
#[macro_use]
mod hash;
#[macro_use]
mod math;
#[macro_use]
mod minimal;
#[macro_use]
mod ops;
#[macro_use]
mod ptr;
#[macro_use]
mod reductions;
#[macro_use]
mod select;
#[macro_use]
mod shuffle;
#[macro_use]
mod shuffle1_dyn;
#[macro_use]
mod slice;
#[macro_use]
mod swap_bytes;
#[macro_use]
mod bit_manip;

#[cfg(feature = "into_bits")]
crate mod into_bits;

macro_rules!
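// Summary (inferred from the macro body; the original is undocumented):
// `impl_i!` below generates the full public API of a signed-integer vector
// type: construction and lane access, vector/scalar arithmetic and bitwise
// ops, shifts and rotates, negation, min/max, reductions, the
// Debug/LowerHex/UpperHex/Octal/Binary formatting impls, array/vector
// conversions, Default, Hash, slice loads/stores, byte swaps, bit
// manipulation, dynamic shuffles, the comparison traits, bitmask
// extraction, and the associated test suites.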
impl_i { ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident | $ielem_ty:ident, $ibitmask_ty:ident | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt | $($elem_ids),* | $(#[$doc])*); impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_vector_bitwise!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) ); impl_ops_scalar_bitwise!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) ); impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_vector_int_min_max!( [$elem_ty; $elem_n]: $tuple_id | $test_tt ); impl_reduction_integer_arithmetic!( [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ); impl_reduction_min_max!( [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ); impl_reduction_bitwise!( [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0) ); impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1)); impl_from_vectors!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* ); impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_cmp_partial_eq!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1) ); impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); impl_cmp_vertical!( [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt ); impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); impl_bitmask!($tuple_id | $ibitmask_ty | (-1, 0) | $test_tt); test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt); test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt); test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); } } macro_rules! 
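// `impl_u!` below mirrors `impl_i!` for unsigned-integer vector types.
// Diffing the two bodies shows the differences: no `Neg` impl (unsigned
// lanes cannot be negated), and the bitmask test pattern uses
// `$ielem_ty::max_value()` (all bits set) instead of `-1`.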
impl_u { ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident | $ielem_ty:ident, $ibitmask_ty:ident | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt | $($elem_ids),* | $(#[$doc])*); impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_vector_bitwise!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) ); impl_ops_scalar_bitwise!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | (!(0 as $elem_ty), 0) ); impl_ops_vector_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_scalar_shifts!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_vector_rotates!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_vector_int_min_max!( [$elem_ty; $elem_n]: $tuple_id | $test_tt ); impl_reduction_integer_arithmetic!( [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ); impl_reduction_min_max!( [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ); impl_reduction_bitwise!( [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt | (|x|{ x as $elem_ty }) | (!(0 as $elem_ty), 0) ); impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_fmt_lower_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_fmt_upper_hex!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_fmt_octal!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_fmt_binary!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 1)); impl_from_vectors!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* ); impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_hash!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_swap_bytes!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_bit_manip!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_cmp_partial_eq!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1, 0) ); impl_cmp_eq!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); impl_cmp_vertical!( [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1, 0) | $test_tt ); impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_cmp_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (0, 1)); impl_bitmask!($tuple_id | $ibitmask_ty | ($ielem_ty::max_value(), 0) | $test_tt); test_select!($elem_ty, $mask_ty, $tuple_id, (1, 2) | $test_tt); test_cmp_partial_ord_int!([$elem_ty; $elem_n]: $tuple_id | $test_tt); test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); } } macro_rules! 
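// `impl_f!` below generates the floating-point vector API. Relative to the
// integer macros it drops the bitwise ops, shifts, Hash, and Eq/Ord (floats
// are only `PartialEq`), and adds float constants, lane-wise classification,
// and the vertical math functions (abs, sin, cos, exp, ln, mul_add, powf,
// sqrt, plus the approximate `recpre`/`rsqrte`/`sqrte` variants).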
impl_f { ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident | $ielem_ty:ident | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { impl_minimal_iuf!([$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt | $($elem_ids),* | $(#[$doc])*); impl_ops_vector_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_scalar_arithmetic!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_vector_neg!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_ops_vector_float_min_max!( [$elem_ty; $elem_n]: $tuple_id | $test_tt ); impl_reduction_float_arithmetic!( [$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_reduction_min_max!( [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt ); impl_fmt_debug!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_from_array!([$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 1.)); impl_from_vectors!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* ); impl_default!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_cmp_partial_eq!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | (1., 0.) ); impl_slice_from_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_slice_write_to_slice!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_float_consts!([$elem_ty; $elem_n]: $tuple_id); impl_float_category!([$elem_ty; $elem_n]: $tuple_id, $mask_ty); // floating-point math impl_math_float_abs!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_cos!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_exp!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_ln!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_mul_add!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_mul_adde!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_powf!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_recpre!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_rsqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_sin!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_sqrt!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_sqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_cmp_vertical!( [$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1., 0.) | $test_tt ); test_select!($elem_ty, $mask_ty, $tuple_id, (1., 2.) | $test_tt); test_reduction_float_min_max!( [$elem_ty; $elem_n]: $tuple_id | $test_tt ); test_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); } } macro_rules! 
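// `impl_m!` below generates the API of the boolean mask vector types:
// mask construction, vector/scalar boolean bitwise ops, mask reductions
// (the `all`/`any`/`none` family), Debug formatting, conversions, `select`,
// the comparison traits, dynamic shuffles, and bitmask extraction.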
impl_m { ([$elem_ty:ident; $elem_n:expr]: $tuple_id:ident | $ielem_ty:ident, $ibitmask_ty:ident | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { impl_minimal_mask!( [$elem_ty; $elem_n]: $tuple_id | $ielem_ty | $test_tt | $($elem_ids),* | $(#[$doc])* ); impl_ops_vector_mask_bitwise!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) ); impl_ops_scalar_mask_bitwise!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) ); impl_reduction_bitwise!( [bool; $elem_n]: $tuple_id | $ielem_ty | $test_tt | (|x|{ x != 0 }) | (true, false) ); impl_reduction_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_fmt_debug!([bool; $elem_n]: $tuple_id | $test_tt); impl_from_array!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | (crate::$elem_ty::new(true), true) ); impl_from_vectors!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | $($from_vec_ty),* ); impl_default!([bool; $elem_n]: $tuple_id | $test_tt); impl_cmp_partial_eq!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) ); impl_cmp_eq!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | (true, false) ); impl_cmp_vertical!( [$elem_ty; $elem_n]: $tuple_id, $tuple_id, true, (true, false) | $test_tt ); impl_select!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_cmp_partial_ord!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_cmp_ord!( [$elem_ty; $elem_n]: $tuple_id | $test_tt | (false, true) ); impl_shuffle1_dyn!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_bitmask!($tuple_id | $ibitmask_ty | (true, false) | $test_tt); test_cmp_partial_ord_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); test_shuffle1_dyn_mask!([$elem_ty; $elem_n]: $tuple_id | $test_tt); } } macro_rules! impl_const_p { ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident, $usize_ty:ident, $isize_ty:ident | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { impl_minimal_p!( [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty | ref_ | $test_tt | $($elem_ids),* | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])* ); impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); } } macro_rules! impl_mut_p { ([$elem_ty:ty; $elem_n:expr]: $tuple_id:ident, $mask_ty:ident, $usize_ty:ident, $isize_ty:ident | $test_tt:tt | $($elem_ids:ident),* | From: $($from_vec_ty:ident),* | $(#[$doc:meta])*) => { impl_minimal_p!( [$elem_ty; $elem_n]: $tuple_id, $mask_ty, $usize_ty, $isize_ty | ref_mut_ | $test_tt | $($elem_ids),* | (1 as $elem_ty, 0 as $elem_ty) | $(#[$doc])* ); impl_ptr_read!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); impl_ptr_write!([$elem_ty; $elem_n]: $tuple_id, $mask_ty | $test_tt); } } packed_simd-0.3.3/src/api/bit_manip.rs010064400007650000024000000124761342461154100160620ustar0000000000000000//! Bit manipulations. macro_rules! impl_bit_manip { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Returns the number of ones in the binary representation of /// the lanes of `self`. #[inline] pub fn count_ones(self) -> Self { super::codegen::bit_manip::BitManip::ctpop(self) } /// Returns the number of zeros in the binary representation of /// the lanes of `self`. #[inline] pub fn count_zeros(self) -> Self { super::codegen::bit_manip::BitManip::ctpop(!self) } /// Returns the number of leading zeros in the binary /// representation of the lanes of `self`. 
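// Illustrative sketch with hypothetical lane values: for 8-bit lanes,
// `u8x2::new(0b0000_0001, 0b1000_0000).leading_zeros()` would equal
// `u8x2::new(7, 0)`, since each lane is counted independently.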
#[inline] pub fn leading_zeros(self) -> Self { super::codegen::bit_manip::BitManip::ctlz(self) } /// Returns the number of trailing zeros in the binary /// representation of the lanes of `self`. #[inline] pub fn trailing_zeros(self) -> Self { super::codegen::bit_manip::BitManip::cttz(self) } } test_if! { $test_tt: paste::item_with_macros! { #[allow(overflowing_literals)] pub mod [<$id _bit_manip>] { use super::*; const LANE_WIDTH: usize = mem::size_of::<$elem_ty>() * 8; macro_rules! test_func { ($x:expr, $func:ident) => {{ let mut actual = $x; for i in 0..$id::lanes() { actual = actual.replace( i, $x.extract(i).$func() as $elem_ty ); } let expected = $x.$func(); assert_eq!(actual, expected); }}; } const BYTES: [u8; 64] = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, ]; fn load_bytes() -> $id { let elems: &mut [$elem_ty] = unsafe { slice::from_raw_parts_mut( BYTES.as_mut_ptr() as *mut $elem_ty, $id::lanes(), ) }; $id::from_slice_unaligned(elems) } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn count_ones() { test_func!($id::splat(0), count_ones); test_func!($id::splat(!0), count_ones); test_func!(load_bytes(), count_ones); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn count_zeros() { test_func!($id::splat(0), count_zeros); test_func!($id::splat(!0), count_zeros); test_func!(load_bytes(), count_zeros); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn leading_zeros() { test_func!($id::splat(0), leading_zeros); test_func!($id::splat(1), leading_zeros); // some implementations use `pshufb` which has unique // behavior when the 8th bit is set. test_func!($id::splat(0b1000_0010), leading_zeros); test_func!($id::splat(!0), leading_zeros); test_func!( $id::splat(1 << (LANE_WIDTH - 1)), leading_zeros ); test_func!(load_bytes(), leading_zeros); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn trailing_zeros() { test_func!($id::splat(0), trailing_zeros); test_func!($id::splat(1), trailing_zeros); test_func!($id::splat(0b1000_0010), trailing_zeros); test_func!($id::splat(!0), trailing_zeros); test_func!( $id::splat(1 << (LANE_WIDTH - 1)), trailing_zeros ); test_func!(load_bytes(), trailing_zeros); } } } } }; } packed_simd-0.3.3/src/api/bitmask.rs010064400007650000024000000063041342636765500155630ustar0000000000000000//! Bitmask API macro_rules! impl_bitmask { ($id:ident | $ibitmask_ty:ident | ($set:expr, $clear:expr) | $test_tt:tt) => { impl $id { /// Creates a bitmask with the MSB of each vector lane. /// /// If the vector has less than 8 lanes, the bits that do not /// correspond to any vector lanes are cleared. #[inline] pub fn bitmask(self) -> $ibitmask_ty { unsafe { codegen::llvm::simd_bitmask(self.0) } } } test_if! { $test_tt: paste::item! 
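// Usage sketch (illustrative, hypothetical lane values): an `m8x8` mask
// with lanes [T, F, T, F, T, F, T, F] (lane 0 first) yields the bitmask
// `0b0101_0101u8`; lane `i` maps to bit `i` of the result.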
{ #[cfg(not(any( // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/210 all(target_arch = "mips", target_endian = "big"), all(target_arch = "mips64", target_endian = "big"), target_arch = "sparc64", target_arch = "s390x", )))] pub mod [<$id _bitmask>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn bitmask() { // clear all lanes let vec = $id::splat($clear as _); let bitmask: $ibitmask_ty = 0; assert_eq!(vec.bitmask(), bitmask); // set even lanes let mut vec = $id::splat($clear as _); for i in 0..$id::lanes() { if i % 2 == 0 { vec = vec.replace(i, $set as _); } } // create bitmask with even lanes set: let mut bitmask: $ibitmask_ty = 0; for i in 0..$id::lanes() { if i % 2 == 0 { bitmask |= 1 << i; } } assert_eq!(vec.bitmask(), bitmask); // set odd lanes let mut vec = $id::splat($clear as _); for i in 0..$id::lanes() { if i % 2 != 0 { vec = vec.replace(i, $set as _); } } // create bitmask with odd lanes set: let mut bitmask: $ibitmask_ty = 0; for i in 0..$id::lanes() { if i % 2 != 0 { bitmask |= 1 << i; } } assert_eq!(vec.bitmask(), bitmask); // set all lanes let vec = $id::splat($set as _); let mut bitmask: $ibitmask_ty = 0; for i in 0..$id::lanes() { bitmask |= 1 << i; } assert_eq!(vec.bitmask(), bitmask); } } } } }; } packed_simd-0.3.3/src/api/cast.rs010064400007650000024000000061231342460246600150500ustar0000000000000000//! Implementation of `FromCast` and `IntoCast`. #![cfg_attr( feature = "cargo-clippy", allow( clippy::module_name_repetitions, clippy::stutter ) )] /// Numeric cast from `T` to `Self`. /// /// > Note: This is a temporary workaround until the conversion traits /// specified > in [RFC2484] are implemented. /// /// Numeric cast between vectors with the same number of lanes, such that: /// /// * casting integer vectors whose lane types have the same size (e.g. `i32xN` /// -> `u32xN`) is a **no-op**, /// /// * casting from a larger integer to a smaller integer (e.g. `u32xN` -> /// `u8xN`) will **truncate**, /// /// * casting from a smaller integer to a larger integer (e.g. `u8xN` -> /// `u32xN`) will: /// * **zero-extend** if the source is unsigned, or /// * **sign-extend** if the source is signed, /// /// * casting from a float to an integer will **round the float towards zero**, /// /// * casting from an integer to float will produce the floating point /// representation of the integer, **rounding to nearest, ties to even**, /// /// * casting from an `f32` to an `f64` is perfect and lossless, /// /// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. /// /// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484 pub trait FromCast: crate::marker::Sized { /// Numeric cast from `T` to `Self`. fn from_cast(_: T) -> Self; } /// Numeric cast from `Self` to `T`. /// /// > Note: This is a temporary workaround until the conversion traits /// specified > in [RFC2484] are implemented. /// /// Numeric cast between vectors with the same number of lanes, such that: /// /// * casting integer vectors whose lane types have the same size (e.g. `i32xN` /// -> `u32xN`) is a **no-op**, /// /// * casting from a larger integer to a smaller integer (e.g. `u32xN` -> /// `u8xN`) will **truncate**, /// /// * casting from a smaller integer to a larger integer (e.g. 
`u8xN` -> /// `u32xN`) will: /// * **zero-extend** if the source is unsigned, or /// * **sign-extend** if the source is signed, /// /// * casting from a float to an integer will **round the float towards zero**, /// /// * casting from an integer to float will produce the floating point /// representation of the integer, **rounding to nearest, ties to even**, /// /// * casting from an `f32` to an `f64` is perfect and lossless, /// /// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. /// /// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484 pub trait Cast: crate::marker::Sized { /// Numeric cast from `self` to `T`. fn cast(self) -> T; } /// `FromCast` implies `Cast`. impl Cast for T where U: FromCast, { #[inline] fn cast(self) -> U { U::from_cast(self) } } /// `FromCast` and `Cast` are reflexive impl FromCast for T { #[inline] fn from_cast(t: Self) -> Self { t } } #[macro_use] mod macros; mod v16; pub use self::v16::*; mod v32; pub use self::v32::*; mod v64; pub use self::v64::*; mod v128; pub use self::v128::*; mod v256; pub use self::v256::*; mod v512; pub use self::v512::*; packed_simd-0.3.3/src/api/cast/macros.rs010064400007650000024000000046031342460246600163350ustar0000000000000000//! Macros implementing `FromCast` macro_rules! impl_from_cast_ { ($id:ident[$test_tt:tt]: $from_ty:ident) => { impl crate::api::cast::FromCast<$from_ty> for $id { #[inline] fn from_cast(x: $from_ty) -> Self { use crate::llvm::simd_cast; debug_assert_eq!($from_ty::lanes(), $id::lanes()); Simd(unsafe { simd_cast(x.0) }) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _from_cast_ $from_ty>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn test() { assert_eq!($id::lanes(), $from_ty::lanes()); } } } } }; } macro_rules! impl_from_cast { ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { $( impl_from_cast_!($id[$test_tt]: $from_ty); )* } } macro_rules! impl_from_cast_mask_ { ($id:ident[$test_tt:tt]: $from_ty:ident) => { impl crate::api::cast::FromCast<$from_ty> for $id { #[inline] fn from_cast(x: $from_ty) -> Self { debug_assert_eq!($from_ty::lanes(), $id::lanes()); x.ne($from_ty::default()) .select($id::splat(true), $id::splat(false)) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _from_cast_ $from_ty>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn test() { assert_eq!($id::lanes(), $from_ty::lanes()); let x = $from_ty::default(); let m: $id = x.cast(); assert!(m.none()); } } } } }; } macro_rules! impl_from_cast_mask { ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { $( impl_from_cast_mask_!($id[$test_tt]: $from_ty); )* } } #[allow(unused)] macro_rules! impl_into_cast { ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { $( impl_from_cast_!($from_ty[$test_tt]: $id); )* } } packed_simd-0.3.3/src/api/cast/v128.rs010064400007650000024000000062041342460246600155500ustar0000000000000000//! 
`FromCast` and `IntoCast` implementations for portable 128-bit wide vectors #![rustfmt::skip] use crate::*; impl_from_cast!( i8x16[test_v128]: u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 ); impl_from_cast!( u8x16[test_v128]: i8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 ); impl_from_cast_mask!( m8x16[test_v128]: i8x16, u8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 ); impl_from_cast!( i16x8[test_v128]: i8x8, u8x8, m8x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ); impl_from_cast!( u16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, m16x8, i32x8, u32x8, f32x8, m32x8, i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ); impl_from_cast_mask!( m16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, u16x8, i32x8, u32x8, f32x8, m32x8, i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ); impl_from_cast!( i32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, u32x4, f32x4, m32x4, i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast!( u32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, f32x4, m32x4, i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast!( f32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, m32x4, i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast_mask!( m32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast!( i64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); impl_from_cast!( u64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); impl_from_cast!( f64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); impl_from_cast_mask!( m64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, f64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); impl_from_cast!( isizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, usizex2, msizex2 ); impl_from_cast!( usizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, msizex2 ); impl_from_cast_mask!( msizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2 ); // FIXME[test_v128]: 64-bit single element vectors into_cast impls impl_from_cast!(i128x1[test_v128]: u128x1, m128x1); impl_from_cast!(u128x1[test_v128]: i128x1, m128x1); impl_from_cast!(m128x1[test_v128]: i128x1, u128x1); packed_simd-0.3.3/src/api/cast/v16.rs010064400007650000024000000012331342460246600154610ustar0000000000000000//! 
`FromCast` and `IntoCast` implementations for portable 16-bit wide vectors #![rustfmt::skip] use crate::*; impl_from_cast!( i8x2[test_v16]: u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); impl_from_cast!( u8x2[test_v16]: i8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); impl_from_cast_mask!( m8x2[test_v16]: i8x2, u8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); packed_simd-0.3.3/src/api/cast/v256.rs010064400007650000024000000061711342460246600155550ustar0000000000000000//! `FromCast` and `IntoCast` implementations for portable 256-bit wide vectors #![rustfmt::skip] use crate::*; impl_from_cast!(i8x32[test_v256]: u8x32, m8x32, i16x32, u16x32, m16x32); impl_from_cast!(u8x32[test_v256]: i8x32, m8x32, i16x32, u16x32, m16x32); impl_from_cast_mask!(m8x32[test_v256]: i8x32, u8x32, i16x32, u16x32, m16x32); impl_from_cast!( i16x16[test_v256]: i8x16, u8x16, m8x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 ); impl_from_cast!( u16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, m16x16, i32x16, u32x16, f32x16, m32x16 ); impl_from_cast_mask!( m16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, u16x16, i32x16, u32x16, f32x16, m32x16 ); impl_from_cast!( i32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, u32x8, f32x8, m32x8, i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ); impl_from_cast!( u32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, f32x8, m32x8, i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ); impl_from_cast!( f32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, m32x8, i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ); impl_from_cast_mask!( m32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ); impl_from_cast!( i64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast!( u64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast!( f64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, u64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast_mask!( m64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, u64x4, f64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast!( i128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, f64x2, m64x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); impl_from_cast!( u128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, f64x2, m64x2, i128x2, m128x2, isizex2, usizex2, msizex2 ); impl_from_cast_mask!( m128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, m64x2, f64x2, i128x2, u128x2, isizex2, usizex2, msizex2 ); impl_from_cast!( isizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, usizex4, msizex4 ); impl_from_cast!( usizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, 
u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, msizex4 ); impl_from_cast_mask!( msizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4 ); packed_simd-0.3.3/src/api/cast/v32.rs010064400007650000024000000023061342460246600154610ustar0000000000000000//! `FromCast` and `IntoCast` implementations for portable 32-bit wide vectors #![rustfmt::skip] use crate::*; impl_from_cast!( i8x4[test_v32]: u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast!( u8x4[test_v32]: i8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast_mask!( m8x4[test_v32]: i8x4, u8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast!( i16x2[test_v32]: i8x2, u8x2, m8x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); impl_from_cast!( u16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, m16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); impl_from_cast_mask!( m16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, u16x2, i32x2, u32x2, f32x2, m32x2, i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); packed_simd-0.3.3/src/api/cast/v512.rs010064400007650000024000000051071342460246600155460ustar0000000000000000//! `FromCast` and `IntoCast` implementations for portable 512-bit wide vectors #![rustfmt::skip] use crate::*; impl_from_cast!(i8x64[test_v512]: u8x64, m8x64); impl_from_cast!(u8x64[test_v512]: i8x64, m8x64); impl_from_cast_mask!(m8x64[test_v512]: i8x64, u8x64); impl_from_cast!(i16x32[test_v512]: i8x32, u8x32, m8x32, u16x32, m16x32); impl_from_cast!(u16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, m16x32); impl_from_cast_mask!(m16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, u16x32); impl_from_cast!( i32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, u32x16, f32x16, m32x16 ); impl_from_cast!( u32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, f32x16, m32x16 ); impl_from_cast!( f32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, m32x16 ); impl_from_cast_mask!( m32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16 ); impl_from_cast!( i64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ); impl_from_cast!( u64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, i64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ); impl_from_cast!( f64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, i64x8, u64x8, m64x8, isizex8, usizex8, msizex8 ); impl_from_cast_mask!( m64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, i64x8, u64x8, f64x8, isizex8, usizex8, msizex8 ); impl_from_cast!( i128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, u64x4, f64x4, m64x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast!( u128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, u64x4, f64x4, m64x4, i128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast_mask!( 
m128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, u64x4, m64x4, f64x4, i128x4, u128x4, isizex4, usizex4, msizex4 ); impl_from_cast!( isizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, i64x8, u64x8, f64x8, m64x8, usizex8, msizex8 ); impl_from_cast!( usizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, i64x8, u64x8, f64x8, m64x8, isizex8, msizex8 ); impl_from_cast_mask!( msizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, i64x8, u64x8, f64x8, m64x8, isizex8, usizex8 ); packed_simd-0.3.3/src/api/cast/v64.rs010064400007650000024000000035401342460246600154670ustar0000000000000000//! `FromCast` and `IntoCast` implementations for portable 64-bit wide vectors #![rustfmt::skip] use crate::*; impl_from_cast!( i8x8[test_v64]: u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ); impl_from_cast!( u8x8[test_v64]: i8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ); impl_from_cast_mask!( m8x8[test_v64]: i8x8, u8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 ); impl_from_cast!( i16x4[test_v64]: i8x4, u8x4, m8x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast!( u16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, m16x4, i32x4, u32x4, f32x4, m32x4, i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast_mask!( m16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, u16x4, i32x4, u32x4, f32x4, m32x4, i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 ); impl_from_cast!( i32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, u32x2, f32x2, m32x2, i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); impl_from_cast!( u32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, f32x2, m32x2, i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); impl_from_cast!( f32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, m32x2, i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); impl_from_cast_mask!( m32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 ); packed_simd-0.3.3/src/api/cmp.rs010064400007650000024000000002601332536563700146770ustar0000000000000000//! Implement cmp traits for vector types #[macro_use] mod partial_eq; #[macro_use] mod eq; #[macro_use] mod partial_ord; #[macro_use] mod ord; #[macro_use] mod vertical; packed_simd-0.3.3/src/api/cmp/eq.rs010064400007650000024000000014531342460246600153030ustar0000000000000000//! Implements `Eq` for vector types. macro_rules! impl_cmp_eq { ( [$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt | ($true:expr, $false:expr) ) => { impl crate::cmp::Eq for $id {} impl crate::cmp::Eq for LexicographicallyOrdered<$id> {} test_if!{ $test_tt: paste::item! { pub mod [<$id _cmp_eq>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn eq() { fn foo(_: E) {} let a = $id::splat($false); foo(a); } } } } }; } packed_simd-0.3.3/src/api/cmp/ord.rs010064400007650000024000000025251342460246600154630ustar0000000000000000//! 
Implements `Ord` for vector types. macro_rules! impl_cmp_ord { ( [$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt | ($true:expr, $false:expr) ) => { impl $id { /// Returns a wrapper that implements `Ord`. #[inline] pub fn lex_ord(&self) -> LexicographicallyOrdered<$id> { LexicographicallyOrdered(*self) } } impl crate::cmp::Ord for LexicographicallyOrdered<$id> { #[inline] fn cmp(&self, other: &Self) -> crate::cmp::Ordering { match self.partial_cmp(other) { Some(x) => x, None => unsafe { crate::hint::unreachable_unchecked() }, } } } test_if!{ $test_tt: paste::item! { pub mod [<$id _cmp_ord>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn eq() { fn foo(_: E) {} let a = $id::splat($false); foo(a.partial_lex_ord()); foo(a.lex_ord()); } } } } }; } packed_simd-0.3.3/src/api/cmp/partial_eq.rs010064400007650000024000000043321342460246600170160ustar0000000000000000//! Implements `PartialEq` for vector types. macro_rules! impl_cmp_partial_eq { ( [$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt | ($true:expr, $false:expr) ) => { // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 #[cfg_attr(feature = "cargo-clippy", allow(clippy::partialeq_ne_impl))] impl crate::cmp::PartialEq<$id> for $id { #[inline] fn eq(&self, other: &Self) -> bool { $id::eq(*self, *other).all() } #[inline] fn ne(&self, other: &Self) -> bool { $id::ne(*self, *other).any() } } // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 #[cfg_attr(feature = "cargo-clippy", allow(clippy::partialeq_ne_impl))] impl crate::cmp::PartialEq> for LexicographicallyOrdered<$id> { #[inline] fn eq(&self, other: &Self) -> bool { self.0 == other.0 } #[inline] fn ne(&self, other: &Self) -> bool { self.0 != other.0 } } test_if!{ $test_tt: paste::item! { pub mod [<$id _cmp_PartialEq>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn partial_eq() { let a = $id::splat($false); let b = $id::splat($true); assert!(a != b); assert!(!(a == b)); assert!(a == a); assert!(!(a != a)); if $id::lanes() > 1 { let a = $id::splat($false).replace(0, $true); let b = $id::splat($true); assert!(a != b); assert!(!(a == b)); assert!(a == a); assert!(!(a != a)); } } } } } }; } packed_simd-0.3.3/src/api/cmp/partial_ord.rs010064400007650000024000000257171342460246600172070ustar0000000000000000//! Implements `PartialOrd` for vector types. //! //! This implements a lexicographical order. macro_rules! impl_cmp_partial_ord { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Returns a wrapper that implements `PartialOrd`. 
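// Usage sketch (illustrative; assumes e.g. a `u32x4` type): comparing
// `a.partial_lex_ord() < b.partial_lex_ord()` orders the two vectors
// lexicographically, with lane 0 as the most significant lane.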
#[inline] pub fn partial_lex_ord(&self) -> LexicographicallyOrdered<$id> { LexicographicallyOrdered(*self) } } impl crate::cmp::PartialOrd> for LexicographicallyOrdered<$id> { #[inline] fn partial_cmp( &self, other: &Self, ) -> Option { if PartialEq::eq(self, other) { Some(crate::cmp::Ordering::Equal) } else if PartialOrd::lt(self, other) { Some(crate::cmp::Ordering::Less) } else if PartialOrd::gt(self, other) { Some(crate::cmp::Ordering::Greater) } else { None } } #[inline] fn lt(&self, other: &Self) -> bool { let m_lt = self.0.lt(other.0); let m_eq = self.0.eq(other.0); for i in 0..$id::lanes() { if m_eq.extract(i) { continue; } return m_lt.extract(i); } false } #[inline] fn le(&self, other: &Self) -> bool { self.lt(other) | PartialEq::eq(self, other) } #[inline] fn ge(&self, other: &Self) -> bool { self.gt(other) | PartialEq::eq(self, other) } #[inline] fn gt(&self, other: &Self) -> bool { let m_gt = self.0.gt(other.0); let m_eq = self.0.eq(other.0); for i in 0..$id::lanes() { if m_eq.extract(i) { continue; } return m_gt.extract(i); } false } } }; } macro_rules! test_cmp_partial_ord_int { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { test_if!{ $test_tt: paste::item! { pub mod [<$id _cmp_PartialOrd>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn partial_lex_ord() { use crate::testing::utils::{test_cmp}; // constant values let a = $id::splat(0); let b = $id::splat(1); test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), Some(crate::cmp::Ordering::Less)); test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), Some(crate::cmp::Ordering::Greater)); test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), Some(crate::cmp::Ordering::Equal)); test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), Some(crate::cmp::Ordering::Equal)); // variable values: a = [0, 1, 2, 3]; b = [3, 2, 1, 0] let mut a = $id::splat(0); let mut b = $id::splat(0); for i in 0..$id::lanes() { a = a.replace(i, i as $elem_ty); b = b.replace(i, ($id::lanes() - i) as $elem_ty); } test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), Some(crate::cmp::Ordering::Less)); test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), Some(crate::cmp::Ordering::Greater)); test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), Some(crate::cmp::Ordering::Equal)); test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), Some(crate::cmp::Ordering::Equal)); // variable values: a = [0, 1, 2, 3]; b = [0, 1, 2, 4] let mut b = a; b = b.replace( $id::lanes() - 1, a.extract($id::lanes() - 1) + 1 as $elem_ty ); test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), Some(crate::cmp::Ordering::Less)); test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), Some(crate::cmp::Ordering::Greater)); test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), Some(crate::cmp::Ordering::Equal)); test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), Some(crate::cmp::Ordering::Equal)); if $id::lanes() > 2 { // variable values a = [0, 1, 0, 0]; b = [0, 1, 2, 3] let b = a; let mut a = $id::splat(0); a = a.replace(1, 1 as $elem_ty); test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), Some(crate::cmp::Ordering::Less)); test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), Some(crate::cmp::Ordering::Greater)); test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), Some(crate::cmp::Ordering::Equal)); test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), Some(crate::cmp::Ordering::Equal)); // variable values: a = [0, 1, 2, 3]; b = [0, 1, 3, 2] let mut b = a; b = b.replace( 2, a.extract($id::lanes() - 1) + 1 as $elem_ty ); 
test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), Some(crate::cmp::Ordering::Less)); test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), Some(crate::cmp::Ordering::Greater)); test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), Some(crate::cmp::Ordering::Equal)); test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), Some(crate::cmp::Ordering::Equal)); } } } } } }; } macro_rules! test_cmp_partial_ord_mask { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { test_if!{ $test_tt: paste::item! { pub mod [<$id _cmp_PartialOrd>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn partial_lex_ord() { use crate::testing::utils::{test_cmp}; use crate::cmp::Ordering; // constant values let a = $id::splat(false); let b = $id::splat(true); test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), Some(Ordering::Less)); test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), Some(Ordering::Greater)); test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), Some(Ordering::Equal)); test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), Some(Ordering::Equal)); // variable values: // a = [false, false, false, false]; // b = [false, false, false, true] let a = $id::splat(false); let mut b = $id::splat(false); b = b.replace($id::lanes() - 1, true); test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), Some(Ordering::Less)); test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), Some(Ordering::Greater)); test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), Some(Ordering::Equal)); test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), Some(Ordering::Equal)); // variable values: // a = [true, true, true, false]; // b = [true, true, true, true] let mut a = $id::splat(true); let b = $id::splat(true); a = a.replace($id::lanes() - 1, false); test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), Some(Ordering::Less)); test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), Some(Ordering::Greater)); test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), Some(Ordering::Equal)); test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), Some(Ordering::Equal)); if $id::lanes() > 2 { // variable values // a = [false, true, false, false]; // b = [false, true, true, true] let mut a = $id::splat(false); let mut b = $id::splat(true); a = a.replace(1, true); b = b.replace(0, false); test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), Some(Ordering::Less)); test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), Some(Ordering::Greater)); test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), Some(Ordering::Equal)); test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), Some(Ordering::Equal)); } } } } } }; } packed_simd-0.3.3/src/api/cmp/vertical.rs010064400007650000024000000077761342460246600165250ustar0000000000000000//! Vertical (lane-wise) vector comparisons returning vector masks. macro_rules! impl_cmp_vertical { ( [$elem_ty:ident; $elem_count:expr]: $id:ident, $mask_ty:ident, $is_mask:expr,($true:expr, $false:expr) | $test_tt:tt ) => { impl $id { /// Lane-wise equality comparison. #[inline] pub fn eq(self, other: Self) -> $mask_ty { use crate::llvm::simd_eq; Simd(unsafe { simd_eq(self.0, other.0) }) } /// Lane-wise inequality comparison. #[inline] pub fn ne(self, other: Self) -> $mask_ty { use crate::llvm::simd_ne; Simd(unsafe { simd_ne(self.0, other.0) }) } /// Lane-wise less-than comparison. 
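// Note on the `$is_mask` branches below (inferred from the code): mask
// vectors store `true` as all-ones, which is `-1` when interpreted as a
// signed integer, so the underlying integer comparison is reversed. The
// ordered comparisons therefore swap `simd_lt`/`simd_gt` (and
// `simd_le`/`simd_ge`) for masks, so that `false < true` holds lane-wise.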
#[inline] pub fn lt(self, other: Self) -> $mask_ty { use crate::llvm::{simd_gt, simd_lt}; if $is_mask { Simd(unsafe { simd_gt(self.0, other.0) }) } else { Simd(unsafe { simd_lt(self.0, other.0) }) } } /// Lane-wise less-than-or-equals comparison. #[inline] pub fn le(self, other: Self) -> $mask_ty { use crate::llvm::{simd_ge, simd_le}; if $is_mask { Simd(unsafe { simd_ge(self.0, other.0) }) } else { Simd(unsafe { simd_le(self.0, other.0) }) } } /// Lane-wise greater-than comparison. #[inline] pub fn gt(self, other: Self) -> $mask_ty { use crate::llvm::{simd_gt, simd_lt}; if $is_mask { Simd(unsafe { simd_lt(self.0, other.0) }) } else { Simd(unsafe { simd_gt(self.0, other.0) }) } } /// Lane-wise greater-than-or-equals comparison. #[inline] pub fn ge(self, other: Self) -> $mask_ty { use crate::llvm::{simd_ge, simd_le}; if $is_mask { Simd(unsafe { simd_le(self.0, other.0) }) } else { Simd(unsafe { simd_ge(self.0, other.0) }) } } } test_if!{ $test_tt: paste::item! { pub mod [<$id _cmp_vertical>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn cmp() { let a = $id::splat($false); let b = $id::splat($true); let r = a.lt(b); let e = $mask_ty::splat(true); assert!(r == e); let r = a.le(b); assert!(r == e); let e = $mask_ty::splat(false); let r = a.gt(b); assert!(r == e); let r = a.ge(b); assert!(r == e); let r = a.eq(b); assert!(r == e); let mut a = a; let mut b = b; let mut e = e; for i in 0..$id::lanes() { if i % 2 == 0 { a = a.replace(i, $false); b = b.replace(i, $true); e = e.replace(i, true); } else { a = a.replace(i, $true); b = b.replace(i, $false); e = e.replace(i, false); } } let r = a.lt(b); assert!(r == e); } } } } }; } packed_simd-0.3.3/src/api/default.rs010064400007650000024000000015571342460246600155500ustar0000000000000000//! Implements `Default` for vector types. macro_rules! impl_default { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl Default for $id { #[inline] fn default() -> Self { Self::splat($elem_ty::default()) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _default>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn default() { let a = $id::default(); for i in 0..$id::lanes() { assert_eq!(a.extract(i), $elem_ty::default()); } } } } } }; } packed_simd-0.3.3/src/api/fmt.rs010064400007650000024000000002411332536563700147050ustar0000000000000000//! Implements formatting APIs #[macro_use] mod debug; #[macro_use] mod lower_hex; #[macro_use] mod upper_hex; #[macro_use] mod octal; #[macro_use] mod binary; packed_simd-0.3.3/src/api/fmt/binary.rs010064400007650000024000000044221342460246600161700ustar0000000000000000//! Implement Octal formatting macro_rules! impl_fmt_binary { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl crate::fmt::Binary for $id { #[cfg_attr( feature = "cargo-clippy", allow(clippy::missing_inline_in_public_items) )] fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result { write!(f, "{}(", stringify!($id))?; for i in 0..$elem_count { if i > 0 { write!(f, ", ")?; } self.extract(i).fmt(f)?; } write!(f, ")") } } test_if!{ $test_tt: paste::item! 
{ pub mod [<$id _fmt_binary>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn binary() { use arrayvec::{ArrayString,ArrayVec}; type TinyString = ArrayString<[u8; 512]>; use crate::fmt::Write; let v = $id::splat($elem_ty::default()); let mut s = TinyString::new(); write!(&mut s, "{:#b}", v).unwrap(); let mut beg = TinyString::new(); write!(&mut beg, "{}(", stringify!($id)).unwrap(); assert!(s.starts_with(beg.as_str())); assert!(s.ends_with(")")); let s: ArrayVec<[TinyString; 64]> = s.replace(beg.as_str(), "") .replace(")", "").split(",") .map(|v| TinyString::from(v.trim()).unwrap()) .collect(); assert_eq!(s.len(), $id::lanes()); for (index, ss) in s.into_iter().enumerate() { let mut e = TinyString::new(); write!(&mut e, "{:#b}", v.extract(index)).unwrap(); assert_eq!(ss, e); } } } } } }; } packed_simd-0.3.3/src/api/fmt/debug.rs010064400007650000024000000047161342460246600160000ustar0000000000000000//! Implement debug formatting macro_rules! impl_fmt_debug_tests { ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => { test_if!{ $test_tt: paste::item! { pub mod [<$id _fmt_debug>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn debug() { use arrayvec::{ArrayString,ArrayVec}; type TinyString = ArrayString<[u8; 512]>; use crate::fmt::Write; let v = $id::default(); let mut s = TinyString::new(); write!(&mut s, "{:?}", v).unwrap(); let mut beg = TinyString::new(); write!(&mut beg, "{}(", stringify!($id)).unwrap(); assert!(s.starts_with(beg.as_str())); assert!(s.ends_with(")")); let s: ArrayVec<[TinyString; 64]> = s.replace(beg.as_str(), "") .replace(")", "").split(",") .map(|v| TinyString::from(v.trim()).unwrap()) .collect(); assert_eq!(s.len(), $id::lanes()); for (index, ss) in s.into_iter().enumerate() { let mut e = TinyString::new(); write!(&mut e, "{:?}", v.extract(index)).unwrap(); assert_eq!(ss, e); } } } } } }; } macro_rules! impl_fmt_debug { ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl crate::fmt::Debug for $id { #[cfg_attr( feature = "cargo-clippy", allow(clippy::missing_inline_in_public_items) )] fn fmt( &self, f: &mut crate::fmt::Formatter<'_>, ) -> crate::fmt::Result { write!(f, "{}(", stringify!($id))?; for i in 0..$elem_count { if i > 0 { write!(f, ", ")?; } self.extract(i).fmt(f)?; } write!(f, ")") } } impl_fmt_debug_tests!([$elem_ty; $elem_count]: $id | $test_tt); }; } packed_simd-0.3.3/src/api/fmt/lower_hex.rs010064400007650000024000000044361342460246600167050ustar0000000000000000//! Implement `LowerHex` formatting macro_rules! impl_fmt_lower_hex { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl crate::fmt::LowerHex for $id { #[cfg_attr( feature = "cargo-clippy", allow(clippy::missing_inline_in_public_items) )] fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result { write!(f, "{}(", stringify!($id))?; for i in 0..$elem_count { if i > 0 { write!(f, ", ")?; } self.extract(i).fmt(f)?; } write!(f, ")") } } test_if!{ $test_tt: paste::item! 
{ pub mod [<$id _fmt_lower_hex>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn lower_hex() { use arrayvec::{ArrayString,ArrayVec}; type TinyString = ArrayString<[u8; 512]>; use crate::fmt::Write; let v = $id::splat($elem_ty::default()); let mut s = TinyString::new(); write!(&mut s, "{:#x}", v).unwrap(); let mut beg = TinyString::new(); write!(&mut beg, "{}(", stringify!($id)).unwrap(); assert!(s.starts_with(beg.as_str())); assert!(s.ends_with(")")); let s: ArrayVec<[TinyString; 64]> = s.replace(beg.as_str(), "").replace(")", "") .split(",") .map(|v| TinyString::from(v.trim()).unwrap()) .collect(); assert_eq!(s.len(), $id::lanes()); for (index, ss) in s.into_iter().enumerate() { let mut e = TinyString::new(); write!(&mut e, "{:#x}", v.extract(index)).unwrap(); assert_eq!(ss, e); } } } } } }; } packed_simd-0.3.3/src/api/fmt/octal.rs010064400007650000024000000044221342460246600160060ustar0000000000000000//! Implement Octal formatting macro_rules! impl_fmt_octal { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl crate::fmt::Octal for $id { #[cfg_attr( feature = "cargo-clippy", allow(clippy::missing_inline_in_public_items) )] fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result { write!(f, "{}(", stringify!($id))?; for i in 0..$elem_count { if i > 0 { write!(f, ", ")?; } self.extract(i).fmt(f)?; } write!(f, ")") } } test_if!{ $test_tt: paste::item! { pub mod [<$id _fmt_octal>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn octal_hex() { use arrayvec::{ArrayString,ArrayVec}; type TinyString = ArrayString<[u8; 512]>; use crate::fmt::Write; let v = $id::splat($elem_ty::default()); let mut s = TinyString::new(); write!(&mut s, "{:#o}", v).unwrap(); let mut beg = TinyString::new(); write!(&mut beg, "{}(", stringify!($id)).unwrap(); assert!(s.starts_with(beg.as_str())); assert!(s.ends_with(")")); let s: ArrayVec<[TinyString; 64]> = s.replace(beg.as_str(), "").replace(")", "") .split(",") .map(|v| TinyString::from(v.trim()).unwrap()) .collect(); assert_eq!(s.len(), $id::lanes()); for (index, ss) in s.into_iter().enumerate() { let mut e = TinyString::new(); write!(&mut e, "{:#o}", v.extract(index)).unwrap(); assert_eq!(ss, e); } } } } } }; } packed_simd-0.3.3/src/api/fmt/upper_hex.rs010064400007650000024000000044421342460246600167050ustar0000000000000000//! Implement `UpperHex` formatting macro_rules! impl_fmt_upper_hex { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl crate::fmt::UpperHex for $id { #[cfg_attr( feature = "cargo-clippy", allow(clippy::missing_inline_in_public_items) )] fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result { write!(f, "{}(", stringify!($id))?; for i in 0..$elem_count { if i > 0 { write!(f, ", ")?; } self.extract(i).fmt(f)?; } write!(f, ")") } } test_if!{ $test_tt: paste::item! 
{ pub mod [<$id _fmt_upper_hex>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn upper_hex() { use arrayvec::{ArrayString,ArrayVec}; type TinyString = ArrayString<[u8; 512]>; use crate::fmt::Write; let v = $id::splat($elem_ty::default()); let mut s = TinyString::new(); write!(&mut s, "{:#X}", v).unwrap(); let mut beg = TinyString::new(); write!(&mut beg, "{}(", stringify!($id)).unwrap(); assert!(s.starts_with(beg.as_str())); assert!(s.ends_with(")")); let s: ArrayVec<[TinyString; 64]> = s.replace(beg.as_str(), "").replace(")", "") .split(",") .map(|v| TinyString::from(v.trim()).unwrap()) .collect(); assert_eq!(s.len(), $id::lanes()); for (index, ss) in s.into_iter().enumerate() { let mut e = TinyString::new(); write!(&mut e, "{:#X}", v.extract(index)).unwrap(); assert_eq!(ss, e); } } } } } }; } packed_simd-0.3.3/src/api/from.rs010064400007650000024000000001611332536563700150630ustar0000000000000000//! Implementations of the `From` and `Into` traits #[macro_use] mod from_array; #[macro_use] mod from_vector; packed_simd-0.3.3/src/api/from/from_array.rs010064400007650000024000000107631342461154100172210ustar0000000000000000//! Implements `From<[T; N]>` and `Into<[T; N]>` for vector types. macro_rules! impl_from_array { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt | ($non_default_array:expr, $non_default_vec:expr)) => { impl From<[$elem_ty; $elem_count]> for $id { #[inline] fn from(array: [$elem_ty; $elem_count]) -> Self { union U { array: [$elem_ty; $elem_count], vec: $id, } unsafe { U { array }.vec } } } impl From<$id> for [$elem_ty; $elem_count] { #[inline] fn from(vec: $id) -> Self { union U { array: [$elem_ty; $elem_count], vec: $id, } unsafe { U { vec }.array } } } // FIXME: `Into::into` is not inline, but due to // the blanket impl in `std`, which is not // marked `default`, we cannot override it here with // specialization. /* impl Into<[$elem_ty; $elem_count]> for $id { #[inline] fn into(self) -> [$elem_ty; $elem_count] { union U { array: [$elem_ty; $elem_count], vec: $id, } unsafe { U { vec: self }.array } } } impl Into<$id> for [$elem_ty; $elem_count] { #[inline] fn into(self) -> $id { union U { array: [$elem_ty; $elem_count], vec: $id, } unsafe { U { array: self }.vec } } } */ test_if! { $test_tt: paste::item! { mod [<$id _from>] { use super::*; #[test] fn array() { let vec: $id = Default::default(); // FIXME: Workaround for arrays with more than 32 // elements. // // Safe because we never take a reference to any // uninitialized element. union W { array: [$elem_ty; $elem_count], other: () } let mut array = W { other: () }; for i in 0..$elem_count { let default: $elem_ty = Default::default(); // note: array.other is the active member and // initialized so we can take a reference to it: let p = unsafe { &mut array.other as *mut () as *mut $elem_ty }; // note: default is a valid bit-pattern for // $elem_ty: unsafe { crate::ptr::write(p.wrapping_add(i), default) }; } // note: the array variant of the union is properly // initialized: let mut array = unsafe { array.array }; array[0] = $non_default_array; let vec = vec.replace(0, $non_default_vec); let vec_from_array = $id::from(array); assert_eq!(vec_from_array, vec); let array_from_vec = <[$elem_ty; $elem_count]>::from(vec); // FIXME: Workaround for arrays with more than 32 // elements. 
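// (Context for the workaround above: when this crate was written, the
// standard library implemented traits such as `PartialEq` only for arrays
// of up to 32 elements, so longer arrays are compared element-by-element
// instead of with a single `assert_eq!` on the arrays themselves.)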
for i in 0..$elem_count { assert_eq!(array_from_vec[i], array[i]); } let vec_from_into_array: $id = array.into(); assert_eq!(vec_from_into_array, vec); let array_from_into_vec: [$elem_ty; $elem_count] = vec.into(); // FIXME: Workaround for arrays with more than 32 // elements. for i in 0..$elem_count { assert_eq!(array_from_into_vec[i], array[i]); } } } } } }; } packed_simd-0.3.3/src/api/from/from_vector.rs010064400007650000024000000041511342461154100173990ustar0000000000000000//! Implements `From` and `Into` for vector types. macro_rules! impl_from_vector { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt | $source:ident) => { impl From<$source> for $id { #[inline] fn from(source: $source) -> Self { fn static_assert_same_number_of_lanes<T, U>() where T: crate::sealed::Simd, U: crate::sealed::Simd, { } use crate::llvm::simd_cast; static_assert_same_number_of_lanes::<$id, $source>(); Simd(unsafe { simd_cast(source.0) }) } } // FIXME: `Into::into` is not inline, but due to the blanket impl in // `std`, which is not marked `default`, we cannot override it here // with specialization. /* impl Into<$id> for $source { #[inline] fn into(self) -> $id { unsafe { simd_cast(self) } } } */ test_if! { $test_tt: paste::item! { pub mod [<$id _from_ $source>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn from() { assert_eq!($id::lanes(), $source::lanes()); let source: $source = Default::default(); let vec: $id = Default::default(); let e = $id::from(source); assert_eq!(e, vec); let e: $id = source.into(); assert_eq!(e, vec); } } } } }; } macro_rules! impl_from_vectors { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt | $($source:ident),*) => { $( impl_from_vector!( [$elem_ty; $elem_count]: $id | $test_tt | $source ); )* } } packed_simd-0.3.3/src/api/hash.rs010064400007650000024000000033211342460246600150360ustar0000000000000000//! Implements `Hash` for vector types. macro_rules! impl_hash { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl crate::hash::Hash for $id { #[inline] fn hash<H: crate::hash::Hasher>(&self, state: &mut H) { unsafe { union A { data: [$elem_ty; $id::lanes()], vec: $id, } A { vec: *self }.data.hash(state) } } } test_if! { $test_tt: paste::item! { pub mod [<$id _hash>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn hash() { use crate::hash::{Hash, Hasher}; #[allow(deprecated)] use crate::hash::{SipHasher13}; type A = [$elem_ty; $id::lanes()]; let a: A = [42 as $elem_ty; $id::lanes()]; assert_eq!( crate::mem::size_of::<A>(), crate::mem::size_of::<$id>() ); #[allow(deprecated)] let mut a_hash = SipHasher13::new(); let mut v_hash = a_hash.clone(); a.hash(&mut a_hash); let v = $id::splat(42 as $elem_ty); v.hash(&mut v_hash); assert_eq!(a_hash.finish(), v_hash.finish()); } } } } }; } packed_simd-0.3.3/src/api/into_bits.rs010064400007650000024000000021611342460246600161060ustar0000000000000000//! Implementation of `FromBits` and `IntoBits`. /// Safe lossless bitwise conversion from `T` to `Self`. pub trait FromBits<T>: crate::marker::Sized { /// Safe lossless bitwise transmute from `T` to `Self`. fn from_bits(t: T) -> Self; } /// Safe lossless bitwise conversion from `Self` to `T`. pub trait IntoBits<T>: crate::marker::Sized { /// Safe lossless bitwise transmute from `self` to `T`. fn into_bits(self) -> T; } /// `FromBits` implies `IntoBits`.
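// Illustrative usage sketch (not part of the crate's compiled sources):
// how this trait pair is meant to be used from a crate that depends on
// `packed_simd`. The `i8x16`/`u8x16` impls are generated in `v128.rs` below:
//
//     use packed_simd::{i8x16, u8x16, FromBits, IntoBits};
//
//     let x = i8x16::splat(-1);        // every lane has all bits set
//     let y = u8x16::from_bits(x);     // same 128 bits, lanes reinterpreted
//     assert_eq!(y, u8x16::splat(u8::max_value()));
//     let z: u8x16 = x.into_bits();    // via the blanket impl below
//     assert_eq!(y, z);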
impl<T, U> IntoBits<U> for T where U: FromBits<T>, { #[inline] fn into_bits(self) -> U { debug_assert!( crate::mem::size_of::<Self>() == crate::mem::size_of::<U>() ); U::from_bits(self) } } /// `FromBits` and `IntoBits` are reflexive. impl<T> FromBits<T> for T { #[inline] fn from_bits(t: Self) -> Self { t } } #[macro_use] mod macros; mod v16; pub use self::v16::*; mod v32; pub use self::v32::*; mod v64; pub use self::v64::*; mod v128; pub use self::v128::*; mod v256; pub use self::v256::*; mod v512; pub use self::v512::*; mod arch_specific; pub use self::arch_specific::*; packed_simd-0.3.3/src/api/into_bits/arch_specific.rs010064400007650000024000000160011342636765500207000ustar0000000000000000//! `FromBits` and `IntoBits` between portable vector types and the //! architecture-specific vector types. #![rustfmt::skip] // FIXME: MIPS FromBits/IntoBits #[allow(unused)] use crate::*; /// This macro implements FromBits for the portable and the architecture /// specific vector types. /// /// The "leaf" case is at the bottom, and the most generic case is at the top. /// The generic case is split into smaller cases recursively. macro_rules! impl_arch { ([$arch_head_i:ident[$arch_head_tt:tt]: $($arch_head_ty:ident),*], $([$arch_tail_i:ident[$arch_tail_tt:tt]: $($arch_tail_ty:ident),*]),* | from: $($from_ty:ident),* | into: $($into_ty:ident),* | test: $test_tt:tt) => { impl_arch!( [$arch_head_i[$arch_head_tt]: $($arch_head_ty),*] | from: $($from_ty),* | into: $($into_ty),* | test: $test_tt ); impl_arch!( $([$arch_tail_i[$arch_tail_tt]: $($arch_tail_ty),*]),* | from: $($from_ty),* | into: $($into_ty),* | test: $test_tt ); }; ([$arch:ident[$arch_tt:tt]: $($arch_ty:ident),*] | from: $($from_ty:ident),* | into: $($into_ty:ident),* | test: $test_tt:tt) => { // note: if target is "arm", "+v7,+neon" must be enabled // and the std library must be recompiled with them #[cfg(any( not(target_arch = "arm"), all(target_feature = "v7", target_feature = "neon", any(feature = "core_arch", libcore_neon))) )] // note: if target is "powerpc", "altivec" must be enabled // and the std library must be recompiled with it #[cfg(any( not(target_arch = "powerpc"), all(target_feature = "altivec", feature = "core_arch"), ))] #[cfg(target_arch = $arch_tt)] use crate::arch::$arch::{ $($arch_ty),* }; #[cfg(any( not(target_arch = "arm"), all(target_feature = "v7", target_feature = "neon", any(feature = "core_arch", libcore_neon))) )] #[cfg(any( not(target_arch = "powerpc"), all(target_feature = "altivec", feature = "core_arch"), ))] #[cfg(target_arch = $arch_tt)] impl_arch!($($arch_ty),* | $($from_ty),* | $($into_ty),* | test: $test_tt); }; ($arch_head:ident, $($arch_tail:ident),* | $($from_ty:ident),* | $($into_ty:ident),* | test: $test_tt:tt) => { impl_arch!($arch_head | $($from_ty),* | $($into_ty),* | test: $test_tt); impl_arch!($($arch_tail),* | $($from_ty),* | $($into_ty),* | test: $test_tt); }; ($arch_head:ident | $($from_ty:ident),* | $($into_ty:ident),* | test: $test_tt:tt) => { impl_from_bits!($arch_head[$test_tt]: $($from_ty),*); impl_into_bits!($arch_head[$test_tt]: $($into_ty),*); }; } //////////////////////////////////////////////////////////////////////////////// // Implementations for the 64-bit wide vector types: // FIXME: 64-bit single element types // FIXME: arm/aarch float16x4_t missing impl_arch!( [x86["x86"]: __m64], [x86_64["x86_64"]: __m64], [arm["arm"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t, poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, uint64x1_t], [aarch64["aarch64"]: int8x8_t, uint8x8_t, poly8x8_t,
int16x4_t, uint16x4_t, poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, uint64x1_t, float64x1_t] | from: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2 | into: i8x8, u8x8, i16x4, u16x4, i32x2, u32x2, f32x2 | test: test_v64 ); //////////////////////////////////////////////////////////////////////////////// // Implementations for the 128-bit wide vector types: // FIXME: arm/aarch float16x8_t missing // FIXME: ppc vector_pixel missing // FIXME: ppc64 vector_Float16 missing // FIXME: ppc64 vector_signed_long_long missing // FIXME: ppc64 vector_unsigned_long_long missing // FIXME: ppc64 vector_bool_long_long missing // FIXME: ppc64 vector_signed___int128 missing // FIXME: ppc64 vector_unsigned___int128 missing impl_arch!( [x86["x86"]: __m128, __m128i, __m128d], [x86_64["x86_64"]: __m128, __m128i, __m128d], [arm["arm"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, uint16x8_t, poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, uint64x2_t], [aarch64["aarch64"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, uint16x8_t, poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, uint64x2_t, float64x2_t], [powerpc["powerpc"]: vector_signed_char, vector_unsigned_char, vector_signed_short, vector_unsigned_short, vector_signed_int, vector_unsigned_int, vector_float], [powerpc64["powerpc64"]: vector_signed_char, vector_unsigned_char, vector_signed_short, vector_unsigned_short, vector_signed_int, vector_unsigned_int, vector_float, vector_signed_long, vector_unsigned_long, vector_double] | from: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1 | into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, i64x2, u64x2, f64x2, i128x1, u128x1 | test: test_v128 ); impl_arch!( [powerpc["powerpc"]: vector_bool_char], [powerpc64["powerpc64"]: vector_bool_char] | from: m8x16, m16x8, m32x4, m64x2, m128x1 | into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, i64x2, u64x2, f64x2, i128x1, u128x1, // Masks: m8x16 | test: test_v128 ); impl_arch!( [powerpc["powerpc"]: vector_bool_short], [powerpc64["powerpc64"]: vector_bool_short] | from: m16x8, m32x4, m64x2, m128x1 | into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, i64x2, u64x2, f64x2, i128x1, u128x1, // Masks: m8x16, m16x8 | test: test_v128 ); impl_arch!( [powerpc["powerpc"]: vector_bool_int], [powerpc64["powerpc64"]: vector_bool_int] | from: m32x4, m64x2, m128x1 | into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, i64x2, u64x2, f64x2, i128x1, u128x1, // Masks: m8x16, m16x8, m32x4 | test: test_v128 ); impl_arch!( [powerpc64["powerpc64"]: vector_bool_long] | from: m64x2, m128x1 | into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, i64x2, u64x2, f64x2, i128x1, u128x1, // Masks: m8x16, m16x8, m32x4, m64x2 | test: test_v128 ); //////////////////////////////////////////////////////////////////////////////// // Implementations for the 256-bit wide vector types impl_arch!( [x86["x86"]: __m256, __m256i, __m256d], [x86_64["x86_64"]: __m256, __m256i, __m256d] | from: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2 | into: i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, f32x8, i64x4, u64x4, f64x4, i128x2, u128x2 | test: test_v256 ); //////////////////////////////////////////////////////////////////////////////// // FIXME: Implementations for the 512-bit wide vector types packed_simd-0.3.3/src/api/into_bits/macros.rs010064400007650000024000000053111342461154100173640ustar0000000000000000//! 
Macros implementing `FromBits` macro_rules! impl_from_bits_ { ($id:ident[$test_tt:tt]: $from_ty:ident) => { impl crate::api::into_bits::FromBits<$from_ty> for $id { #[inline] fn from_bits(x: $from_ty) -> Self { unsafe { crate::mem::transmute(x) } } } test_if! { $test_tt: paste::item! { pub mod [<$id _from_bits_ $from_ty>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn test() { use crate::{ ptr::{read_unaligned}, mem::{size_of, zeroed} }; use crate::IntoBits; assert_eq!(size_of::<$id>(), size_of::<$from_ty>()); // This is safe because we never create a reference to // uninitialized memory: let a: $from_ty = unsafe { zeroed() }; let b_0: $id = crate::FromBits::from_bits(a); let b_1: $id = a.into_bits(); // Check that these are byte-wise equal, that is, // that the bit patterns are identical: for i in 0..size_of::<$id>() { // This is safe because we only read initialized // memory in bounds. Also, taking a reference to // `b_i` is ok because the fields are initialized. unsafe { let b_0_v: u8 = read_unaligned( (&b_0 as *const $id as *const u8) .wrapping_add(i) ); let b_1_v: u8 = read_unaligned( (&b_1 as *const $id as *const u8) .wrapping_add(i) ); assert_eq!(b_0_v, b_1_v); } } } } } } }; } macro_rules! impl_from_bits { ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { $( impl_from_bits_!($id[$test_tt]: $from_ty); )* } } #[allow(unused)] macro_rules! impl_into_bits { ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { $( impl_from_bits_!($from_ty[$test_tt]: $id); )* } } packed_simd-0.3.3/src/api/into_bits/v128.rs010064400007650000024000000043201342460246600166050ustar0000000000000000//! `FromBits` and `IntoBits` implementations for portable 128-bit wide vectors #![rustfmt::skip] #[allow(unused)] // wasm_bindgen_test use crate::*; impl_from_bits!(i8x16[test_v128]: u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); impl_from_bits!(u8x16[test_v128]: i8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); impl_from_bits!(m8x16[test_v128]: m16x8, m32x4, m64x2, m128x1); impl_from_bits!(i16x8[test_v128]: i8x16, u8x16, m8x16, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); impl_from_bits!(u16x8[test_v128]: i8x16, u8x16, m8x16, i16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); impl_from_bits!(m16x8[test_v128]: m32x4, m64x2, m128x1); impl_from_bits!(i32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); impl_from_bits!(u32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); impl_from_bits!(f32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); impl_from_bits!(m32x4[test_v128]: m64x2, m128x1); impl_from_bits!(i64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); impl_from_bits!(u64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, f64x2, m64x2, i128x1, u128x1, m128x1); impl_from_bits!(f64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, m64x2, i128x1, u128x1, m128x1); impl_from_bits!(m64x2[test_v128]: m128x1);
impl_from_bits!(i128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, u128x1, m128x1); impl_from_bits!(u128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, m128x1); // note: m128x1 cannot be constructed from all the other masks bit patterns in here packed_simd-0.3.3/src/api/into_bits/v16.rs010064400007650000024000000004711342460246600165240ustar0000000000000000//! `FromBits` and `IntoBits` implementations for portable 16-bit wide vectors #![rustfmt::skip] #[allow(unused)] // wasm_bindgen_test use crate::*; impl_from_bits!(i8x2[test_v16]: u8x2, m8x2); impl_from_bits!(u8x2[test_v16]: i8x2, m8x2); // note: m8x2 cannot be constructed from all i8x2 or u8x2 bit patterns packed_simd-0.3.3/src/api/into_bits/v256.rs010064400007650000024000000043651342460246600166200ustar0000000000000000//! `FromBits` and `IntoBits` implementations for portable 256-bit wide vectors #![rustfmt::skip] #[allow(unused)] // wasm_bindgen_test use crate::*; impl_from_bits!(i8x32[test_v256]: u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); impl_from_bits!(u8x32[test_v256]: i8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); impl_from_bits!(m8x32[test_v256]: m16x16, m32x8, m64x4, m128x2); impl_from_bits!(i16x16[test_v256]: i8x32, u8x32, m8x32, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); impl_from_bits!(u16x16[test_v256]: i8x32, u8x32, m8x32, i16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); impl_from_bits!(m16x16[test_v256]: m32x8, m64x4, m128x2); impl_from_bits!(i32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); impl_from_bits!(u32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); impl_from_bits!(f32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); impl_from_bits!(m32x8[test_v256]: m64x4, m128x2); impl_from_bits!(i64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); impl_from_bits!(u64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, f64x4, m64x4, i128x2, u128x2, m128x2); impl_from_bits!(f64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, m64x4, i128x2, u128x2, m128x2); impl_from_bits!(m64x4[test_v256]: m128x2); impl_from_bits!(i128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, u128x2, m128x2); impl_from_bits!(u128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, m128x2); // note: m128x2 cannot be constructed from all the other masks bit patterns in here packed_simd-0.3.3/src/api/into_bits/v32.rs010064400007650000024000000010111342460246600165110ustar0000000000000000//! 
`FromBits` and `IntoBits` implementations for portable 32-bit wide vectors #![rustfmt::skip] #[allow(unused)] // wasm_bindgen_test use crate::*; impl_from_bits!(i8x4[test_v32]: u8x4, m8x4, i16x2, u16x2, m16x2); impl_from_bits!(u8x4[test_v32]: i8x4, m8x4, i16x2, u16x2, m16x2); impl_from_bits!(m8x4[test_v32]: m16x2); impl_from_bits!(i16x2[test_v32]: i8x4, u8x4, m8x4, u16x2, m16x2); impl_from_bits!(u16x2[test_v32]: i8x4, u8x4, m8x4, i16x2, m16x2); // note: m16x2 cannot be constructed from all m8x4 bit patterns packed_simd-0.3.3/src/api/into_bits/v512.rs010064400007650000024000000044501342460246600166060ustar0000000000000000//! `FromBits` and `IntoBits` implementations for portable 512-bit wide vectors #![rustfmt::skip] #[allow(unused)] // wasm_bindgen_test use crate::*; impl_from_bits!(i8x64[test_v512]: u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); impl_from_bits!(u8x64[test_v512]: i8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); impl_from_bits!(m8x64[test_v512]: m16x32, m32x16, m64x8, m128x4); impl_from_bits!(i16x32[test_v512]: i8x64, u8x64, m8x64, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); impl_from_bits!(u16x32[test_v512]: i8x64, u8x64, m8x64, i16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); impl_from_bits!(m16x32[test_v512]: m32x16, m64x8, m128x4); impl_from_bits!(i32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); impl_from_bits!(u32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); impl_from_bits!(f32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); impl_from_bits!(m32x16[test_v512]: m64x8, m128x4); impl_from_bits!(i64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); impl_from_bits!(u64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, f64x8, m64x8, i128x4, u128x4, m128x4); impl_from_bits!(f64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, m64x8, i128x4, u128x4, m128x4); impl_from_bits!(m64x8[test_v512]: m128x4); impl_from_bits!(i128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, u128x4, m128x4); impl_from_bits!(u128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, m128x4); // note: m128x4 cannot be constructed from all the other masks bit patterns in here packed_simd-0.3.3/src/api/into_bits/v64.rs010064400007650000024000000017151342460246600165310ustar0000000000000000//! 
`FromBits` and `IntoBits` implementations for portable 64-bit wide vectors #![rustfmt::skip] #[allow(unused)] // wasm_bindgen_test use crate::*; impl_from_bits!(i8x8[test_v64]: u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); impl_from_bits!(u8x8[test_v64]: i8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); impl_from_bits!(m8x8[test_v64]: m16x4, m32x2); impl_from_bits!(i16x4[test_v64]: i8x8, u8x8, m8x8, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); impl_from_bits!(u16x4[test_v64]: i8x8, u8x8, m8x8, i16x4, m16x4, i32x2, u32x2, f32x2, m32x2); impl_from_bits!(m16x4[test_v64]: m32x2); impl_from_bits!(i32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, u32x2, f32x2, m32x2); impl_from_bits!(u32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, f32x2, m32x2); impl_from_bits!(f32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, m32x2); // note: m32x2 cannot be constructed from all m16x4 or m8x8 bit patterns packed_simd-0.3.3/src/api/math.rs010064400007650000024000000001011332536563700150430ustar0000000000000000//! Implements vertical math operations #[macro_use] mod float; packed_simd-0.3.3/src/api/math/float.rs010064400007650000024000000016451342460246600161600ustar0000000000000000//! Implements vertical floating-point math operations. #[macro_use] mod abs; #[macro_use] mod consts; #[macro_use] mod cos; #[macro_use] mod exp; #[macro_use] mod powf; #[macro_use] mod ln; #[macro_use] mod mul_add; #[macro_use] mod mul_adde; #[macro_use] mod recpre; #[macro_use] mod rsqrte; #[macro_use] mod sin; #[macro_use] mod sqrt; #[macro_use] mod sqrte; macro_rules! impl_float_category { ([$elem_ty:ident; $elem_count:expr]: $id:ident, $mask_ty:ident) => { impl $id { #[inline] pub fn is_nan(self) -> $mask_ty { self.ne(self) } #[inline] pub fn is_infinite(self) -> $mask_ty { self.eq(Self::INFINITY) | self.eq(Self::NEG_INFINITY) } #[inline] pub fn is_finite(self) -> $mask_ty { !(self.is_nan() | self.is_infinite()) } } }; } packed_simd-0.3.3/src/api/math/float/abs.rs010064400007650000024000000017251342460246600167240ustar0000000000000000//! Implements vertical (lane-wise) floating-point `abs`. macro_rules! impl_math_float_abs { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Absolute value. #[inline] pub fn abs(self) -> Self { use crate::codegen::math::float::abs::Abs; Abs::abs(self) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _math_abs>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn abs() { let o = $id::splat(1 as $elem_ty); assert_eq!(o, o.abs()); let mo = $id::splat(-1 as $elem_ty); assert_eq!(o, mo.abs()); } } } } }; } packed_simd-0.3.3/src/api/math/float/consts.rs010064400007650000024000000054771342460246600175000ustar0000000000000000macro_rules! impl_float_consts { ([$elem_ty:ident; $elem_count:expr]: $id:ident) => { impl $id { /// Machine epsilon value. pub const EPSILON: $id = $id::splat(core::$elem_ty::EPSILON); /// Smallest finite value. pub const MIN: $id = $id::splat(core::$elem_ty::MIN); /// Smallest positive normal value. pub const MIN_POSITIVE: $id = $id::splat(core::$elem_ty::MIN_POSITIVE); /// Largest finite value. pub const MAX: $id = $id::splat(core::$elem_ty::MAX); /// Not a Number (NaN). pub const NAN: $id = $id::splat(core::$elem_ty::NAN); /// Infinity (∞). pub const INFINITY: $id = $id::splat(core::$elem_ty::INFINITY); /// Negative infinity (-∞). 
pub const NEG_INFINITY: $id = $id::splat(core::$elem_ty::NEG_INFINITY); /// Archimedes' constant (π) pub const PI: $id = $id::splat(core::$elem_ty::consts::PI); /// π/2 pub const FRAC_PI_2: $id = $id::splat(core::$elem_ty::consts::FRAC_PI_2); /// π/3 pub const FRAC_PI_3: $id = $id::splat(core::$elem_ty::consts::FRAC_PI_3); /// π/4 pub const FRAC_PI_4: $id = $id::splat(core::$elem_ty::consts::FRAC_PI_4); /// π/6 pub const FRAC_PI_6: $id = $id::splat(core::$elem_ty::consts::FRAC_PI_6); /// π/8 pub const FRAC_PI_8: $id = $id::splat(core::$elem_ty::consts::FRAC_PI_8); /// 1/π pub const FRAC_1_PI: $id = $id::splat(core::$elem_ty::consts::FRAC_1_PI); /// 2/π pub const FRAC_2_PI: $id = $id::splat(core::$elem_ty::consts::FRAC_2_PI); /// 2/sqrt(π) pub const FRAC_2_SQRT_PI: $id = $id::splat(core::$elem_ty::consts::FRAC_2_SQRT_PI); /// sqrt(2) pub const SQRT_2: $id = $id::splat(core::$elem_ty::consts::SQRT_2); /// 1/sqrt(2) pub const FRAC_1_SQRT_2: $id = $id::splat(core::$elem_ty::consts::FRAC_1_SQRT_2); /// Euler's number (e) pub const E: $id = $id::splat(core::$elem_ty::consts::E); /// log2(e) pub const LOG2_E: $id = $id::splat(core::$elem_ty::consts::LOG2_E); /// log10(e) pub const LOG10_E: $id = $id::splat(core::$elem_ty::consts::LOG10_E); /// ln(2) pub const LN_2: $id = $id::splat(core::$elem_ty::consts::LN_2); /// ln(10) pub const LN_10: $id = $id::splat(core::$elem_ty::consts::LN_10); } }; } packed_simd-0.3.3/src/api/math/float/cos.rs010064400007650000024000000030411342460246600167340ustar0000000000000000//! Implements vertical (lane-wise) floating-point `cos`. macro_rules! impl_math_float_cos { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Cosine. #[inline] pub fn cos(self) -> Self { use crate::codegen::math::float::cos::Cos; Cos::cos(self) } /// Cosine of `self * PI`. #[inline] pub fn cos_pi(self) -> Self { use crate::codegen::math::float::cos_pi::CosPi; CosPi::cos_pi(self) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _math_cos>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn cos() { use crate::$elem_ty::consts::PI; let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); let p = $id::splat(PI as $elem_ty); let ph = $id::splat(PI as $elem_ty / 2.); let z_r = $id::splat((PI as $elem_ty / 2.).cos()); let o_r = $id::splat((PI as $elem_ty).cos()); assert_eq!(o, z.cos()); assert_eq!(z_r, ph.cos()); assert_eq!(o_r, p.cos()); } } } } }; } packed_simd-0.3.3/src/api/math/float/exp.rs010064400007650000024000000022371342460246600167520ustar0000000000000000//! Implements vertical (lane-wise) floating-point `exp`. macro_rules! impl_math_float_exp { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Returns the exponential function of `self`: `e^(self)`. #[inline] pub fn exp(self) -> Self { use crate::codegen::math::float::exp::Exp; Exp::exp(self) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _math_exp>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn exp() { let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); assert_eq!(o, z.exp()); let e = $id::splat(crate::f64::consts::E as $elem_ty); let tol = $id::splat(2.4e-4 as $elem_ty); assert!((e - o.exp()).abs().le(tol).all()); } } } } }; } packed_simd-0.3.3/src/api/math/float/ln.rs010064400007650000024000000022051342460246600165620ustar0000000000000000//! 
Implements vertical (lane-wise) floating-point `ln`. macro_rules! impl_math_float_ln { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Returns the natural logarithm of `self`. #[inline] pub fn ln(self) -> Self { use crate::codegen::math::float::ln::Ln; Ln::ln(self) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _math_ln>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn ln() { let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); assert_eq!(z, o.ln()); let e = $id::splat(crate::f64::consts::E as $elem_ty); let tol = $id::splat(2.4e-4 as $elem_ty); assert!((o - e.ln()).abs().le(tol).all()); } } } } }; } packed_simd-0.3.3/src/api/math/float/mul_add.rs010064400007650000024000000032441342460246600175620ustar0000000000000000//! Implements vertical (lane-wise) floating-point `mul_add`. macro_rules! impl_math_float_mul_add { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Fused multiply add: `self * y + z` #[inline] pub fn mul_add(self, y: Self, z: Self) -> Self { use crate::codegen::math::float::mul_add::MulAdd; MulAdd::mul_add(self, y, z) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _math_mul_add>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn mul_add() { let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); let t = $id::splat(2 as $elem_ty); let t3 = $id::splat(3 as $elem_ty); let f = $id::splat(4 as $elem_ty); assert_eq!(z, z.mul_add(z, z)); assert_eq!(o, o.mul_add(o, z)); assert_eq!(o, o.mul_add(z, o)); assert_eq!(o, z.mul_add(o, o)); assert_eq!(t, o.mul_add(o, o)); assert_eq!(t, o.mul_add(t, z)); assert_eq!(t, t.mul_add(o, z)); assert_eq!(f, t.mul_add(t, z)); assert_eq!(f, t.mul_add(o, t)); assert_eq!(t3, t.mul_add(o, o)); } } } } }; } packed_simd-0.3.3/src/api/math/float/mul_adde.rs010064400007650000024000000037401342460246600177300ustar0000000000000000//! Implements vertical (lane-wise) floating-point `mul_adde`. macro_rules! impl_math_float_mul_adde { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Fused multiply add estimate: ~= `self * y + z` /// /// While fused multiply-add (`fma`) has infinite intermediate precision, /// `mul_adde` has _at worst_ the same precision as a multiply followed by an add. /// This might be more efficient on architectures that do not have an `fma` instruction. #[inline] pub fn mul_adde(self, y: Self, z: Self) -> Self { use crate::codegen::math::float::mul_adde::MulAddE; MulAddE::mul_adde(self, y, z) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _math_mul_adde>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn mul_adde() { let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); let t = $id::splat(2 as $elem_ty); let t3 = $id::splat(3 as $elem_ty); let f = $id::splat(4 as $elem_ty); assert_eq!(z, z.mul_adde(z, z)); assert_eq!(o, o.mul_adde(o, z)); assert_eq!(o, o.mul_adde(z, o)); assert_eq!(o, z.mul_adde(o, o)); assert_eq!(t, o.mul_adde(o, o)); assert_eq!(t, o.mul_adde(t, z)); assert_eq!(t, t.mul_adde(o, z)); assert_eq!(f, t.mul_adde(t, z)); assert_eq!(f, t.mul_adde(o, t)); assert_eq!(t3, t.mul_adde(o, o)); } } } } }; } packed_simd-0.3.3/src/api/math/float/powf.rs010064400007650000024000000024401342460246600171250ustar0000000000000000//! Implements vertical (lane-wise) floating-point `powf`.
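// Illustrative usage sketch (not part of the crate's compiled sources): the
// vertical math functions in this directory all operate lane-wise, e.g. from
// a crate that depends on `packed_simd`:
//
//     use packed_simd::f32x4;
//
//     let t = f32x4::splat(2.);
//     assert_eq!(t.powf(t), f32x4::splat(4.));   // lane-wise 2^2, as tested below
//     // `ln` and `exp` are approximate inverses, up to the same 2.4e-4
//     // tolerance the tests in this directory use:
//     let err = (t.ln().exp() - t).abs();
//     assert!(err.le(f32x4::splat(2.4e-4)).all());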
macro_rules! impl_math_float_powf { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Raises `self` to the floating point power of `x`. #[inline] pub fn powf(self, x: Self) -> Self { use crate::codegen::math::float::powf::Powf; Powf::powf(self, x) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _math_powf>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn powf() { let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); let t = $id::splat(2 as $elem_ty); assert_eq!(o, o.powf(z)); assert_eq!(o, t.powf(z)); assert_eq!(o, o.powf(o)); assert_eq!(t, t.powf(o)); let f = $id::splat(4 as $elem_ty); assert_eq!(f, t.powf(t)); } } } } }; } packed_simd-0.3.3/src/api/math/float/recpre.rs010064400007650000024000000024141342460246600174330ustar0000000000000000//! Implements vertical (lane-wise) floating-point `recpre`. macro_rules! impl_math_float_recpre { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Reciprocal estimate: `~= 1. / self`. /// /// FIXME: The precision of the estimate is currently unspecified. #[inline] pub fn recpre(self) -> Self { $id::splat(1.) / self } } test_if!{ $test_tt: paste::item! { pub mod [<$id _math_recpre>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn recpre() { let tol = $id::splat(2.4e-4 as $elem_ty); let o = $id::splat(1 as $elem_ty); let error = (o - o.recpre()).abs(); assert!(error.le(tol).all()); let t = $id::splat(2 as $elem_ty); let e = 0.5; let error = (e - t.recpre()).abs(); assert!(error.le(tol).all()); } } } } }; } packed_simd-0.3.3/src/api/math/float/rsqrte.rs010064400007650000024000000027301342460246600174740ustar0000000000000000//! Implements vertical (lane-wise) floating-point `rsqrte`. macro_rules! impl_math_float_rsqrte { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Reciprocal square-root estimate: `~= 1. / self.sqrt()`. /// /// FIXME: The precision of the estimate is currently unspecified. #[inline] pub fn rsqrte(self) -> Self { unsafe { use crate::llvm::simd_fsqrt; $id::splat(1.) / Simd(simd_fsqrt(self.0)) } } } test_if!{ $test_tt: paste::item! { pub mod [<$id _math_rsqrte>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn rsqrte() { use crate::$elem_ty::consts::SQRT_2; let tol = $id::splat(2.4e-4 as $elem_ty); let o = $id::splat(1 as $elem_ty); let error = (o - o.rsqrte()).abs(); assert!(error.le(tol).all()); let t = $id::splat(2 as $elem_ty); let e = 1. / SQRT_2; let error = (e - t.rsqrte()).abs(); assert!(error.le(tol).all()); } } } } }; } packed_simd-0.3.3/src/api/math/float/sin.rs010064400007650000024000000033371342460246600167510ustar0000000000000000//! Implements vertical (lane-wise) floating-point `sin`. macro_rules! impl_math_float_sin { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Sine. #[inline] pub fn sin(self) -> Self { use crate::codegen::math::float::sin::Sin; Sin::sin(self) } /// Sine of `self * PI`. #[inline] pub fn sin_pi(self) -> Self { use crate::codegen::math::float::sin_pi::SinPi; SinPi::sin_pi(self) } /// Sine and cosine of `self * PI`. #[inline] pub fn sin_cos_pi(self) -> (Self, Self) { use crate::codegen::math::float::sin_cos_pi::SinCosPi; SinCosPi::sin_cos_pi(self) } } test_if!{ $test_tt: paste::item!
{ pub mod [<$id _math_sin>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn sin() { use crate::$elem_ty::consts::PI; let z = $id::splat(0 as $elem_ty); let p = $id::splat(PI as $elem_ty); let ph = $id::splat(PI as $elem_ty / 2.); let o_r = $id::splat((PI as $elem_ty / 2.).sin()); let z_r = $id::splat((PI as $elem_ty).sin()); assert_eq!(z, z.sin()); assert_eq!(o_r, ph.sin()); assert_eq!(z_r, p.sin()); } } } } }; } packed_simd-0.3.3/src/api/math/float/sqrt.rs010064400007650000024000000022331342460246600171430ustar0000000000000000//! Implements vertical (lane-wise) floating-point `sqrt`. macro_rules! impl_math_float_sqrt { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Square root. #[inline] pub fn sqrt(self) -> Self { use crate::codegen::math::float::sqrt::Sqrt; Sqrt::sqrt(self) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _math_sqrt>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn sqrt() { use crate::$elem_ty::consts::SQRT_2; let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); assert_eq!(z, z.sqrt()); assert_eq!(o, o.sqrt()); let t = $id::splat(2 as $elem_ty); let e = $id::splat(SQRT_2); assert_eq!(e, t.sqrt()); } } } } }; } packed_simd-0.3.3/src/api/math/float/sqrte.rs010064400007650000024000000030711342460246600173110ustar0000000000000000//! Implements vertical (lane-wise) floating-point `sqrte`. macro_rules! impl_math_float_sqrte { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Square-root estimate. /// /// FIXME: The precision of the estimate is currently unspecified. #[inline] pub fn sqrte(self) -> Self { use crate::codegen::math::float::sqrte::Sqrte; Sqrte::sqrte(self) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _math_sqrte>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn sqrte() { use crate::$elem_ty::consts::SQRT_2; let tol = $id::splat(2.4e-4 as $elem_ty); let z = $id::splat(0 as $elem_ty); let error = (z - z.sqrte()).abs(); assert!(error.le(tol).all()); let o = $id::splat(1 as $elem_ty); let error = (o - o.sqrte()).abs(); assert!(error.le(tol).all()); let t = $id::splat(2 as $elem_ty); let e = $id::splat(SQRT_2 as $elem_ty); let error = (e - t.sqrte()).abs(); assert!(error.le(tol).all()); } } } } }; } packed_simd-0.3.3/src/api/minimal.rs010064400007650000024000000001031333454520600155320ustar0000000000000000#[macro_use] mod iuf; #[macro_use] mod mask; #[macro_use] mod ptr; packed_simd-0.3.3/src/api/minimal/iuf.rs010064400007650000024000000151371342460246600163340ustar0000000000000000//! Minimal API of signed integer, unsigned integer, and floating-point //! vectors. macro_rules! impl_minimal_iuf { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident | $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => { $(#[$doc])* pub type $id = Simd<[$elem_ty; $elem_count]>; impl sealed::Simd for $id { type Element = $elem_ty; const LANES: usize = $elem_count; type LanesType = [u32; $elem_count]; } impl $id { /// Creates a new instance with each vector element initialized /// with the provided values. #[inline] #[cfg_attr(feature = "cargo-clippy", allow(clippy::too_many_arguments))] pub const fn new($($elem_name: $elem_ty),*) -> Self { Simd(codegen::$id($($elem_name as $ielem_ty),*)) } /// Returns the number of vector lanes.
#[inline] pub const fn lanes() -> usize { $elem_count } /// Constructs a new instance with each element initialized to /// `value`. #[inline] pub const fn splat(value: $elem_ty) -> Self { Simd(codegen::$id($({ #[allow(non_camel_case_types, dead_code)] struct $elem_name; value as $ielem_ty }),*)) } /// Extracts the value at `index`. /// /// # Panics /// /// If `index >= Self::lanes()`. #[inline] pub fn extract(self, index: usize) -> $elem_ty { assert!(index < $elem_count); unsafe { self.extract_unchecked(index) } } /// Extracts the value at `index`. /// /// # Precondition /// /// If `index >= Self::lanes()` the behavior is undefined. #[inline] pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty { use crate::llvm::simd_extract; let e: $ielem_ty = simd_extract(self.0, index as u32); e as $elem_ty } /// Returns a new vector where the value at `index` is replaced by `new_value`. /// /// # Panics /// /// If `index >= Self::lanes()`. #[inline] #[must_use = "replace does not modify the original value - \ it returns a new vector with the value at `index` \ replaced by `new_value`" ] pub fn replace(self, index: usize, new_value: $elem_ty) -> Self { assert!(index < $elem_count); unsafe { self.replace_unchecked(index, new_value) } } /// Returns a new vector where the value at `index` is replaced by `new_value`. /// /// # Precondition /// /// If `index >= Self::lanes()` the behavior is undefined. #[inline] #[must_use = "replace_unchecked does not modify the original value - \ it returns a new vector with the value at `index` \ replaced by `new_value`" ] pub unsafe fn replace_unchecked( self, index: usize, new_value: $elem_ty, ) -> Self { use crate::llvm::simd_insert; Simd(simd_insert(self.0, index as u32, new_value as $ielem_ty)) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _minimal>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn minimal() { // lanes: assert_eq!($elem_count, $id::lanes()); // splat and extract / extract_unchecked: const VAL: $elem_ty = 7 as $elem_ty; const VEC: $id = $id::splat(VAL); for i in 0..$id::lanes() { assert_eq!(VAL, VEC.extract(i)); assert_eq!( VAL, unsafe { VEC.extract_unchecked(i) } ); } // replace / replace_unchecked let new_vec = VEC.replace(0, 42 as $elem_ty); for i in 0..$id::lanes() { if i == 0 { assert_eq!(42 as $elem_ty, new_vec.extract(i)); } else { assert_eq!(VAL, new_vec.extract(i)); } } let new_vec = unsafe { VEC.replace_unchecked(0, 42 as $elem_ty) }; for i in 0..$id::lanes() { if i == 0 { assert_eq!(42 as $elem_ty, new_vec.extract(i)); } else { assert_eq!(VAL, new_vec.extract(i)); } } } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn extract_panic_oob() { const VAL: $elem_ty = 7 as $elem_ty; const VEC: $id = $id::splat(VAL); let _ = VEC.extract($id::lanes()); } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn replace_panic_oob() { const VAL: $elem_ty = 7 as $elem_ty; const VEC: $id = $id::splat(VAL); let _ = VEC.replace($id::lanes(), 42 as $elem_ty); } } } } } packed_simd-0.3.3/src/api/minimal/mask.rs010064400007650000024000000153441342460513300164760ustar0000000000000000//! Minimal API of mask vectors.
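// Illustrative usage sketch (not part of the crate's compiled sources): the
// mask API generated below mirrors the integer/float minimal API above
// (`new`, `splat`, `lanes`, `extract`, `replace`), but over `bool` lanes,
// e.g. from a crate that depends on `packed_simd`:
//
//     use packed_simd::m8x8;
//
//     let m = m8x8::splat(false).replace(3, true); // only lane 3 is set
//     assert!(m.extract(3) && !m.extract(0));
//     assert!(m.any() && !m.all());                // boolean lane reductions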
macro_rules! impl_minimal_mask { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident | $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => { $(#[$doc])* pub type $id = Simd<[$elem_ty; $elem_count]>; impl sealed::Simd for $id { type Element = $elem_ty; const LANES: usize = $elem_count; type LanesType = [u32; $elem_count]; } impl $id { /// Creates a new instance with each vector element initialized /// with the provided values. #[inline] #[cfg_attr(feature = "cargo-clippy", allow(clippy::too_many_arguments))] pub const fn new($($elem_name: bool),*) -> Self { Simd(codegen::$id($(Self::bool_to_internal($elem_name)),*)) } /// Converts a boolean type into the type of the vector lanes. #[inline] #[cfg_attr(feature = "cargo-clippy", allow(clippy::indexing_slicing))] const fn bool_to_internal(x: bool) -> $ielem_ty { [0 as $ielem_ty, !(0 as $ielem_ty)][x as usize] } /// Returns the number of vector lanes. #[inline] pub const fn lanes() -> usize { $elem_count } /// Constructs a new instance with each element initialized to /// `value`. #[inline] pub const fn splat(value: bool) -> Self { Simd(codegen::$id($({ #[allow(non_camel_case_types, dead_code)] struct $elem_name; Self::bool_to_internal(value) }),*)) } /// Extracts the value at `index`. /// /// # Panics /// /// If `index >= Self::lanes()`. #[inline] pub fn extract(self, index: usize) -> bool { assert!(index < $elem_count); unsafe { self.extract_unchecked(index) } } /// Extracts the value at `index`. /// /// # Precondition /// /// If `index >= Self::lanes()` the behavior is undefined. #[inline] pub unsafe fn extract_unchecked(self, index: usize) -> bool { use crate::llvm::simd_extract; let x: $ielem_ty = simd_extract(self.0, index as u32); x != 0 } /// Returns a new vector where the value at `index` is replaced by /// `new_value`. /// /// # Panics /// /// If `index >= Self::lanes()`. #[inline] #[must_use = "replace does not modify the original value - \ it returns a new vector with the value at `index` \ replaced by `new_value`" ] pub fn replace(self, index: usize, new_value: bool) -> Self { assert!(index < $elem_count); unsafe { self.replace_unchecked(index, new_value) } } /// Returns a new vector where the value at `index` is replaced by /// `new_value`. /// /// # Precondition /// /// If `index >= Self::lanes()` the behavior is undefined. #[inline] #[must_use = "replace_unchecked does not modify the original value - \ it returns a new vector with the value at `index` \ replaced by `new_value`" ] pub unsafe fn replace_unchecked( self, index: usize, new_value: bool, ) -> Self { use crate::llvm::simd_insert; Simd(simd_insert(self.0, index as u32, Self::bool_to_internal(new_value))) } } test_if!{ $test_tt: paste::item!
{ pub mod [<$id _minimal>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn minimal() { // TODO: test new // lanes: assert_eq!($elem_count, $id::lanes()); // splat and extract / extract_unchecked: let vec = $id::splat(true); for i in 0..$id::lanes() { assert_eq!(true, vec.extract(i)); assert_eq!(true, unsafe { vec.extract_unchecked(i) } ); } // replace / replace_unchecked let new_vec = vec.replace(0, false); for i in 0..$id::lanes() { if i == 0 { assert_eq!(false, new_vec.extract(i)); } else { assert_eq!(true, new_vec.extract(i)); } } let new_vec = unsafe { vec.replace_unchecked(0, false) }; for i in 0..$id::lanes() { if i == 0 { assert_eq!(false, new_vec.extract(i)); } else { assert_eq!(true, new_vec.extract(i)); } } } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn extract_panic_oob() { let vec = $id::splat(false); let _ = vec.extract($id::lanes()); } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn replace_panic_oob() { let vec = $id::splat(false); let _ = vec.replace($id::lanes(), true); } } } } } packed_simd-0.3.3/src/api/minimal/ptr.rs010064400007650000024000001645361342460246600163660ustar0000000000000000//! Minimal API of pointer vectors. macro_rules! impl_minimal_p { ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident, $usize_ty:ident, $isize_ty:ident | $ref:ident | $test_tt:tt | $($elem_name:ident),+ | ($true:expr, $false:expr) | $(#[$doc:meta])*) => { $(#[$doc])* pub type $id<T> = Simd<[$elem_ty; $elem_count]>; impl<T> sealed::Simd for $id<T> { type Element = $elem_ty; const LANES: usize = $elem_count; type LanesType = [u32; $elem_count]; } impl<T> $id<T> { /// Creates a new instance with each vector element initialized /// with the provided values. #[inline] #[cfg_attr(feature = "cargo-clippy", allow(clippy::too_many_arguments))] pub const fn new($($elem_name: $elem_ty),*) -> Self { Simd(codegen::$id($($elem_name),*)) } /// Returns the number of vector lanes. #[inline] pub const fn lanes() -> usize { $elem_count } /// Constructs a new instance with each element initialized to /// `value`. #[inline] pub const fn splat(value: $elem_ty) -> Self { Simd(codegen::$id($({ #[allow(non_camel_case_types, dead_code)] struct $elem_name; value }),*)) } /// Constructs a new instance with each element initialized to /// `null`. #[inline] pub const fn null() -> Self { Self::splat(crate::ptr::null_mut() as $elem_ty) } /// Returns a mask that selects those lanes that contain `null` /// pointers. #[inline] pub fn is_null(self) -> $mask_ty { self.eq(Self::null()) } /// Extracts the value at `index`. /// /// # Panics /// /// If `index >= Self::lanes()`. #[inline] pub fn extract(self, index: usize) -> $elem_ty { assert!(index < $elem_count); unsafe { self.extract_unchecked(index) } } /// Extracts the value at `index`. /// /// # Precondition /// /// If `index >= Self::lanes()` the behavior is undefined. #[inline] pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty { use crate::llvm::simd_extract; simd_extract(self.0, index as u32) } /// Returns a new vector where the value at `index` is replaced by /// `new_value`.
/// /// # Panics /// /// If `index >= Self::lanes()`. #[inline] #[must_use = "replace does not modify the original value - \ it returns a new vector with the value at `index` \ replaced by `new_value`" ] #[cfg_attr(feature = "cargo-clippy", allow(clippy::not_unsafe_ptr_arg_deref))] pub fn replace(self, index: usize, new_value: $elem_ty) -> Self { assert!(index < $elem_count); unsafe { self.replace_unchecked(index, new_value) } } /// Returns a new vector where the value at `index` is replaced by `new_value`. /// /// # Precondition /// /// If `index >= Self::lanes()` the behavior is undefined. #[inline] #[must_use = "replace_unchecked does not modify the original value - \ it returns a new vector with the value at `index` \ replaced by `new_value`" ] pub unsafe fn replace_unchecked( self, index: usize, new_value: $elem_ty, ) -> Self { use crate::llvm::simd_insert; Simd(simd_insert(self.0, index as u32, new_value)) } } test_if!{ $test_tt: paste::item! { pub mod [<$id _minimal>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn minimal() { // lanes: assert_eq!($elem_count, $id::<i32>::lanes()); // splat and extract / extract_unchecked: let VAL7: <$id<i32> as sealed::Simd>::Element = $ref!(7); let VAL42: <$id<i32> as sealed::Simd>::Element = $ref!(42); let VEC: $id<i32> = $id::splat(VAL7); for i in 0..$id::<i32>::lanes() { assert_eq!(VAL7, VEC.extract(i)); assert_eq!( VAL7, unsafe { VEC.extract_unchecked(i) } ); } // replace / replace_unchecked let new_vec = VEC.replace(0, VAL42); for i in 0..$id::<i32>::lanes() { if i == 0 { assert_eq!(VAL42, new_vec.extract(i)); } else { assert_eq!(VAL7, new_vec.extract(i)); } } let new_vec = unsafe { VEC.replace_unchecked(0, VAL42) }; for i in 0..$id::<i32>::lanes() { if i == 0 { assert_eq!(VAL42, new_vec.extract(i)); } else { assert_eq!(VAL7, new_vec.extract(i)); } } let mut n = $id::<i32>::null(); assert_eq!( n, $id::<i32>::splat(unsafe { crate::mem::zeroed() }) ); assert!(n.is_null().all()); n = n.replace( 0, unsafe { crate::mem::transmute(1_isize) } ); assert!(!n.is_null().all()); if $id::<i32>::lanes() > 1 { assert!(n.is_null().any()); } else { assert!(!n.is_null().any()); } } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn extract_panic_oob() { let VAL: <$id<i32> as sealed::Simd>::Element = $ref!(7); let VEC: $id<i32> = $id::splat(VAL); let _ = VEC.extract($id::<i32>::lanes()); } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn replace_panic_oob() { let VAL: <$id<i32> as sealed::Simd>::Element = $ref!(7); let VAL42: <$id<i32> as sealed::Simd>::Element = $ref!(42); let VEC: $id<i32> = $id::splat(VAL); let _ = VEC.replace($id::<i32>::lanes(), VAL42); } } } } impl<T> crate::fmt::Debug for $id<T> { #[cfg_attr(feature = "cargo-clippy", allow(clippy::missing_inline_in_public_items))] fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result { write!( f, "{}<{}>(", stringify!($id), unsafe { crate::intrinsics::type_name::<T>() } )?; for i in 0..$elem_count { if i > 0 { write!(f, ", ")?; } self.extract(i).fmt(f)?; } write!(f, ")") } } test_if!{ $test_tt: paste::item!
{ pub mod [<$id _fmt_debug>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn debug() { use arrayvec::{ArrayString,ArrayVec}; type TinyString = ArrayString<[u8; 512]>; use crate::fmt::Write; let v = $id::<i32>::default(); let mut s = TinyString::new(); write!(&mut s, "{:?}", v).unwrap(); let mut beg = TinyString::new(); write!(&mut beg, "{}<i32>(", stringify!($id)).unwrap(); assert!( s.starts_with(beg.as_str()), "s = {} (should start with = {})", s, beg ); assert!(s.ends_with(")")); let s: ArrayVec<[TinyString; 64]> = s.replace(beg.as_str(), "") .replace(")", "").split(",") .map(|v| TinyString::from(v.trim()).unwrap()) .collect(); assert_eq!(s.len(), $id::<i32>::lanes()); for (index, ss) in s.into_iter().enumerate() { let mut e = TinyString::new(); write!(&mut e, "{:?}", v.extract(index)).unwrap(); assert_eq!(ss, e); } } } } } impl<T> Default for $id<T> { #[inline] fn default() -> Self { // FIXME: ptrs do not implement default Self::null() } } test_if!{ $test_tt: paste::item! { pub mod [<$id _default>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn default() { let a = $id::<i32>::default(); for i in 0..$id::<i32>::lanes() { assert_eq!( a.extract(i), unsafe { crate::mem::zeroed() } ); } } } } } impl<T> $id<T> { /// Lane-wise equality comparison. #[inline] pub fn eq(self, other: Self) -> $mask_ty { unsafe { use crate::llvm::simd_eq; let a: $usize_ty = crate::mem::transmute(self); let b: $usize_ty = crate::mem::transmute(other); Simd(simd_eq(a.0, b.0)) } } /// Lane-wise inequality comparison. #[inline] pub fn ne(self, other: Self) -> $mask_ty { unsafe { use crate::llvm::simd_ne; let a: $usize_ty = crate::mem::transmute(self); let b: $usize_ty = crate::mem::transmute(other); Simd(simd_ne(a.0, b.0)) } } /// Lane-wise less-than comparison. #[inline] pub fn lt(self, other: Self) -> $mask_ty { unsafe { use crate::llvm::simd_lt; let a: $usize_ty = crate::mem::transmute(self); let b: $usize_ty = crate::mem::transmute(other); Simd(simd_lt(a.0, b.0)) } } /// Lane-wise less-than-or-equals comparison. #[inline] pub fn le(self, other: Self) -> $mask_ty { unsafe { use crate::llvm::simd_le; let a: $usize_ty = crate::mem::transmute(self); let b: $usize_ty = crate::mem::transmute(other); Simd(simd_le(a.0, b.0)) } } /// Lane-wise greater-than comparison. #[inline] pub fn gt(self, other: Self) -> $mask_ty { unsafe { use crate::llvm::simd_gt; let a: $usize_ty = crate::mem::transmute(self); let b: $usize_ty = crate::mem::transmute(other); Simd(simd_gt(a.0, b.0)) } } /// Lane-wise greater-than-or-equals comparison. #[inline] pub fn ge(self, other: Self) -> $mask_ty { unsafe { use crate::llvm::simd_ge; let a: $usize_ty = crate::mem::transmute(self); let b: $usize_ty = crate::mem::transmute(other); Simd(simd_ge(a.0, b.0)) } } } test_if!{ $test_tt: paste::item!
{ pub mod [<$id _cmp_vertical>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn cmp() { let a = $id::<i32>::null(); let b = $id::<i32>::splat(unsafe { crate::mem::transmute(1_isize) }); let r = a.lt(b); let e = $mask_ty::splat(true); assert!(r == e); let r = a.le(b); assert!(r == e); let e = $mask_ty::splat(false); let r = a.gt(b); assert!(r == e); let r = a.ge(b); assert!(r == e); let r = a.eq(b); assert!(r == e); let mut a = a; let mut b = b; let mut e = e; for i in 0..$id::<i32>::lanes() { if i % 2 == 0 { a = a.replace( i, unsafe { crate::mem::transmute(0_isize) } ); b = b.replace( i, unsafe { crate::mem::transmute(1_isize) } ); e = e.replace(i, true); } else { a = a.replace( i, unsafe { crate::mem::transmute(1_isize) } ); b = b.replace( i, unsafe { crate::mem::transmute(0_isize) } ); e = e.replace(i, false); } } let r = a.lt(b); assert!(r == e); } } } } #[cfg_attr(feature = "cargo-clippy", allow(clippy::partialeq_ne_impl))] impl<T> crate::cmp::PartialEq<$id<T>> for $id<T> { #[inline] fn eq(&self, other: &Self) -> bool { $id::<T>::eq(*self, *other).all() } #[inline] fn ne(&self, other: &Self) -> bool { $id::<T>::ne(*self, *other).any() } } // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 #[cfg_attr(feature = "cargo-clippy", allow(clippy::partialeq_ne_impl))] impl<T> crate::cmp::PartialEq<LexicographicallyOrdered<$id<T>>> for LexicographicallyOrdered<$id<T>> { #[inline] fn eq(&self, other: &Self) -> bool { self.0 == other.0 } #[inline] fn ne(&self, other: &Self) -> bool { self.0 != other.0 } } test_if!{ $test_tt: paste::item! { pub mod [<$id _cmp_PartialEq>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn partial_eq() { let a = $id::<i32>::null(); let b = $id::<i32>::splat(unsafe { crate::mem::transmute(1_isize) }); assert!(a != b); assert!(!(a == b)); assert!(a == a); assert!(!(a != a)); if $id::<i32>::lanes() > 1 { let a = $id::<i32>::null().replace(0, unsafe { crate::mem::transmute(1_isize) }); let b = $id::<i32>::splat(unsafe { crate::mem::transmute(1_isize) }); assert!(a != b); assert!(!(a == b)); assert!(a == a); assert!(!(a != a)); } } } } } impl<T> crate::cmp::Eq for $id<T> {} impl<T> crate::cmp::Eq for LexicographicallyOrdered<$id<T>> {} test_if!{ $test_tt: paste::item! { pub mod [<$id _cmp_eq>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn eq() { fn foo<E: crate::cmp::Eq>(_: E) {} let a = $id::<i32>::null(); foo(a); } } } } impl<T> From<[$elem_ty; $elem_count]> for $id<T> { #[inline] fn from(array: [$elem_ty; $elem_count]) -> Self { unsafe { // FIXME: unnecessary zeroing; better than UB. let mut u: Self = crate::mem::zeroed(); crate::ptr::copy_nonoverlapping( &array as *const [$elem_ty; $elem_count] as *const u8, &mut u as *mut Self as *mut u8, crate::mem::size_of::<Self>() ); u } } } impl<T> Into<[$elem_ty; $elem_count]> for $id<T> { #[inline] fn into(self) -> [$elem_ty; $elem_count] { unsafe { // FIXME: unnecessary zeroing; better than UB. let mut u: [$elem_ty; $elem_count] = crate::mem::zeroed(); crate::ptr::copy_nonoverlapping( &self as *const $id<T> as *const u8, &mut u as *mut [$elem_ty; $elem_count] as *mut u8, crate::mem::size_of::<Self>() ); u } } } test_if!{ $test_tt: paste::item!
{ pub mod [<$id _from>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn array() { let values = [1_i32; $elem_count]; let mut vec: $id = Default::default(); let mut array = [ $id::::null().extract(0); $elem_count ]; for i in 0..$elem_count { let ptr = unsafe { crate::mem::transmute( &values[i] as *const i32 ) }; vec = vec.replace(i, ptr); array[i] = ptr; } // FIXME: there is no impl of From<$id> for [$elem_ty; N] // let a0 = From::from(vec); // assert_eq!(a0, array); #[allow(unused_assignments)] let mut a1 = array; a1 = vec.into(); assert_eq!(a1, array); let v0: $id = From::from(array); assert_eq!(v0, vec); let v1: $id = array.into(); assert_eq!(v1, vec); } } } } impl $id { /// Instantiates a new vector with the values of the `slice`. /// /// # Panics /// /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned /// to an `align_of::()` boundary. #[inline] pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self { unsafe { assert!(slice.len() >= $elem_count); let target_ptr = slice.get_unchecked(0) as *const $elem_ty; assert!( target_ptr.align_offset(crate::mem::align_of::()) == 0 ); Self::from_slice_aligned_unchecked(slice) } } /// Instantiates a new vector with the values of the `slice`. /// /// # Panics /// /// If `slice.len() < Self::lanes()`. #[inline] pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self { unsafe { assert!(slice.len() >= $elem_count); Self::from_slice_unaligned_unchecked(slice) } } /// Instantiates a new vector with the values of the `slice`. /// /// # Precondition /// /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned /// to an `align_of::()` boundary, the behavior is undefined. #[inline] pub unsafe fn from_slice_aligned_unchecked(slice: &[$elem_ty]) -> Self { #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] *(slice.get_unchecked(0) as *const $elem_ty as *const Self) } /// Instantiates a new vector with the values of the `slice`. /// /// # Precondition /// /// If `slice.len() < Self::lanes()` the behavior is undefined. #[inline] pub unsafe fn from_slice_unaligned_unchecked( slice: &[$elem_ty], ) -> Self { use crate::mem::size_of; let target_ptr = slice.get_unchecked(0) as *const $elem_ty as *const u8; let mut x = Self::splat(crate::ptr::null_mut() as $elem_ty); let self_ptr = &mut x as *mut Self as *mut u8; crate::ptr::copy_nonoverlapping( target_ptr, self_ptr, size_of::(), ); x } } test_if!{ $test_tt: paste::item! { pub mod [<$id _slice_from_slice>] { use super::*; use crate::iter::Iterator; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn from_slice_unaligned() { let (null, non_null) = ptr_vals!($id); let mut unaligned = [ non_null; $id::::lanes() + 1 ]; unaligned[0] = null; let vec = $id::::from_slice_unaligned( &unaligned[1..] ); for (index, &b) in unaligned.iter().enumerate() { if index == 0 { assert_eq!(b, null); } else { assert_eq!(b, non_null); assert_eq!(b, vec.extract(index - 1)); } } } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn from_slice_unaligned_fail() { let (_null, non_null) = ptr_vals!($id); let unaligned = [non_null; $id::::lanes() + 1]; // the slice is not large enough => panic let _vec = $id::::from_slice_unaligned( &unaligned[2..] 
); } union A { data: [<$id as sealed::Simd>::Element; 2 * $id::::lanes()], _vec: $id, } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn from_slice_aligned() { let (null, non_null) = ptr_vals!($id); let mut aligned = A { data: [null; 2 * $id::::lanes()], }; for i in $id::::lanes()..(2 * $id::::lanes()) { unsafe { aligned.data[i] = non_null; } } let vec = unsafe { $id::::from_slice_aligned( &aligned.data[$id::::lanes()..] ) }; for (index, &b) in unsafe { aligned.data.iter().enumerate() } { if index < $id::::lanes() { assert_eq!(b, null); } else { assert_eq!(b, non_null); assert_eq!( b, vec.extract(index - $id::::lanes()) ); } } } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn from_slice_aligned_fail_lanes() { let (_null, non_null) = ptr_vals!($id); let aligned = A { data: [non_null; 2 * $id::::lanes()], }; // the slice is not large enough => panic let _vec = unsafe { $id::::from_slice_aligned( &aligned.data[2 * $id::::lanes()..] ) }; } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn from_slice_aligned_fail_align() { unsafe { let (null, _non_null) = ptr_vals!($id); let aligned = A { data: [null; 2 * $id::::lanes()], }; // get a pointer to the front of data let ptr = aligned.data.as_ptr(); // offset pointer by one element let ptr = ptr.wrapping_add(1); if ptr.align_offset( crate::mem::align_of::<$id>() ) == 0 { // the pointer is properly aligned, so // from_slice_aligned won't fail here (e.g. this // can happen for i128x1). So we panic to make // the "should_fail" test pass: panic!("ok"); } // create a slice - this is safe, because the // elements of the slice exist, are properly // initialized, and properly aligned: let s = slice::from_raw_parts( ptr, $id::::lanes() ); // this should always panic because the slice // alignment does not match the alignment // requirements for the vector type: let _vec = $id::::from_slice_aligned(s); } } } } } impl $id { /// Writes the values of the vector to the `slice`. /// /// # Panics /// /// If `slice.len() < Self::lanes()` or `&slice[0]` is not /// aligned to an `align_of::()` boundary. #[inline] pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) { unsafe { assert!(slice.len() >= $elem_count); let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty; assert!( target_ptr.align_offset(crate::mem::align_of::()) == 0 ); self.write_to_slice_aligned_unchecked(slice); } } /// Writes the values of the vector to the `slice`. /// /// # Panics /// /// If `slice.len() < Self::lanes()`. #[inline] pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) { unsafe { assert!(slice.len() >= $elem_count); self.write_to_slice_unaligned_unchecked(slice); } } /// Writes the values of the vector to the `slice`. /// /// # Precondition /// /// If `slice.len() < Self::lanes()` or `&slice[0]` is not /// aligned to an `align_of::()` boundary, the behavior is /// undefined. 
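///
/// # Examples
///
/// An editorial sketch, not from the original documentation; it assumes a
/// generated 4-lane pointer vector named `cptrx4<i32>` and is `ignore`d
/// because it is illustrative only:
///
/// ```ignore
/// #[repr(align(64))]
/// struct Aligned([*const i32; 4]); // over-aligned backing storage
/// let mut buf = Aligned([core::ptr::null(); 4]);
/// let v = cptrx4::<i32>::null();
/// unsafe { v.write_to_slice_aligned_unchecked(&mut buf.0) };
/// assert!(buf.0.iter().all(|p| p.is_null()));
/// ```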
#[inline] pub unsafe fn write_to_slice_aligned_unchecked( self, slice: &mut [$elem_ty], ) { #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] *(slice.get_unchecked_mut(0) as *mut $elem_ty as *mut Self) = self; } /// Writes the values of the vector to the `slice`. /// /// # Precondition /// /// If `slice.len() < Self::lanes()` the behavior is undefined. #[inline] pub unsafe fn write_to_slice_unaligned_unchecked( self, slice: &mut [$elem_ty], ) { let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; let self_ptr = &self as *const Self as *const u8; crate::ptr::copy_nonoverlapping( self_ptr, target_ptr, crate::mem::size_of::(), ); } } test_if!{ $test_tt: paste::item! { pub mod [<$id _slice_write_to_slice>] { use super::*; use crate::iter::Iterator; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn write_to_slice_unaligned() { let (null, non_null) = ptr_vals!($id); let mut unaligned = [null; $id::::lanes() + 1]; let vec = $id::::splat(non_null); vec.write_to_slice_unaligned(&mut unaligned[1..]); for (index, &b) in unaligned.iter().enumerate() { if index == 0 { assert_eq!(b, null); } else { assert_eq!(b, non_null); assert_eq!(b, vec.extract(index - 1)); } } } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn write_to_slice_unaligned_fail() { let (null, non_null) = ptr_vals!($id); let mut unaligned = [null; $id::::lanes() + 1]; let vec = $id::::splat(non_null); // the slice is not large enough => panic vec.write_to_slice_unaligned(&mut unaligned[2..]); } union A { data: [<$id as sealed::Simd>::Element; 2 * $id::::lanes()], _vec: $id, } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn write_to_slice_aligned() { let (null, non_null) = ptr_vals!($id); let mut aligned = A { data: [null; 2 * $id::::lanes()], }; let vec = $id::::splat(non_null); unsafe { vec.write_to_slice_aligned( &mut aligned.data[$id::::lanes()..] ) }; for (index, &b) in unsafe { aligned.data.iter().enumerate() } { if index < $id::::lanes() { assert_eq!(b, null); } else { assert_eq!(b, non_null); assert_eq!( b, vec.extract(index - $id::::lanes()) ); } } } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn write_to_slice_aligned_fail_lanes() { let (null, non_null) = ptr_vals!($id); let mut aligned = A { data: [null; 2 * $id::::lanes()], }; let vec = $id::::splat(non_null); // the slice is not large enough => panic unsafe { vec.write_to_slice_aligned( &mut aligned.data[2 * $id::::lanes()..] 
) }; } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn write_to_slice_aligned_fail_align() { let (null, non_null) = ptr_vals!($id); unsafe { let mut aligned = A { data: [null; 2 * $id::<i32>::lanes()], }; // get a pointer to the front of data let ptr = aligned.data.as_mut_ptr(); // offset pointer by one element let ptr = ptr.wrapping_add(1); if ptr.align_offset( crate::mem::align_of::<$id<i32>>() ) == 0 { // the pointer is properly aligned, so // write_to_slice_aligned won't fail here (e.g. // this can happen for i128x1). So we panic to // make the "should_fail" test pass: panic!("ok"); } // create a slice - this is safe, because the // elements of the slice exist, are properly // initialized, and properly aligned: let s = slice::from_raw_parts_mut( ptr, $id::<i32>::lanes() ); // this should always panic because the slice // alignment does not match the alignment // requirements for the vector type: let vec = $id::<i32>::splat(non_null); vec.write_to_slice_aligned(s); } } } } } impl<T> crate::hash::Hash for $id<T> { #[inline] fn hash<H: crate::hash::Hasher>(&self, state: &mut H) { let s: $usize_ty = unsafe { crate::mem::transmute(*self) }; s.hash(state) } } test_if! { $test_tt: paste::item! { pub mod [<$id _hash>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn hash() { use crate::hash::{Hash, Hasher}; #[allow(deprecated)] use crate::hash::{SipHasher13}; let values = [1_i32; $elem_count]; let mut vec: $id<i32> = Default::default(); let mut array = [ $id::<i32>::null().extract(0); $elem_count ]; for i in 0..$elem_count { let ptr = unsafe { crate::mem::transmute( &values[i] as *const i32 ) }; vec = vec.replace(i, ptr); array[i] = ptr; } #[allow(deprecated)] let mut a_hash = SipHasher13::new(); let mut v_hash = a_hash.clone(); array.hash(&mut a_hash); vec.hash(&mut v_hash); assert_eq!(a_hash.finish(), v_hash.finish()); } } } } impl<T> $id<T> { /// Calculates the offset from a pointer. /// /// `count` is in units of `T`; e.g. a count of `3` represents a /// pointer offset of `3 * size_of::<T>()` bytes. /// /// # Safety /// /// If any of the following conditions are violated, the result is /// Undefined Behavior: /// /// * Both the starting and resulting pointer must be either in /// bounds or one byte past the end of an allocated object. /// /// * The computed offset, in bytes, cannot overflow an `isize`. /// /// * The offset being in bounds cannot rely on "wrapping around" /// the address space. That is, the infinite-precision sum, in bytes, /// must fit in a `usize`. /// /// The compiler and standard library generally try to ensure /// allocations never reach a size where an offset is a concern. For /// instance, `Vec` and `Box` ensure they never allocate more than /// `isize::MAX` bytes, so `vec.as_ptr().offset(vec.len() as isize)` /// is always safe. /// /// Most platforms fundamentally can't even construct such an /// allocation. For instance, no known 64-bit platform can ever /// serve a request for 2^63 bytes due to page-table limitations or /// splitting the address space. However, some 32-bit and 16-bit /// platforms may successfully serve a request for more than /// `isize::MAX` bytes with things like Physical Address Extension. /// As such, memory acquired directly from allocators or memory /// mapped files may be too large to handle with this function.
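///
/// For instance (an editorial illustration, not from the original docs;
/// `mptrx4<i32>` and `isizex4` are assumed generated types):
///
/// ```ignore
/// let mut data = [0_i32; 4];
/// let base = mptrx4::<i32>::splat(data.as_mut_ptr());
/// // per-lane counts, in units of `T = i32`:
/// let ptrs = unsafe { base.offset(isizex4::new(0, 1, 2, 3)) };
/// // ptrs now points at [&data[0], &data[1], &data[2], &data[3]]
/// ```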
/// /// Consider using `wrapping_offset` instead if these constraints /// are difficult to satisfy. The only advantage of this method is /// that it enables more aggressive compiler optimizations. #[inline] pub unsafe fn offset(self, count: $isize_ty) -> Self { // FIXME: should use LLVM's `add nsw nuw` self.wrapping_offset(count) } /// Calculates the offset from a pointer using wrapping arithmetic. /// /// `count` is in units of `T`; e.g. a count of `3` represents a /// pointer offset of `3 * size_of::<T>()` bytes. /// /// # Safety /// /// The resulting pointer does not need to be in bounds, but it is /// potentially hazardous to dereference (which requires `unsafe`). /// /// Always use `.offset(count)` instead when possible, because /// `offset` allows the compiler to optimize better. #[inline] pub fn wrapping_offset(self, count: $isize_ty) -> Self { unsafe { let x: $isize_ty = crate::mem::transmute(self); // note: {+,*} currently performs a `wrapping_{add, mul}` crate::mem::transmute( x + (count * crate::mem::size_of::<T>() as isize) ) } } /// Calculates the distance between two pointers. /// /// The returned value is in units of `T`: the distance in bytes is /// divided by `mem::size_of::<T>()`. /// /// This function is the inverse of `offset`. /// /// # Safety /// /// If any of the following conditions are violated, the result is /// Undefined Behavior: /// /// * Both the starting and other pointer must be either in bounds /// or one byte past the end of the same allocated object. /// /// * The distance between the pointers, in bytes, cannot overflow /// an `isize`. /// /// * The distance between the pointers, in bytes, must be an exact /// multiple of the size of `T`. /// /// * The distance being in bounds cannot rely on "wrapping around" /// the address space. /// /// The compiler and standard library generally try to ensure /// allocations never reach a size where an offset is a concern. For /// instance, `Vec` and `Box` ensure they never allocate more than /// `isize::MAX` bytes, so `ptr_into_vec.offset_from(vec.as_ptr())` /// is always safe. /// /// Most platforms fundamentally can't even construct such an /// allocation. For instance, no known 64-bit platform can ever /// serve a request for 2^63 bytes due to page-table limitations or /// splitting the address space. However, some 32-bit and 16-bit /// platforms may successfully serve a request for more than /// `isize::MAX` bytes with things like Physical Address Extension. /// As such, memory acquired directly from allocators or memory /// mapped files may be too large to handle with this function. /// /// Consider using `wrapping_offset_from` instead if these constraints /// are difficult to satisfy. The only advantage of this method is /// that it enables more aggressive compiler optimizations. #[inline] pub unsafe fn offset_from(self, origin: Self) -> $isize_ty { // FIXME: should use LLVM's `sub nsw nuw`. self.wrapping_offset_from(origin) } /// Calculates the distance between two pointers. /// /// The returned value is in units of `T`: the distance in bytes is /// divided by `mem::size_of::<T>()`. /// /// If the address difference between the two pointers is not a /// multiple of `mem::size_of::<T>()`, then the result of the /// division is rounded towards zero. /// /// Though this method is safe for any two pointers, note that its /// result will be mostly useless if the two pointers aren't into /// the same allocated object, for example if they point to two /// different local variables.
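///
/// For instance (an editorial illustration, not from the original docs;
/// `cptrx2<i32>`, `usizex2` and `isizex2` are assumed generated types):
///
/// ```ignore
/// let data = [1_i32, 2, 3, 4];
/// let a = cptrx2::<i32>::splat(data.as_ptr());
/// let b = unsafe { a.add(usizex2::new(2, 3)) };
/// // lane-wise distance from `a` to `b`, in units of `i32`:
/// assert_eq!(b.wrapping_offset_from(a), isizex2::new(2, 3));
/// ```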
#[inline] pub fn wrapping_offset_from(self, origin: Self) -> $isize_ty { let x: $isize_ty = unsafe { crate::mem::transmute(self) }; let y: $isize_ty = unsafe { crate::mem::transmute(origin) }; // note: {-,/} currently perform wrapping_{sub, div}; // the distance is `self - origin`, computed lane-wise (x - y) / (crate::mem::size_of::<T>() as isize) } /// Calculates the offset from a pointer (convenience for /// `.offset(count as isize)`). /// /// `count` is in units of `T`; e.g. a count of 3 represents a /// pointer offset of `3 * size_of::<T>()` bytes. /// /// # Safety /// /// If any of the following conditions are violated, the result is /// Undefined Behavior: /// /// * Both the starting and resulting pointer must be either in /// bounds or one byte past the end of an allocated object. /// /// * The computed offset, in bytes, cannot overflow an `isize`. /// /// * The offset being in bounds cannot rely on "wrapping around" /// the address space. That is, the infinite-precision sum must fit /// in a `usize`. /// /// The compiler and standard library generally try to ensure /// allocations never reach a size where an offset is a concern. For /// instance, `Vec` and `Box` ensure they never allocate more than /// `isize::MAX` bytes, so `vec.as_ptr().add(vec.len())` is always /// safe. /// /// Most platforms fundamentally can't even construct such an /// allocation. For instance, no known 64-bit platform can ever /// serve a request for 2^63 bytes due to page-table limitations or /// splitting the address space. However, some 32-bit and 16-bit /// platforms may successfully serve a request for more than /// `isize::MAX` bytes with things like Physical Address Extension. /// As such, memory acquired directly from allocators or memory /// mapped files may be too large to handle with this function. /// /// Consider using `wrapping_offset` instead if these constraints /// are difficult to satisfy. The only advantage of this method is /// that it enables more aggressive compiler optimizations. #[inline] #[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] pub unsafe fn add(self, count: $usize_ty) -> Self { self.offset(count.cast()) } /// Calculates the offset from a pointer (convenience for /// `.offset((count as isize).wrapping_neg())`). /// /// `count` is in units of `T`; e.g. a `count` of 3 represents a /// pointer offset of `3 * size_of::<T>()` bytes. /// /// # Safety /// /// If any of the following conditions are violated, the result is /// Undefined Behavior: /// /// * Both the starting and resulting pointer must be either in /// bounds or one byte past the end of an allocated object. /// /// * The computed offset cannot exceed `isize::MAX` **bytes**. /// /// * The offset being in bounds cannot rely on "wrapping around" /// the address space. That is, the infinite-precision sum must fit /// in a `usize`. /// /// The compiler and standard library generally try to ensure /// allocations never reach a size where an offset is a concern. For /// instance, `Vec` and `Box` ensure they never allocate more than /// `isize::MAX` bytes, so /// `vec.as_ptr().add(vec.len()).sub(vec.len())` is always safe. /// /// Most platforms fundamentally can't even construct such an /// allocation. For instance, no known 64-bit platform can ever /// serve a request for 2^63 bytes due to page-table /// limitations or splitting the address space. However, some 32-bit /// and 16-bit platforms may successfully serve a request for more /// than `isize::MAX` bytes with things like Physical Address /// Extension. As such, memory acquired directly from allocators or /// memory mapped files *may* be too large to handle with this /// function. /// /// Consider using `wrapping_offset` instead if these constraints /// are difficult to satisfy. The only advantage of this method is /// that it enables more aggressive compiler optimizations. #[inline] #[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] pub unsafe fn sub(self, count: $usize_ty) -> Self { let x: $isize_ty = count.cast(); // note: - is currently wrapping_neg self.offset(-x) } /// Calculates the offset from a pointer using wrapping arithmetic. /// (convenience for `.wrapping_offset(count as isize)`) /// /// `count` is in units of `T`; e.g. a `count` of 3 represents a /// pointer offset of `3 * size_of::<T>()` bytes. /// /// # Safety /// /// The resulting pointer does not need to be in bounds, but it is /// potentially hazardous to dereference (which requires `unsafe`). /// /// Always use `.add(count)` instead when possible, because `add` /// allows the compiler to optimize better. #[inline] pub fn wrapping_add(self, count: $usize_ty) -> Self { self.wrapping_offset(count.cast()) } /// Calculates the offset from a pointer using wrapping arithmetic. /// (convenience for `.wrapping_offset((count as /// isize).wrapping_neg())`) /// /// `count` is in units of `T`; e.g. a `count` of 3 represents a /// pointer offset of `3 * size_of::<T>()` bytes. /// /// # Safety /// /// The resulting pointer does not need to be in bounds, but it is /// potentially hazardous to dereference (which requires `unsafe`). /// /// Always use `.sub(count)` instead when possible, because `sub` /// allows the compiler to optimize better. #[inline] pub fn wrapping_sub(self, count: $usize_ty) -> Self { let x: $isize_ty = count.cast(); self.wrapping_offset(-1 * x) } } impl<T> $id<T> { /// Shuffle vector elements according to `indices`. #[inline] pub fn shuffle1_dyn<I>(self, indices: I) -> Self where Self: codegen::shuffle1_dyn::Shuffle1Dyn<Indices = I>, { codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices) } } test_if! { $test_tt: paste::item! { pub mod [<$id _shuffle1_dyn>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn shuffle1_dyn() { let (null, non_null) = ptr_vals!($id); // alternating = [non_null, null, non_null, null, ...] let mut alternating = $id::<i32>::splat(null); for i in 0..$id::<i32>::lanes() { if i % 2 == 0 { alternating = alternating.replace(i, non_null); } } type Indices = <$id<i32> as codegen::shuffle1_dyn::Shuffle1Dyn>::Indices; // even = [0, 0, 2, 2, 4, 4, ..] let even = { let mut v = Indices::splat(0); for i in 0..$id::<i32>::lanes() { if i % 2 == 0 { v = v.replace(i, (i as u8).into()); } else { v = v.replace(i, (i as u8 - 1).into()); } } v }; // odd = [1, 1, 3, 3, 5, 5, ...] let odd = { let mut v = Indices::splat(0); for i in 0..$id::<i32>::lanes() { if i % 2 != 0 { v = v.replace(i, (i as u8).into()); } else { v = v.replace(i, (i as u8 + 1).into()); } } v }; assert_eq!( alternating.shuffle1_dyn(even), $id::<i32>::splat(non_null) ); if $id::<i32>::lanes() > 1 { assert_eq!( alternating.shuffle1_dyn(odd), $id::<i32>::splat(null) ); } } } } } }; } packed_simd-0.3.3/src/api/ops.rs010064400007650000024000000007161333454520600147170ustar0000000000000000//!
Implementation of the `ops` traits #[macro_use] mod vector_mask_bitwise; #[macro_use] mod scalar_mask_bitwise; #[macro_use] mod vector_arithmetic; #[macro_use] mod scalar_arithmetic; #[macro_use] mod vector_bitwise; #[macro_use] mod scalar_bitwise; #[macro_use] mod vector_shifts; #[macro_use] mod scalar_shifts; #[macro_use] mod vector_rotates; #[macro_use] mod vector_neg; #[macro_use] mod vector_int_min_max; #[macro_use] mod vector_float_min_max; packed_simd-0.3.3/src/api/ops/scalar_arithmetic.rs010064400007650000024000000156661342460246600204110ustar0000000000000000//! Vertical (lane-wise) vector-scalar / scalar-vector arithmetic operations. macro_rules! impl_ops_scalar_arithmetic { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl crate::ops::Add<$elem_ty> for $id { type Output = Self; #[inline] fn add(self, other: $elem_ty) -> Self { self + $id::splat(other) } } impl crate::ops::Add<$id> for $elem_ty { type Output = $id; #[inline] fn add(self, other: $id) -> $id { $id::splat(self) + other } } impl crate::ops::Sub<$elem_ty> for $id { type Output = Self; #[inline] fn sub(self, other: $elem_ty) -> Self { self - $id::splat(other) } } impl crate::ops::Sub<$id> for $elem_ty { type Output = $id; #[inline] fn sub(self, other: $id) -> $id { $id::splat(self) - other } } impl crate::ops::Mul<$elem_ty> for $id { type Output = Self; #[inline] fn mul(self, other: $elem_ty) -> Self { self * $id::splat(other) } } impl crate::ops::Mul<$id> for $elem_ty { type Output = $id; #[inline] fn mul(self, other: $id) -> $id { $id::splat(self) * other } } impl crate::ops::Div<$elem_ty> for $id { type Output = Self; #[inline] fn div(self, other: $elem_ty) -> Self { self / $id::splat(other) } } impl crate::ops::Div<$id> for $elem_ty { type Output = $id; #[inline] fn div(self, other: $id) -> $id { $id::splat(self) / other } } impl crate::ops::Rem<$elem_ty> for $id { type Output = Self; #[inline] fn rem(self, other: $elem_ty) -> Self { self % $id::splat(other) } } impl crate::ops::Rem<$id> for $elem_ty { type Output = $id; #[inline] fn rem(self, other: $id) -> $id { $id::splat(self) % other } } impl crate::ops::AddAssign<$elem_ty> for $id { #[inline] fn add_assign(&mut self, other: $elem_ty) { *self = *self + other; } } impl crate::ops::SubAssign<$elem_ty> for $id { #[inline] fn sub_assign(&mut self, other: $elem_ty) { *self = *self - other; } } impl crate::ops::MulAssign<$elem_ty> for $id { #[inline] fn mul_assign(&mut self, other: $elem_ty) { *self = *self * other; } } impl crate::ops::DivAssign<$elem_ty> for $id { #[inline] fn div_assign(&mut self, other: $elem_ty) { *self = *self / other; } } impl crate::ops::RemAssign<$elem_ty> for $id { #[inline] fn rem_assign(&mut self, other: $elem_ty) { *self = *self % other; } } test_if!{ $test_tt: paste::item! 
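// NOTE: the impls above let a scalar stand in for a `splat`ed vector on
// either side of each operator. A small editorial sketch with the concrete
// `i32x4` type (it mirrors the generated test below):
//
//     let v = i32x4::new(1, 2, 3, 4);
//     assert_eq!(v + 1, i32x4::new(2, 3, 4, 5)); // scalar on the right
//     assert_eq!(1 + v, v + 1);                  // scalar on the left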
{ pub mod [<$id _ops_scalar_arith>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn ops_scalar_arithmetic() { let zi = 0 as $elem_ty; let oi = 1 as $elem_ty; let ti = 2 as $elem_ty; let fi = 4 as $elem_ty; let z = $id::splat(zi); let o = $id::splat(oi); let t = $id::splat(ti); let f = $id::splat(fi); // add assert_eq!(zi + z, z); assert_eq!(z + zi, z); assert_eq!(oi + z, o); assert_eq!(o + zi, o); assert_eq!(ti + z, t); assert_eq!(t + zi, t); assert_eq!(ti + t, f); assert_eq!(t + ti, f); // sub assert_eq!(zi - z, z); assert_eq!(z - zi, z); assert_eq!(oi - z, o); assert_eq!(o - zi, o); assert_eq!(ti - z, t); assert_eq!(t - zi, t); assert_eq!(fi - t, t); assert_eq!(f - ti, t); assert_eq!(f - o - o, t); assert_eq!(f - oi - oi, t); // mul assert_eq!(zi * z, z); assert_eq!(z * zi, z); assert_eq!(zi * o, z); assert_eq!(z * oi, z); assert_eq!(zi * t, z); assert_eq!(z * ti, z); assert_eq!(oi * t, t); assert_eq!(o * ti, t); assert_eq!(ti * t, f); assert_eq!(t * ti, f); // div assert_eq!(zi / o, z); assert_eq!(z / oi, z); assert_eq!(ti / o, t); assert_eq!(t / oi, t); assert_eq!(fi / o, f); assert_eq!(f / oi, f); assert_eq!(ti / t, o); assert_eq!(t / ti, o); assert_eq!(fi / t, t); assert_eq!(f / ti, t); // rem assert_eq!(oi % o, z); assert_eq!(o % oi, z); assert_eq!(fi % t, z); assert_eq!(f % ti, z); { let mut v = z; assert_eq!(v, z); v += oi; // add_assign assert_eq!(v, o); v -= oi; // sub_assign assert_eq!(v, z); v = t; v *= oi; // mul_assign assert_eq!(v, t); v *= ti; assert_eq!(v, f); v /= oi; // div_assign assert_eq!(v, f); v /= ti; assert_eq!(v, t); v %= ti; // rem_assign assert_eq!(v, z); } } } } } }; } packed_simd-0.3.3/src/api/ops/scalar_bitwise.rs010064400007650000024000000127541342460246600177210ustar0000000000000000//! Vertical (lane-wise) vector-scalar / scalar-vector bitwise operations. macro_rules! impl_ops_scalar_bitwise { ( [$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt | ($true:expr, $false:expr) ) => { impl crate::ops::BitXor<$elem_ty> for $id { type Output = Self; #[inline] fn bitxor(self, other: $elem_ty) -> Self { self ^ $id::splat(other) } } impl crate::ops::BitXor<$id> for $elem_ty { type Output = $id; #[inline] fn bitxor(self, other: $id) -> $id { $id::splat(self) ^ other } } impl crate::ops::BitAnd<$elem_ty> for $id { type Output = Self; #[inline] fn bitand(self, other: $elem_ty) -> Self { self & $id::splat(other) } } impl crate::ops::BitAnd<$id> for $elem_ty { type Output = $id; #[inline] fn bitand(self, other: $id) -> $id { $id::splat(self) & other } } impl crate::ops::BitOr<$elem_ty> for $id { type Output = Self; #[inline] fn bitor(self, other: $elem_ty) -> Self { self | $id::splat(other) } } impl crate::ops::BitOr<$id> for $elem_ty { type Output = $id; #[inline] fn bitor(self, other: $id) -> $id { $id::splat(self) | other } } impl crate::ops::BitAndAssign<$elem_ty> for $id { #[inline] fn bitand_assign(&mut self, other: $elem_ty) { *self = *self & other; } } impl crate::ops::BitOrAssign<$elem_ty> for $id { #[inline] fn bitor_assign(&mut self, other: $elem_ty) { *self = *self | other; } } impl crate::ops::BitXorAssign<$elem_ty> for $id { #[inline] fn bitxor_assign(&mut self, other: $elem_ty) { *self = *self ^ other; } } test_if!{ $test_tt: paste::item! 
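// NOTE: the same scalar-broadcast pattern as the arithmetic ops, now for
// `&`, `|` and `^`. Editorial sketch with the concrete `u32x4` type:
//
//     let v = u32x4::new(0b01, 0b10, 0b11, 0b00);
//     assert_eq!(v & 0b01, u32x4::new(0b01, 0b00, 0b01, 0b00));
//     assert_eq!(0b11 ^ v, u32x4::new(0b10, 0b01, 0b00, 0b11));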
{ pub mod [<$id _ops_scalar_bitwise>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn ops_scalar_bitwise() { let zi = 0 as $elem_ty; let oi = 1 as $elem_ty; let ti = 2 as $elem_ty; let z = $id::splat(zi); let o = $id::splat(oi); let t = $id::splat(ti); // BitAnd: assert_eq!(oi & o, o); assert_eq!(o & oi, o); assert_eq!(oi & z, z); assert_eq!(o & zi, z); assert_eq!(zi & o, z); assert_eq!(z & oi, z); assert_eq!(zi & z, z); assert_eq!(z & zi, z); assert_eq!(ti & t, t); assert_eq!(t & ti, t); assert_eq!(ti & o, z); assert_eq!(t & oi, z); assert_eq!(oi & t, z); assert_eq!(o & ti, z); // BitOr: assert_eq!(oi | o, o); assert_eq!(o | oi, o); assert_eq!(oi | z, o); assert_eq!(o | zi, o); assert_eq!(zi | o, o); assert_eq!(z | oi, o); assert_eq!(zi | z, z); assert_eq!(z | zi, z); assert_eq!(ti | t, t); assert_eq!(t | ti, t); assert_eq!(zi | t, t); assert_eq!(z | ti, t); assert_eq!(ti | z, t); assert_eq!(t | zi, t); // BitXOR: assert_eq!(oi ^ o, z); assert_eq!(o ^ oi, z); assert_eq!(zi ^ z, z); assert_eq!(z ^ zi, z); assert_eq!(zi ^ o, o); assert_eq!(z ^ oi, o); assert_eq!(oi ^ z, o); assert_eq!(o ^ zi, o); assert_eq!(ti ^ t, z); assert_eq!(t ^ ti, z); assert_eq!(ti ^ z, t); assert_eq!(t ^ zi, t); assert_eq!(zi ^ t, t); assert_eq!(z ^ ti, t); { // AndAssign: let mut v = o; v &= ti; assert_eq!(v, z); } { // OrAssign: let mut v = z; v |= oi; assert_eq!(v, o); } { // XORAssign: let mut v = z; v ^= oi; assert_eq!(v, o); } } } } } }; } packed_simd-0.3.3/src/api/ops/scalar_mask_bitwise.rs010064400007650000024000000111011342460246600207150ustar0000000000000000//! Vertical (lane-wise) vector-vector bitwise operations. macro_rules! impl_ops_scalar_mask_bitwise { ( [$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt | ($true:expr, $false:expr) ) => { impl crate::ops::BitXor for $id { type Output = Self; #[inline] fn bitxor(self, other: bool) -> Self { self ^ $id::splat(other) } } impl crate::ops::BitXor<$id> for bool { type Output = $id; #[inline] fn bitxor(self, other: $id) -> $id { $id::splat(self) ^ other } } impl crate::ops::BitAnd for $id { type Output = Self; #[inline] fn bitand(self, other: bool) -> Self { self & $id::splat(other) } } impl crate::ops::BitAnd<$id> for bool { type Output = $id; #[inline] fn bitand(self, other: $id) -> $id { $id::splat(self) & other } } impl crate::ops::BitOr for $id { type Output = Self; #[inline] fn bitor(self, other: bool) -> Self { self | $id::splat(other) } } impl crate::ops::BitOr<$id> for bool { type Output = $id; #[inline] fn bitor(self, other: $id) -> $id { $id::splat(self) | other } } impl crate::ops::BitAndAssign for $id { #[inline] fn bitand_assign(&mut self, other: bool) { *self = *self & other; } } impl crate::ops::BitOrAssign for $id { #[inline] fn bitor_assign(&mut self, other: bool) { *self = *self | other; } } impl crate::ops::BitXorAssign for $id { #[inline] fn bitxor_assign(&mut self, other: bool) { *self = *self ^ other; } } test_if!{ $test_tt: paste::item! 
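// NOTE: for mask vectors the broadcast scalar is a plain `bool`. Editorial
// sketch with the concrete `m32x4` mask type:
//
//     let m = m32x4::new(true, false, true, false);
//     assert_eq!(m & true, m);  // AND with `true` is the identity
//     assert_eq!(m ^ true, !m); // XOR with `true` negates every lane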
{ pub mod [<$id _ops_scalar_mask_bitwise>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn ops_scalar_mask_bitwise() { let ti = true; let fi = false; let t = $id::splat(ti); let f = $id::splat(fi); assert!(t != f); assert!(!(t == f)); // BitAnd: assert_eq!(ti & f, f); assert_eq!(t & fi, f); assert_eq!(fi & t, f); assert_eq!(f & ti, f); assert_eq!(ti & t, t); assert_eq!(t & ti, t); assert_eq!(fi & f, f); assert_eq!(f & fi, f); // BitOr: assert_eq!(ti | f, t); assert_eq!(t | fi, t); assert_eq!(fi | t, t); assert_eq!(f | ti, t); assert_eq!(ti | t, t); assert_eq!(t | ti, t); assert_eq!(fi | f, f); assert_eq!(f | fi, f); // BitXOR: assert_eq!(ti ^ f, t); assert_eq!(t ^ fi, t); assert_eq!(fi ^ t, t); assert_eq!(f ^ ti, t); assert_eq!(ti ^ t, f); assert_eq!(t ^ ti, f); assert_eq!(fi ^ f, f); assert_eq!(f ^ fi, f); { // AndAssign: let mut v = f; v &= ti; assert_eq!(v, f); } { // OrAssign: let mut v = f; v |= ti; assert_eq!(v, t); } { // XORAssign: let mut v = f; v ^= ti; assert_eq!(v, t); } } } } } }; } packed_simd-0.3.3/src/api/ops/scalar_shifts.rs010064400007650000024000000101401342460246600175360ustar0000000000000000//! Vertical (lane-wise) vector-scalar shifts operations. macro_rules! impl_ops_scalar_shifts { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl crate::ops::Shl for $id { type Output = Self; #[inline] fn shl(self, other: u32) -> Self { self << $id::splat(other as $elem_ty) } } impl crate::ops::Shr for $id { type Output = Self; #[inline] fn shr(self, other: u32) -> Self { self >> $id::splat(other as $elem_ty) } } impl crate::ops::ShlAssign for $id { #[inline] fn shl_assign(&mut self, other: u32) { *self = *self << other; } } impl crate::ops::ShrAssign for $id { #[inline] fn shr_assign(&mut self, other: u32) { *self = *self >> other; } } test_if!{ $test_tt: paste::item! { pub mod [<$id _ops_scalar_shifts>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"), allow(unreachable_code, unused_variables, unused_mut) )] // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 fn ops_scalar_shifts() { let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); let t = $id::splat(2 as $elem_ty); let f = $id::splat(4 as $elem_ty); { let zi = 0 as u32; let oi = 1 as u32; let ti = 2 as u32; let maxi = (mem::size_of::<$elem_ty>() * 8 - 1) as u32; // shr assert_eq!(z >> zi, z); assert_eq!(z >> oi, z); assert_eq!(z >> ti, z); assert_eq!(z >> ti, z); #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] { // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/13 return; } assert_eq!(o >> zi, o); assert_eq!(t >> zi, t); assert_eq!(f >> zi, f); assert_eq!(f >> maxi, z); assert_eq!(o >> oi, z); assert_eq!(t >> oi, o); assert_eq!(t >> ti, z); assert_eq!(f >> oi, t); assert_eq!(f >> ti, o); assert_eq!(f >> maxi, z); // shl assert_eq!(z << zi, z); assert_eq!(o << zi, o); assert_eq!(t << zi, t); assert_eq!(f << zi, f); assert_eq!(f << maxi, z); assert_eq!(o << oi, t); assert_eq!(o << ti, f); assert_eq!(t << oi, f); { // shr_assign let mut v = o; v >>= oi; assert_eq!(v, z); } { // shl_assign let mut v = o; v <<= oi; assert_eq!(v, t); } } } } } } }; } packed_simd-0.3.3/src/api/ops/vector_arithmetic.rs010064400007650000024000000116221342460246600204320ustar0000000000000000//! Vertical (lane-wise) vector-vector arithmetic operations. macro_rules! 
impl_ops_vector_arithmetic { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl crate::ops::Add for $id { type Output = Self; #[inline] fn add(self, other: Self) -> Self { use crate::llvm::simd_add; unsafe { Simd(simd_add(self.0, other.0)) } } } impl crate::ops::Sub for $id { type Output = Self; #[inline] fn sub(self, other: Self) -> Self { use crate::llvm::simd_sub; unsafe { Simd(simd_sub(self.0, other.0)) } } } impl crate::ops::Mul for $id { type Output = Self; #[inline] fn mul(self, other: Self) -> Self { use crate::llvm::simd_mul; unsafe { Simd(simd_mul(self.0, other.0)) } } } impl crate::ops::Div for $id { type Output = Self; #[inline] fn div(self, other: Self) -> Self { use crate::llvm::simd_div; unsafe { Simd(simd_div(self.0, other.0)) } } } impl crate::ops::Rem for $id { type Output = Self; #[inline] fn rem(self, other: Self) -> Self { use crate::llvm::simd_rem; unsafe { Simd(simd_rem(self.0, other.0)) } } } impl crate::ops::AddAssign for $id { #[inline] fn add_assign(&mut self, other: Self) { *self = *self + other; } } impl crate::ops::SubAssign for $id { #[inline] fn sub_assign(&mut self, other: Self) { *self = *self - other; } } impl crate::ops::MulAssign for $id { #[inline] fn mul_assign(&mut self, other: Self) { *self = *self * other; } } impl crate::ops::DivAssign for $id { #[inline] fn div_assign(&mut self, other: Self) { *self = *self / other; } } impl crate::ops::RemAssign for $id { #[inline] fn rem_assign(&mut self, other: Self) { *self = *self % other; } } test_if!{ $test_tt: paste::item! { pub mod [<$id _ops_vector_arith>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn ops_vector_arithmetic() { let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); let t = $id::splat(2 as $elem_ty); let f = $id::splat(4 as $elem_ty); // add assert_eq!(z + z, z); assert_eq!(o + z, o); assert_eq!(t + z, t); assert_eq!(t + t, f); // sub assert_eq!(z - z, z); assert_eq!(o - z, o); assert_eq!(t - z, t); assert_eq!(f - t, t); assert_eq!(f - o - o, t); // mul assert_eq!(z * z, z); assert_eq!(z * o, z); assert_eq!(z * t, z); assert_eq!(o * t, t); assert_eq!(t * t, f); // div assert_eq!(z / o, z); assert_eq!(t / o, t); assert_eq!(f / o, f); assert_eq!(t / t, o); assert_eq!(f / t, t); // rem assert_eq!(o % o, z); assert_eq!(f % t, z); { let mut v = z; assert_eq!(v, z); v += o; // add_assign assert_eq!(v, o); v -= o; // sub_assign assert_eq!(v, z); v = t; v *= o; // mul_assign assert_eq!(v, t); v *= t; assert_eq!(v, f); v /= o; // div_assign assert_eq!(v, f); v /= t; assert_eq!(v, t); v %= t; // rem_assign assert_eq!(v, z); } } } } } }; } packed_simd-0.3.3/src/api/ops/vector_bitwise.rs010064400007650000024000000102751342460246600177520ustar0000000000000000//! Vertical (lane-wise) vector-vector bitwise operations. macro_rules! 
impl_ops_vector_bitwise { ( [$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt | ($true:expr, $false:expr) ) => { impl crate::ops::Not for $id { type Output = Self; #[inline] fn not(self) -> Self { Self::splat($true) ^ self } } impl crate::ops::BitXor for $id { type Output = Self; #[inline] fn bitxor(self, other: Self) -> Self { use crate::llvm::simd_xor; unsafe { Simd(simd_xor(self.0, other.0)) } } } impl crate::ops::BitAnd for $id { type Output = Self; #[inline] fn bitand(self, other: Self) -> Self { use crate::llvm::simd_and; unsafe { Simd(simd_and(self.0, other.0)) } } } impl crate::ops::BitOr for $id { type Output = Self; #[inline] fn bitor(self, other: Self) -> Self { use crate::llvm::simd_or; unsafe { Simd(simd_or(self.0, other.0)) } } } impl crate::ops::BitAndAssign for $id { #[inline] fn bitand_assign(&mut self, other: Self) { *self = *self & other; } } impl crate::ops::BitOrAssign for $id { #[inline] fn bitor_assign(&mut self, other: Self) { *self = *self | other; } } impl crate::ops::BitXorAssign for $id { #[inline] fn bitxor_assign(&mut self, other: Self) { *self = *self ^ other; } } test_if!{ $test_tt: paste::item! { pub mod [<$id _ops_vector_bitwise>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn ops_vector_bitwise() { let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); let t = $id::splat(2 as $elem_ty); let m = $id::splat(!z.extract(0)); // Not: assert_eq!(!z, m); assert_eq!(!m, z); // BitAnd: assert_eq!(o & o, o); assert_eq!(o & z, z); assert_eq!(z & o, z); assert_eq!(z & z, z); assert_eq!(t & t, t); assert_eq!(t & o, z); assert_eq!(o & t, z); // BitOr: assert_eq!(o | o, o); assert_eq!(o | z, o); assert_eq!(z | o, o); assert_eq!(z | z, z); assert_eq!(t | t, t); assert_eq!(z | t, t); assert_eq!(t | z, t); // BitXOR: assert_eq!(o ^ o, z); assert_eq!(z ^ z, z); assert_eq!(z ^ o, o); assert_eq!(o ^ z, o); assert_eq!(t ^ t, z); assert_eq!(t ^ z, t); assert_eq!(z ^ t, t); { // AndAssign: let mut v = o; v &= t; assert_eq!(v, z); } { // OrAssign: let mut v = z; v |= o; assert_eq!(v, o); } { // XORAssign: let mut v = z; v ^= o; assert_eq!(v, o); } } } } } }; } packed_simd-0.3.3/src/api/ops/vector_float_min_max.rs010064400007650000024000000051501342460246600211150ustar0000000000000000//! Vertical (lane-wise) vector `min` and `max` for floating-point vectors. macro_rules! impl_ops_vector_float_min_max { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Minimum of two vectors. /// /// Returns a new vector containing the minimum value of each of /// the input vector lanes. #[inline] pub fn min(self, x: Self) -> Self { use crate::llvm::simd_fmin; unsafe { Simd(simd_fmin(self.0, x.0)) } } /// Maximum of two vectors. /// /// Returns a new vector containing the maximum value of each of /// the input vector lanes. #[inline] pub fn max(self, x: Self) -> Self { use crate::llvm::simd_fmax; unsafe { Simd(simd_fmax(self.0, x.0)) } } } test_if!{ $test_tt: paste::item! { pub mod [<$id _ops_vector_min_max>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn min_max() { let n = crate::$elem_ty::NAN; let o = $id::splat(1. as $elem_ty); let t = $id::splat(2. as $elem_ty); let mut m = o; // [1., 2., 1., 2., ...] let mut on = o; for i in 0..$id::lanes() { if i % 2 == 0 { m = m.replace(i, 2. 
as $elem_ty); on = on.replace(i, n); } } assert_eq!(o.min(t), o); assert_eq!(t.min(o), o); assert_eq!(m.min(o), o); assert_eq!(o.min(m), o); assert_eq!(m.min(t), m); assert_eq!(t.min(m), m); assert_eq!(o.max(t), t); assert_eq!(t.max(o), t); assert_eq!(m.max(o), m); assert_eq!(o.max(m), m); assert_eq!(m.max(t), t); assert_eq!(t.max(m), t); assert_eq!(on.min(o), o); assert_eq!(o.min(on), o); assert_eq!(on.max(o), o); assert_eq!(o.max(on), o); } } } } }; } packed_simd-0.3.3/src/api/ops/vector_int_min_max.rs010064400007650000024000000037651342460246600206140ustar0000000000000000//! Vertical (lane-wise) vector `min` and `max` for integer vectors. macro_rules! impl_ops_vector_int_min_max { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Minimum of two vectors. /// /// Returns a new vector containing the minimum value of each of /// the input vector lanes. #[inline] pub fn min(self, x: Self) -> Self { self.lt(x).select(self, x) } /// Maximum of two vectors. /// /// Returns a new vector containing the maximum value of each of /// the input vector lanes. #[inline] pub fn max(self, x: Self) -> Self { self.gt(x).select(self, x) } } test_if!{$test_tt: paste::item! { pub mod [<$id _ops_vector_min_max>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn min_max() { let o = $id::splat(1 as $elem_ty); let t = $id::splat(2 as $elem_ty); let mut m = o; for i in 0..$id::lanes() { if i % 2 == 0 { m = m.replace(i, 2 as $elem_ty); } } assert_eq!(o.min(t), o); assert_eq!(t.min(o), o); assert_eq!(m.min(o), o); assert_eq!(o.min(m), o); assert_eq!(m.min(t), m); assert_eq!(t.min(m), m); assert_eq!(o.max(t), t); assert_eq!(t.max(o), t); assert_eq!(m.max(o), m); assert_eq!(o.max(m), m); assert_eq!(m.max(t), t); assert_eq!(t.max(m), t); } } } } }; } packed_simd-0.3.3/src/api/ops/vector_mask_bitwise.rs010064400007650000024000000073701342460246600207670ustar0000000000000000//! Vertical (lane-wise) vector-vector bitwise operations. macro_rules! impl_ops_vector_mask_bitwise { ( [$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt | ($true:expr, $false:expr) ) => { impl crate::ops::Not for $id { type Output = Self; #[inline] fn not(self) -> Self { Self::splat($true) ^ self } } impl crate::ops::BitXor for $id { type Output = Self; #[inline] fn bitxor(self, other: Self) -> Self { use crate::llvm::simd_xor; unsafe { Simd(simd_xor(self.0, other.0)) } } } impl crate::ops::BitAnd for $id { type Output = Self; #[inline] fn bitand(self, other: Self) -> Self { use crate::llvm::simd_and; unsafe { Simd(simd_and(self.0, other.0)) } } } impl crate::ops::BitOr for $id { type Output = Self; #[inline] fn bitor(self, other: Self) -> Self { use crate::llvm::simd_or; unsafe { Simd(simd_or(self.0, other.0)) } } } impl crate::ops::BitAndAssign for $id { #[inline] fn bitand_assign(&mut self, other: Self) { *self = *self & other; } } impl crate::ops::BitOrAssign for $id { #[inline] fn bitor_assign(&mut self, other: Self) { *self = *self | other; } } impl crate::ops::BitXorAssign for $id { #[inline] fn bitxor_assign(&mut self, other: Self) { *self = *self ^ other; } } test_if!{ $test_tt: paste::item! 
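// NOTE: on mask vectors the binary operators act lane-wise as boolean
// algebra. Editorial sketch with the concrete `m16x8` mask type:
//
//     let t = m16x8::splat(true);
//     let f = m16x8::splat(false);
//     assert_eq!(t ^ t, f); // XOR with itself clears every lane
//     assert_eq!(!(t & f), t);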
{ pub mod [<$id _ops_vector_mask_bitwise>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn ops_vector_mask_bitwise() { let t = $id::splat(true); let f = $id::splat(false); assert!(t != f); assert!(!(t == f)); // Not: assert_eq!(!t, f); assert_eq!(t, !f); // BitAnd: assert_eq!(t & f, f); assert_eq!(f & t, f); assert_eq!(t & t, t); assert_eq!(f & f, f); // BitOr: assert_eq!(t | f, t); assert_eq!(f | t, t); assert_eq!(t | t, t); assert_eq!(f | f, f); // BitXOR: assert_eq!(t ^ f, t); assert_eq!(f ^ t, t); assert_eq!(t ^ t, f); assert_eq!(f ^ f, f); { // AndAssign: let mut v = f; v &= t; assert_eq!(v, f); } { // OrAssign: let mut v = f; v |= t; assert_eq!(v, t); } { // XORAssign: let mut v = f; v ^= t; assert_eq!(v, t); } } } } } }; } packed_simd-0.3.3/src/api/ops/vector_neg.rs010064400007650000024000000030411342460246600170460ustar0000000000000000//! Vertical (lane-wise) vector `Neg`. macro_rules! impl_ops_vector_neg { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl crate::ops::Neg for $id { type Output = Self; #[inline] fn neg(self) -> Self { Self::splat(-1 as $elem_ty) * self } } test_if!{ $test_tt: paste::item! { pub mod [<$id _ops_vector_neg>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn neg() { let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); let t = $id::splat(2 as $elem_ty); let f = $id::splat(4 as $elem_ty); let nz = $id::splat(-(0 as $elem_ty)); let no = $id::splat(-(1 as $elem_ty)); let nt = $id::splat(-(2 as $elem_ty)); let nf = $id::splat(-(4 as $elem_ty)); assert_eq!(-z, nz); assert_eq!(-o, no); assert_eq!(-t, nt); assert_eq!(-f, nf); assert_eq!(z, -nz); assert_eq!(o, -no); assert_eq!(t, -nt); assert_eq!(f, -nf); } } } } }; } packed_simd-0.3.3/src/api/ops/vector_rotates.rs010064400007650000024000000075741342460246600177730ustar0000000000000000//! Vertical (lane-wise) vector rotates operations. #![allow(unused)] macro_rules! impl_ops_vector_rotates { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Shifts the bits of each lane to the left by the specified /// amount in the corresponding lane of `n`, wrapping the /// truncated bits to the end of the resulting integer. /// /// Note: this is neither the same operation as `<<` nor equivalent /// to `slice::rotate_left`. #[inline] pub fn rotate_left(self, n: $id) -> $id { const LANE_WIDTH: $elem_ty = crate::mem::size_of::<$elem_ty>() as $elem_ty * 8; // Protect against undefined behavior for over-long bit shifts let n = n % LANE_WIDTH; (self << n) | (self >> ((LANE_WIDTH - n) % LANE_WIDTH)) } /// Shifts the bits of each lane to the right by the specified /// amount in the corresponding lane of `n`, wrapping the /// truncated bits to the beginning of the resulting integer. /// /// Note: this is neither the same operation as `>>` nor equivalent /// to `slice::rotate_right`. #[inline] pub fn rotate_right(self, n: $id) -> $id { const LANE_WIDTH: $elem_ty = crate::mem::size_of::<$elem_ty>() as $elem_ty * 8; // Protect against undefined behavior for over-long bit shifts let n = n % LANE_WIDTH; (self >> n) | (self << ((LANE_WIDTH - n) % LANE_WIDTH)) } } test_if!{ $test_tt: paste::item!
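// NOTE: unlike `<<` and `>>`, the rotates above take a *vector* of per-lane
// rotate amounts. Editorial sketch with the concrete `u32x4` type:
//
//     let x = u32x4::splat(0x8000_0001);
//     let n = u32x4::new(0, 1, 4, 31);
//     let r = x.rotate_left(n); // each lane rotated by its own amount
//     assert_eq!(r.extract(1), 0b11); // the top bit wrapped around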
{ // FIXME: // https://github.com/rust-lang-nursery/packed_simd/issues/75 #[cfg(not(any( target_arch = "s390x", target_arch = "sparc64", )))] pub mod [<$id _ops_vector_rotate>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn rotate_ops() { let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); let t = $id::splat(2 as $elem_ty); let f = $id::splat(4 as $elem_ty); let max = $id::splat( (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty); // rotate_right assert_eq!(z.rotate_right(z), z); assert_eq!(z.rotate_right(o), z); assert_eq!(z.rotate_right(t), z); assert_eq!(o.rotate_right(z), o); assert_eq!(t.rotate_right(z), t); assert_eq!(f.rotate_right(z), f); assert_eq!(f.rotate_right(max), f << 1); assert_eq!(o.rotate_right(o), o << max); assert_eq!(t.rotate_right(o), o); assert_eq!(t.rotate_right(t), o << max); assert_eq!(f.rotate_right(o), t); assert_eq!(f.rotate_right(t), o); // rotate_left assert_eq!(z.rotate_left(z), z); assert_eq!(o.rotate_left(z), o); assert_eq!(t.rotate_left(z), t); assert_eq!(f.rotate_left(z), f); assert_eq!(f.rotate_left(max), t); assert_eq!(o.rotate_left(o), t); assert_eq!(o.rotate_left(t), f); assert_eq!(t.rotate_left(o), f); } } } } }; } packed_simd-0.3.3/src/api/ops/vector_shifts.rs010064400007650000024000000077561342460246600176160ustar0000000000000000//! Vertical (lane-wise) vector-vector shifts operations. macro_rules! impl_ops_vector_shifts { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl crate::ops::Shl<$id> for $id { type Output = Self; #[inline] fn shl(self, other: Self) -> Self { use crate::llvm::simd_shl; unsafe { Simd(simd_shl(self.0, other.0)) } } } impl crate::ops::Shr<$id> for $id { type Output = Self; #[inline] fn shr(self, other: Self) -> Self { use crate::llvm::simd_shr; unsafe { Simd(simd_shr(self.0, other.0)) } } } impl crate::ops::ShlAssign<$id> for $id { #[inline] fn shl_assign(&mut self, other: Self) { *self = *self << other; } } impl crate::ops::ShrAssign<$id> for $id { #[inline] fn shr_assign(&mut self, other: Self) { *self = *self >> other; } } test_if!{ $test_tt: paste::item! 
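// NOTE: `Shl`/`Shr` here take the shift amount for each lane from the
// corresponding lane of another vector, unlike the scalar `u32` shifts
// earlier in this module. Editorial sketch with the concrete `u32x4` type:
//
//     let x = u32x4::splat(1);
//     let n = u32x4::new(0, 1, 2, 3);
//     assert_eq!(x << n, u32x4::new(1, 2, 4, 8));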
{ pub mod [<$id _ops_vector_shifts>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"), allow(unreachable_code, unused_variables, unused_mut) )] // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 fn ops_vector_shifts() { let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); let t = $id::splat(2 as $elem_ty); let f = $id::splat(4 as $elem_ty); let max = $id::splat( (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty ); // shr assert_eq!(z >> z, z); assert_eq!(z >> o, z); assert_eq!(z >> t, z); assert_eq!(z >> t, z); #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] { // FIXME: rust produces bad codegen for shifts: // https://github.com/rust-lang-nursery/packed_simd/issues/13 return; } assert_eq!(o >> z, o); assert_eq!(t >> z, t); assert_eq!(f >> z, f); assert_eq!(f >> max, z); assert_eq!(o >> o, z); assert_eq!(t >> o, o); assert_eq!(t >> t, z); assert_eq!(f >> o, t); assert_eq!(f >> t, o); assert_eq!(f >> max, z); // shl assert_eq!(z << z, z); assert_eq!(o << z, o); assert_eq!(t << z, t); assert_eq!(f << z, f); assert_eq!(f << max, z); assert_eq!(o << o, t); assert_eq!(o << t, f); assert_eq!(t << o, f); { // shr_assign let mut v = o; v >>= o; assert_eq!(v, z); } { // shl_assign let mut v = o; v <<= o; assert_eq!(v, t); } } } } } }; } packed_simd-0.3.3/src/api/ptr.rs010064400007650000024000000000711333454520600147150ustar0000000000000000//! Vector of pointers #[macro_use] mod gather_scatter; packed_simd-0.3.3/src/api/ptr/gather_scatter.rs010064400007650000024000000222311342460246600177200ustar0000000000000000//! Implements masked gathers and scatters for vectors of pointers macro_rules! impl_ptr_read { ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident | $test_tt:tt) => { impl<T> $id<T> where [T; $elem_count]: sealed::SimdArray, { /// Reads selected vector elements from memory. /// /// Instantiates a new vector by reading the values from `self` for /// those lanes whose `mask` is `true`, and using the elements of /// `value` otherwise. /// /// No memory is accessed for those lanes of `self` whose `mask` is /// `false`. /// /// # Safety /// /// This method is unsafe because it dereferences raw pointers. The /// pointers must be aligned to `mem::align_of::<T>()`. #[inline] pub unsafe fn read<M>( self, mask: Simd<[M; $elem_count]>, value: Simd<[T; $elem_count]>, ) -> Simd<[T; $elem_count]> where M: sealed::Mask, [M; $elem_count]: sealed::SimdArray, { use crate::llvm::simd_gather; Simd(simd_gather(value.0, self.0, mask.0)) } } test_if! { $test_tt: paste::item!
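// NOTE: `read` is a masked *gather*: where the mask is `true` a lane
// dereferences its own pointer, and where it is `false` the lane is taken
// from `value` instead. Editorial sketch (`cptrx4<i32>` and its `msizex4`
// mask type are assumed generated names):
//
//     let data = [10_i32, 20, 30, 40];
//     let mut p = cptrx4::<i32>::null();
//     for i in 0..4 { p = p.replace(i, &data[i] as *const i32); }
//     let def = i32x4::splat(-1);
//     let m = msizex4::new(true, false, true, false);
//     let got = unsafe { p.read(m, def) }; // [10, -1, 30, -1]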
{ mod [<$id _read>] { use super::*; #[test] fn read() { let mut v = [0_i32; $elem_count]; for i in 0..$elem_count { v[i] = i as i32; } let mut ptr = $id::<i32>::null(); for i in 0..$elem_count { ptr = ptr.replace(i, unsafe { crate::mem::transmute(&v[i] as *const i32) }); } // all mask elements are true: let mask = $mask_ty::splat(true); let def = Simd::<[i32; $elem_count]>::splat(42_i32); let r: Simd<[i32; $elem_count]> = unsafe { ptr.read(mask, def) }; assert_eq!( r, Simd::<[i32; $elem_count]>::from_slice_unaligned( &v ) ); let mut mask = mask; for i in 0..$elem_count { if i % 2 != 0 { mask = mask.replace(i, false); } } // even mask elements are true, odd ones are false: let r: Simd<[i32; $elem_count]> = unsafe { ptr.read(mask, def) }; let mut e = v; for i in 0..$elem_count { if i % 2 != 0 { e[i] = 42; } } assert_eq!( r, Simd::<[i32; $elem_count]>::from_slice_unaligned( &e ) ); // all mask elements are false: let mask = $mask_ty::splat(false); let def = Simd::<[i32; $elem_count]>::splat(42_i32); let r: Simd<[i32; $elem_count]> = unsafe { ptr.read(mask, def) }; assert_eq!(r, def); } } } } }; } macro_rules! impl_ptr_write { ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident | $test_tt:tt) => { impl<T> $id<T> where [T; $elem_count]: sealed::SimdArray, { /// Writes selected vector elements to memory. /// /// Writes the lanes of `value` for which the mask is `true` to /// their corresponding memory addresses in `self`. /// /// No memory is accessed for those lanes of `self` whose `mask` is /// `false`. /// /// Overlapping memory addresses of `self` are written to in order /// from the least-significant to the most-significant element. /// /// # Safety /// /// This method is unsafe because it dereferences raw pointers. The /// pointers must be aligned to `mem::align_of::<T>()`. #[inline] pub unsafe fn write<M>( self, mask: Simd<[M; $elem_count]>, value: Simd<[T; $elem_count]>, ) where M: sealed::Mask, [M; $elem_count]: sealed::SimdArray, { // FIXME: // https://github.com/rust-lang-nursery/packed_simd/issues/85 #[cfg(not(target_arch = "mips"))] { use crate::llvm::simd_scatter; simd_scatter(value.0, self.0, mask.0) } #[cfg(target_arch = "mips")] { let m_ptr = &mask as *const Simd<[M; $elem_count]> as *const M; for i in 0..$elem_count { let m = ptr::read(m_ptr.add(i)); if m.test() { let t_ptr = &self as *const Simd<[*mut T; $elem_count]> as *mut *mut T; let v_ptr = &value as *const Simd<[T; $elem_count]> as *const T; ptr::write( ptr::read(t_ptr.add(i)), ptr::read(v_ptr.add(i)), ); } } } } } test_if! { $test_tt: paste::item! { mod [<$id _write>] { use super::*; #[test] fn write() { // fourty_two = [42, 42, 42, ...] let fourty_two = Simd::<[i32; $elem_count]>::splat(42_i32); // This test will write to this array let mut arr = [0_i32; $elem_count]; for i in 0..$elem_count { arr[i] = i as i32; } // arr = [0, 1, 2, ...] let mut ptr = $id::<i32>::null(); for i in 0..$elem_count { ptr = ptr.replace(i, unsafe { crate::mem::transmute(arr.as_ptr().add(i)) }); } // ptr = [&arr[0], &arr[1], ...] // write `fourty_two` to all elements of `arr` { let backup = arr; unsafe { ptr.write($mask_ty::splat(true), fourty_two) }; assert_eq!(arr, [42_i32; $elem_count]); arr = backup; // arr = [0, 1, 2, ...] } // write 42 to even elements of arr: { // set odd elements of the mask to false let mut mask = $mask_ty::splat(true); for i in 0..$elem_count { if i % 2 != 0 { mask = mask.replace(i, false); } } // mask = [true, false, true, false, ...] // expected result r = [42, 1, 42, 3, 42, 5, ...]
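// (lanes whose mask lane is `false` are not written at all, so the
// odd elements of `arr` must keep the values they already hold;
// that is exactly what `r` encodes below)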
let mut r = arr; for i in 0..$elem_count { if i % 2 == 0 { r[i] = 42; } } let backup = arr; unsafe { ptr.write(mask, fourty_two) }; assert_eq!(arr, r); arr = backup; // arr = [0, 1, 2, 3, ...] } // write 42 to no elements of arr { let backup = arr; unsafe { ptr.write($mask_ty::splat(false), fourty_two) }; assert_eq!(arr, backup); } } } } } }; } packed_simd-0.3.3/src/api/reductions.rs010064400007650000024000000002431332536563700163000ustar0000000000000000//! Reductions #[macro_use] mod float_arithmetic; #[macro_use] mod integer_arithmetic; #[macro_use] mod bitwise; #[macro_use] mod mask; #[macro_use] mod min_max; packed_simd-0.3.3/src/api/reductions/bitwise.rs010064400007650000024000000137351342460246600177520ustar0000000000000000//! Implements portable horizontal bitwise vector reductions. #![allow(unused)] macro_rules! impl_reduction_bitwise { ( [$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident | $test_tt:tt | ($convert:expr) | ($true:expr, $false:expr) ) => { impl $id { /// Lane-wise bitwise `and` of the vector elements. /// /// Note: if the vector has one lane, the first element of the /// vector is returned. #[inline] pub fn and(self) -> $elem_ty { #[cfg(not(target_arch = "aarch64"))] { use crate::llvm::simd_reduce_and; let r: $ielem_ty = unsafe { simd_reduce_and(self.0) }; $convert(r) } #[cfg(target_arch = "aarch64")] { // FIXME: broken on aarch64 // https://github.com/rust-lang-nursery/packed_simd/issues/15 let mut x = self.extract(0) as $elem_ty; for i in 1..$id::lanes() { x &= self.extract(i) as $elem_ty; } x } } /// Lane-wise bitwise `or` of the vector elements. /// /// Note: if the vector has one lane, the first element of the /// vector is returned. #[inline] pub fn or(self) -> $elem_ty { #[cfg(not(target_arch = "aarch64"))] { use crate::llvm::simd_reduce_or; let r: $ielem_ty = unsafe { simd_reduce_or(self.0) }; $convert(r) } #[cfg(target_arch = "aarch64")] { // FIXME: broken on aarch64 // https://github.com/rust-lang-nursery/packed_simd/issues/15 let mut x = self.extract(0) as $elem_ty; for i in 1..$id::lanes() { x |= self.extract(i) as $elem_ty; } x } } /// Lane-wise bitwise `xor` of the vector elements. /// /// Note: if the vector has one lane, the first element of the /// vector is returned. #[inline] pub fn xor(self) -> $elem_ty { #[cfg(not(target_arch = "aarch64"))] { use crate::llvm::simd_reduce_xor; let r: $ielem_ty = unsafe { simd_reduce_xor(self.0) }; $convert(r) } #[cfg(target_arch = "aarch64")] { // FIXME: broken on aarch64 // https://github.com/rust-lang-nursery/packed_simd/issues/15 let mut x = self.extract(0) as $elem_ty; for i in 1..$id::lanes() { x ^= self.extract(i) as $elem_ty; } x } } } test_if!{ $test_tt: paste::item! 
{ pub mod [<$id _reduction_bitwise>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn and() { let v = $id::splat($false); assert_eq!(v.and(), $false); let v = $id::splat($true); assert_eq!(v.and(), $true); let v = $id::splat($false); let v = v.replace(0, $true); if $id::lanes() > 1 { assert_eq!(v.and(), $false); } else { assert_eq!(v.and(), $true); } let v = $id::splat($true); let v = v.replace(0, $false); assert_eq!(v.and(), $false); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn or() { let v = $id::splat($false); assert_eq!(v.or(), $false); let v = $id::splat($true); assert_eq!(v.or(), $true); let v = $id::splat($false); let v = v.replace(0, $true); assert_eq!(v.or(), $true); let v = $id::splat($true); let v = v.replace(0, $false); if $id::lanes() > 1 { assert_eq!(v.or(), $true); } else { assert_eq!(v.or(), $false); } } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn xor() { let v = $id::splat($false); assert_eq!(v.xor(), $false); let v = $id::splat($true); if $id::lanes() > 1 { assert_eq!(v.xor(), $false); } else { assert_eq!(v.xor(), $true); } let v = $id::splat($false); let v = v.replace(0, $true); assert_eq!(v.xor(), $true); let v = $id::splat($true); let v = v.replace(0, $false); if $id::lanes() > 1 { assert_eq!(v.xor(), $true); } else { assert_eq!(v.xor(), $false); } } } } } }; } packed_simd-0.3.3/src/api/reductions/float_arithmetic.rs010064400007650000024000000326561342461154100216170ustar0000000000000000//! Implements portable horizontal float vector arithmetic reductions. macro_rules! impl_reduction_float_arithmetic { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Horizontal sum of the vector elements. /// /// The intrinsic performs a tree-reduction of the vector elements. /// That is, for an 8 element vector: /// /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) /// /// If one of the vector element is `NaN` the reduction returns /// `NaN`. The resulting `NaN` is not required to be equal to any /// of the `NaN`s in the vector. #[inline] pub fn sum(self) -> $elem_ty { #[cfg(not(target_arch = "aarch64"))] { use crate::llvm::simd_reduce_add_ordered; unsafe { simd_reduce_add_ordered(self.0, 0 as $elem_ty) } } #[cfg(target_arch = "aarch64")] { // FIXME: broken on AArch64 // https://github.com/rust-lang-nursery/packed_simd/issues/15 let mut x = self.extract(0) as $elem_ty; for i in 1..$id::lanes() { x += self.extract(i) as $elem_ty; } x } } /// Horizontal product of the vector elements. /// /// The intrinsic performs a tree-reduction of the vector elements. /// That is, for an 8 element vector: /// /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) /// /// If one of the vector element is `NaN` the reduction returns /// `NaN`. The resulting `NaN` is not required to be equal to any /// of the `NaN`s in the vector. 
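/// # Examples
///
/// A minimal sketch of the reduction, assuming the `f32x4`
/// instantiation of this macro (any float vector generated here
/// behaves the same way):
///
/// ```
/// # use packed_simd::*;
/// let v = f32x4::new(1., 2., 3., 4.);
/// assert_eq!(v.product(), 24.);
/// ```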
#[inline] pub fn product(self) -> $elem_ty { #[cfg(not(target_arch = "aarch64"))] { use crate::llvm::simd_reduce_mul_ordered; unsafe { simd_reduce_mul_ordered(self.0, 1 as $elem_ty) } } #[cfg(target_arch = "aarch64")] { // FIXME: broken on AArch64 // https://github.com/rust-lang-nursery/packed_simd/issues/15 let mut x = self.extract(0) as $elem_ty; for i in 1..$id::lanes() { x *= self.extract(i) as $elem_ty; } x } } } impl crate::iter::Sum for $id { #[inline] fn sum<I: Iterator<Item = $id>>(iter: I) -> $id { iter.fold($id::splat(0.), crate::ops::Add::add) } } impl crate::iter::Product for $id { #[inline] fn product<I: Iterator<Item = $id>>(iter: I) -> $id { iter.fold($id::splat(1.), crate::ops::Mul::mul) } } impl<'a> crate::iter::Sum<&'a $id> for $id { #[inline] fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id { iter.fold($id::splat(0.), |a, b| crate::ops::Add::add(a, *b)) } } impl<'a> crate::iter::Product<&'a $id> for $id { #[inline] fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id { iter.fold($id::splat(1.), |a, b| crate::ops::Mul::mul(a, *b)) } } test_if! { $test_tt: paste::item! { pub mod [<$id _reduction_float_arith>] { use super::*; fn alternating(x: usize) -> $id { let mut v = $id::splat(1 as $elem_ty); for i in 0..$id::lanes() { if i % x == 0 { v = v.replace(i, 2 as $elem_ty); } } v } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn sum() { let v = $id::splat(0 as $elem_ty); assert_eq!(v.sum(), 0 as $elem_ty); let v = $id::splat(1 as $elem_ty); assert_eq!(v.sum(), $id::lanes() as $elem_ty); let v = alternating(2); assert_eq!( v.sum(), ($id::lanes() / 2 + $id::lanes()) as $elem_ty ); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn product() { let v = $id::splat(0 as $elem_ty); assert_eq!(v.product(), 0 as $elem_ty); let v = $id::splat(1 as $elem_ty); assert_eq!(v.product(), 1 as $elem_ty); let f = match $id::lanes() { 64 => 16, 32 => 8, 16 => 4, _ => 2, }; let v = alternating(f); assert_eq!( v.product(), (2_usize.pow(($id::lanes() / f) as u32) as $elem_ty) ); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[allow(unreachable_code)] #[allow(unused_mut)] // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 fn sum_nan() { // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 // https://github.com/rust-lang-nursery/packed_simd/issues/6 return; let n0 = crate::$elem_ty::NAN; let v0 = $id::splat(-3.0); for i in 0..$id::lanes() { let mut v = v0.replace(i, n0); // If the vector contains a NaN the result is NaN: assert!( v.sum().is_nan(), "nan at {} => {} | {:?}", i, v.sum(), v ); for j in 0..i { v = v.replace(j, n0); assert!(v.sum().is_nan()); } } let v = $id::splat(n0); assert!(v.sum().is_nan(), "all nans | {:?}", v); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[allow(unreachable_code)] #[allow(unused_mut)] // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 fn product_nan() { // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 // https://github.com/rust-lang-nursery/packed_simd/issues/6 return; let n0 = crate::$elem_ty::NAN; let v0 = $id::splat(-3.0); for i in 0..$id::lanes() { let mut v = v0.replace(i, n0); // If the vector contains a NaN the result is NaN: assert!( v.product().is_nan(), "nan at {} => {} | {:?}", i, v.product(), v ); for j in 0..i { v = v.replace(j, n0); assert!(v.product().is_nan()); } } let v = $id::splat(n0); assert!(v.product().is_nan(), "all nans | {:?}", v); } #[cfg_attr(not(target_arch = "wasm32"), test)]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[allow(unused, dead_code)] fn sum_roundoff() { // Performs a tree-reduction fn tree_reduce_sum(a: &[$elem_ty]) -> $elem_ty { assert!(!a.is_empty()); if a.len() == 1 { a[0] } else if a.len() == 2 { a[0] + a[1] } else { let mid = a.len() / 2; let (left, right) = a.split_at(mid); tree_reduce_sum(left) + tree_reduce_sum(right) } } let mut start = crate::$elem_ty::EPSILON; let mut scalar_reduction = 0. as $elem_ty; let mut v = $id::splat(0. as $elem_ty); for i in 0..$id::lanes() { let c = if i % 2 == 0 { 1e3 } else { -1. }; start *= 3.14 * c; scalar_reduction += start; v = v.replace(i, start); } let simd_reduction = v.sum(); let mut a = [0. as $elem_ty; $id::lanes()]; v.write_to_slice_unaligned(&mut a); let tree_reduction = tree_reduce_sum(&a); // tolerate 1 ULP difference: let red_bits = simd_reduction.to_bits(); let tree_bits = tree_reduction.to_bits(); assert!( if red_bits > tree_bits { red_bits - tree_bits } else { tree_bits - red_bits } < 2, "vector: {:?} | simd_reduction: {:?} | \ tree_reduction: {} | scalar_reduction: {}", v, simd_reduction, tree_reduction, scalar_reduction ); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[allow(unused, dead_code)] fn product_roundoff() { // Performs a tree-reduction fn tree_reduce_product(a: &[$elem_ty]) -> $elem_ty { assert!(!a.is_empty()); if a.len() == 1 { a[0] } else if a.len() == 2 { a[0] * a[1] } else { let mid = a.len() / 2; let (left, right) = a.split_at(mid); tree_reduce_product(left) * tree_reduce_product(right) } } let mut start = crate::$elem_ty::EPSILON; let mut scalar_reduction = 1. as $elem_ty; let mut v = $id::splat(0. as $elem_ty); for i in 0..$id::lanes() { let c = if i % 2 == 0 { 1e3 } else { -1. }; start *= 3.14 * c; scalar_reduction *= start; v = v.replace(i, start); } let simd_reduction = v.product(); let mut a = [0. as $elem_ty; $id::lanes()]; v.write_to_slice_unaligned(&mut a); let tree_reduction = tree_reduce_product(&a); // tolerate 1 ULP difference: let red_bits = simd_reduction.to_bits(); let tree_bits = tree_reduction.to_bits(); assert!( if red_bits > tree_bits { red_bits - tree_bits } else { tree_bits - red_bits } < 2, "vector: {:?} | simd_reduction: {:?} | \ tree_reduction: {} | scalar_reduction: {}", v, simd_reduction, tree_reduction, scalar_reduction ); } } } } }; } packed_simd-0.3.3/src/api/reductions/integer_arithmetic.rs010064400007650000024000000176661342461154100221530ustar0000000000000000//! Implements portable horizontal integer vector arithmetic reductions. macro_rules! impl_reduction_integer_arithmetic { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident | $test_tt:tt) => { impl $id { /// Horizontal wrapping sum of the vector elements. /// /// The intrinsic performs a tree-reduction of the vector elements. /// That is, for an 8 element vector: /// /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) /// /// If an operation overflows it returns the mathematical result /// modulo `2^n` where `n` is the number of times it overflows. 
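/// # Examples
///
/// A minimal sketch of the wrapping behavior, assuming the `u8x4`
/// instantiation of this macro: four lanes of `128` sum to `512`,
/// which wraps to `0` in an 8-bit element:
///
/// ```
/// # use packed_simd::*;
/// assert_eq!(u8x4::splat(128).wrapping_sum(), 0);
/// assert_eq!(u8x4::new(1, 2, 3, 4).wrapping_sum(), 10);
/// ```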
#[inline] pub fn wrapping_sum(self) -> $elem_ty { #[cfg(not(target_arch = "aarch64"))] { use crate::llvm::simd_reduce_add_ordered; let v: $ielem_ty = unsafe { simd_reduce_add_ordered(self.0, 0 as $ielem_ty) }; v as $elem_ty } #[cfg(target_arch = "aarch64")] { // FIXME: broken on AArch64 // https://github.com/rust-lang-nursery/packed_simd/issues/15 let mut x = self.extract(0) as $elem_ty; for i in 1..$id::lanes() { x = x.wrapping_add(self.extract(i) as $elem_ty); } x } } /// Horizontal wrapping product of the vector elements. /// /// The intrinsic performs a tree-reduction of the vector elements. /// That is, for an 8 element vector: /// /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) /// /// If an operation overflows it returns the mathematical result /// modulo `2^n` where `n` is the number of times it overflows. #[inline] pub fn wrapping_product(self) -> $elem_ty { #[cfg(not(target_arch = "aarch64"))] { use crate::llvm::simd_reduce_mul_ordered; let v: $ielem_ty = unsafe { simd_reduce_mul_ordered(self.0, 1 as $ielem_ty) }; v as $elem_ty } #[cfg(target_arch = "aarch64")] { // FIXME: broken on AArch64 // https://github.com/rust-lang-nursery/packed_simd/issues/15 let mut x = self.extract(0) as $elem_ty; for i in 1..$id::lanes() { x = x.wrapping_mul(self.extract(i) as $elem_ty); } x } } } impl crate::iter::Sum for $id { #[inline] fn sum<I: Iterator<Item = $id>>(iter: I) -> $id { iter.fold($id::splat(0), crate::ops::Add::add) } } impl crate::iter::Product for $id { #[inline] fn product<I: Iterator<Item = $id>>(iter: I) -> $id { iter.fold($id::splat(1), crate::ops::Mul::mul) } } impl<'a> crate::iter::Sum<&'a $id> for $id { #[inline] fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id { iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b)) } } impl<'a> crate::iter::Product<&'a $id> for $id { #[inline] fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id { iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b)) } } test_if! { $test_tt: paste::item!
{ pub mod [<$id _reduction_int_arith>] { use super::*; fn alternating(x: usize) -> $id { let mut v = $id::splat(1 as $elem_ty); for i in 0..$id::lanes() { if i % x == 0 { v = v.replace(i, 2 as $elem_ty); } } v } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn wrapping_sum() { let v = $id::splat(0 as $elem_ty); assert_eq!(v.wrapping_sum(), 0 as $elem_ty); let v = $id::splat(1 as $elem_ty); assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty); let v = alternating(2); if $id::lanes() > 1 { assert_eq!( v.wrapping_sum(), ($id::lanes() / 2 + $id::lanes()) as $elem_ty ); } else { assert_eq!( v.wrapping_sum(), 2 as $elem_ty ); } } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn wrapping_sum_overflow() { let start = $elem_ty::max_value() - ($id::lanes() as $elem_ty / 2); let v = $id::splat(start as $elem_ty); let vwrapping_sum = v.wrapping_sum(); let mut wrapping_sum = start; for _ in 1..$id::lanes() { wrapping_sum = wrapping_sum.wrapping_add(start); } assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn wrapping_product() { let v = $id::splat(0 as $elem_ty); assert_eq!(v.wrapping_product(), 0 as $elem_ty); let v = $id::splat(1 as $elem_ty); assert_eq!(v.wrapping_product(), 1 as $elem_ty); let f = match $id::lanes() { 64 => 16, 32 => 8, 16 => 4, _ => 2, }; let v = alternating(f); if $id::lanes() > 1 { assert_eq!( v.wrapping_product(), (2_usize.pow(($id::lanes() / f) as u32) as $elem_ty) ); } else { assert_eq!( v.wrapping_product(), 2 as $elem_ty ); } } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn wrapping_product_overflow() { let start = $elem_ty::max_value() - ($id::lanes() as $elem_ty / 2); let v = $id::splat(start as $elem_ty); let vmul = v.wrapping_product(); let mut mul = start; for _ in 1..$id::lanes() { mul = mul.wrapping_mul(start); } assert_eq!(mul, vmul, "v = {:?}", v); } } } } }; } packed_simd-0.3.3/src/api/reductions/mask.rs010064400007650000024000000066701342460246600172370ustar0000000000000000//! Implements portable horizontal mask reductions. macro_rules! impl_reduction_mask { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Are `all` vector lanes `true`? #[inline] pub fn all(self) -> bool { unsafe { crate::codegen::reductions::mask::All::all(self) } } /// Is `any` vector lane `true`? #[inline] pub fn any(self) -> bool { unsafe { crate::codegen::reductions::mask::Any::any(self) } } /// Are `all` vector lanes `false`? #[inline] pub fn none(self) -> bool { !self.any() } } test_if!{ $test_tt: paste::item! 
{ pub mod [<$id _reduction>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn all() { let a = $id::splat(true); assert!(a.all()); let a = $id::splat(false); assert!(!a.all()); if $id::lanes() > 1 { for i in 0..$id::lanes() { let mut a = $id::splat(true); a = a.replace(i, false); assert!(!a.all()); let mut a = $id::splat(false); a = a.replace(i, true); assert!(!a.all()); } } } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn any() { let a = $id::splat(true); assert!(a.any()); let a = $id::splat(false); assert!(!a.any()); if $id::lanes() > 1 { for i in 0..$id::lanes() { let mut a = $id::splat(true); a = a.replace(i, false); assert!(a.any()); let mut a = $id::splat(false); a = a.replace(i, true); assert!(a.any()); } } } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn none() { let a = $id::splat(true); assert!(!a.none()); let a = $id::splat(false); assert!(a.none()); if $id::lanes() > 1 { for i in 0..$id::lanes() { let mut a = $id::splat(true); a = a.replace(i, false); assert!(!a.none()); let mut a = $id::splat(false); a = a.replace(i, true); assert!(!a.none()); } } } } } } }; } packed_simd-0.3.3/src/api/reductions/min_max.rs010064400007650000024000000445361342461154100177310ustar0000000000000000//! Implements portable horizontal vector min/max reductions. macro_rules! impl_reduction_min_max { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident | $test_tt:tt) => { impl $id { /// Largest vector element value. #[inline] pub fn max_element(self) -> $elem_ty { #[cfg(not(any( target_arch = "aarch64", target_arch = "arm", target_arch = "powerpc64", target_arch = "wasm32", )))] { use crate::llvm::simd_reduce_max; let v: $ielem_ty = unsafe { simd_reduce_max(self.0) }; v as $elem_ty } #[cfg(any( target_arch = "aarch64", target_arch = "arm", target_arch = "powerpc64", target_arch = "wasm32", ))] { // FIXME: broken on AArch64 // https://github.com/rust-lang-nursery/packed_simd/issues/15 // FIXME: broken on WASM32 // https://github.com/rust-lang-nursery/packed_simd/issues/91 let mut x = self.extract(0); for i in 1..$id::lanes() { x = x.max(self.extract(i)); } x } } /// Smallest vector element value. #[inline] pub fn min_element(self) -> $elem_ty { #[cfg(not(any( target_arch = "aarch64", target_arch = "arm", all(target_arch = "x86", not(target_feature = "sse2")), target_arch = "powerpc64", target_arch = "wasm32", ),))] { use crate::llvm::simd_reduce_min; let v: $ielem_ty = unsafe { simd_reduce_min(self.0) }; v as $elem_ty } #[cfg(any( target_arch = "aarch64", target_arch = "arm", all(target_arch = "x86", not(target_feature = "sse2")), target_arch = "powerpc64", target_arch = "wasm32", ))] { // FIXME: broken on AArch64 // https://github.com/rust-lang-nursery/packed_simd/issues/15 // FIXME: broken on i586-unknown-linux-gnu // https://github.com/rust-lang-nursery/packed_simd/issues/22 // FIXME: broken on WASM32 // https://github.com/rust-lang-nursery/packed_simd/issues/91 let mut x = self.extract(0); for i in 1..$id::lanes() { x = x.min(self.extract(i)); } x } } } test_if! {$test_tt: paste::item! 
{ pub mod [<$id _reduction_min_max>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] pub fn max_element() { let v = $id::splat(0 as $elem_ty); assert_eq!(v.max_element(), 0 as $elem_ty); if $id::lanes() > 1 { let v = v.replace(1, 1 as $elem_ty); assert_eq!(v.max_element(), 1 as $elem_ty); } let v = v.replace(0, 2 as $elem_ty); assert_eq!(v.max_element(), 2 as $elem_ty); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] pub fn min_element() { let v = $id::splat(0 as $elem_ty); assert_eq!(v.min_element(), 0 as $elem_ty); if $id::lanes() > 1 { let v = v.replace(1, 1 as $elem_ty); assert_eq!(v.min_element(), 0 as $elem_ty); } let v = $id::splat(1 as $elem_ty); let v = v.replace(0, 2 as $elem_ty); if $id::lanes() > 1 { assert_eq!(v.min_element(), 1 as $elem_ty); } else { assert_eq!(v.min_element(), 2 as $elem_ty); } if $id::lanes() > 1 { let v = $id::splat(2 as $elem_ty); let v = v.replace(1, 1 as $elem_ty); assert_eq!(v.min_element(), 1 as $elem_ty); } } } } } }; } macro_rules! test_reduction_float_min_max { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { test_if!{ $test_tt: paste::item! { pub mod [<$id _reduction_min_max_nan>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn min_element_test() { let n = crate::$elem_ty::NAN; assert_eq!(n.min(-3.), -3.); assert_eq!((-3. as $elem_ty).min(n), -3.); let v0 = $id::splat(-3.); let target_with_broken_last_lane_nan = !cfg!(any( target_arch = "arm", target_arch = "aarch64", all(target_arch = "x86", not(target_feature = "sse2") ), target_arch = "powerpc64", target_arch = "wasm32", )); // The vector is initialized to `-3.`s: [-3, -3, -3, -3] for i in 0..$id::lanes() { // We replace the i-th element of the vector with // `NaN`: [-3, -3, -3, NaN] let mut v = v0.replace(i, n); // If the NaN is in the last place, the LLVM // implementation of these methods is broken on some // targets: if i == $id::lanes() - 1 && target_with_broken_last_lane_nan { // FIXME: // https://github.com/rust-lang-nursery/packed_simd/issues/5 // // If there is a NaN, the result should always // the smallest element, but currently when the // last element is NaN the current // implementation incorrectly returns NaN. // // The targets mentioned above use different // codegen that produces the correct result. // // These asserts detect if this behavior changes assert!(v.min_element().is_nan(), // FIXME: ^^^ should be -3. "[A]: nan at {} => {} | {:?}", i, v.min_element(), v); // If we replace all the elements in the vector // up-to the `i-th` lane with `NaN`s, the result // is still always `-3.` unless all elements of // the vector are `NaN`s: // // This is also broken: for j in 0..i { v = v.replace(j, n); assert!(v.min_element().is_nan(), // FIXME: ^^^ should be -3. "[B]: nan at {} => {} | {:?}", i, v.min_element(), v); } // We are done here, since we were in the last // lane which is the last iteration of the loop. break } // We are not in the last lane, and there is only // one `NaN` in the vector. // If the vector has one lane, the result is `NaN`: if $id::lanes() == 1 { assert!(v.min_element().is_nan(), "[C]: all nans | v={:?} | min={} | \ is_nan: {}", v, v.min_element(), v.min_element().is_nan() ); // And we are done, since the vector only has // one lane anyways. 
break; } // The vector has more than one lane, since there is // only one `NaN` in the vector, the result is // always `-3`. assert_eq!(v.min_element(), -3., "[D]: nan at {} => {} | {:?}", i, v.min_element(), v); // If we replace all the elements in the vector // up-to the `i-th` lane with `NaN`s, the result is // still always `-3.` unless all elements of the // vector are `NaN`s: for j in 0..i { v = v.replace(j, n); if i == $id::lanes() - 1 && j == i - 1 { // All elements of the vector are `NaN`s, // therefore the result is NaN as well. // // Note: the #lanes of the vector is > 1, so // "i - 1" does not overflow. assert!(v.min_element().is_nan(), "[E]: all nans | v={:?} | min={} | \ is_nan: {}", v, v.min_element(), v.min_element().is_nan()); } else { // There are non-`NaN` elements in the // vector, therefore the result is `-3.`: assert_eq!(v.min_element(), -3., "[F]: nan at {} => {} | {:?}", i, v.min_element(), v); } } } // If the vector contains all NaNs the result is NaN: assert!($id::splat(n).min_element().is_nan(), "all nans | v={:?} | min={} | is_nan: {}", $id::splat(n), $id::splat(n).min_element(), $id::splat(n).min_element().is_nan()); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn max_element_test() { let n = crate::$elem_ty::NAN; assert_eq!(n.max(-3.), -3.); assert_eq!((-3. as $elem_ty).max(n), -3.); let v0 = $id::splat(-3.); let target_with_broken_last_lane_nan = !cfg!(any( target_arch = "arm", target_arch = "aarch64", target_arch = "powerpc64", target_arch = "wasm32", )); // The vector is initialized to `-3.`s: [-3, -3, -3, -3] for i in 0..$id::lanes() { // We replace the i-th element of the vector with // `NaN`: [-3, -3, -3, NaN] let mut v = v0.replace(i, n); // If the NaN is in the last place, the LLVM // implementation of these methods is broken on some // targets: if i == $id::lanes() - 1 && target_with_broken_last_lane_nan { // FIXME: // https://github.com/rust-lang-nursery/packed_simd/issues/5 // // If there is a NaN, the result should // always the largest element, but currently // when the last element is NaN the current // implementation incorrectly returns NaN. // // The targets mentioned above use different // codegen that produces the correct result. // // These asserts detect if this behavior // changes assert!(v.max_element().is_nan(), // FIXME: ^^^ should be -3. "[A]: nan at {} => {} | {:?}", i, v.max_element(), v); // If we replace all the elements in the vector // up-to the `i-th` lane with `NaN`s, the result // is still always `-3.` unless all elements of // the vector are `NaN`s: // // This is also broken: for j in 0..i { v = v.replace(j, n); assert!(v.max_element().is_nan(), // FIXME: ^^^ should be -3. "[B]: nan at {} => {} | {:?}", i, v.max_element(), v); } // We are done here, since we were in the last // lane which is the last iteration of the loop. break } // We are not in the last lane, and there is only // one `NaN` in the vector. // If the vector has one lane, the result is `NaN`: if $id::lanes() == 1 { assert!(v.max_element().is_nan(), "[C]: all nans | v={:?} | min={} | \ is_nan: {}", v, v.max_element(), v.max_element().is_nan()); // And we are done, since the vector only has // one lane anyways. break; } // The vector has more than one lane, since there is // only one `NaN` in the vector, the result is // always `-3`. 
assert_eq!(v.max_element(), -3., "[D]: nan at {} => {} | {:?}", i, v.max_element(), v); // If we replace all the elements in the vector // up-to the `i-th` lane with `NaN`s, the result is // still always `-3.` unless all elements of the // vector are `NaN`s: for j in 0..i { v = v.replace(j, n); if i == $id::lanes() - 1 && j == i - 1 { // All elements of the vector are `NaN`s, // therefore the result is NaN as well. // // Note: the #lanes of the vector is > 1, so // "i - 1" does not overflow. assert!(v.max_element().is_nan(), "[E]: all nans | v={:?} | max={} | \ is_nan: {}", v, v.max_element(), v.max_element().is_nan()); } else { // There are non-`NaN` elements in the // vector, therefore the result is `-3.`: assert_eq!(v.max_element(), -3., "[F]: nan at {} => {} | {:?}", i, v.max_element(), v); } } } // If the vector contains all NaNs the result is NaN: assert!($id::splat(n).max_element().is_nan(), "all nans | v={:?} | max={} | is_nan: {}", $id::splat(n), $id::splat(n).max_element(), $id::splat(n).max_element().is_nan()); } } } } } packed_simd-0.3.3/src/api/select.rs010064400007650000024000000047731342460246600154060ustar0000000000000000//! Implements mask's `select`. /// Implements mask select method macro_rules! impl_select { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Selects elements of `a` and `b` using mask. /// /// The lanes of the result for which the mask is `true` contain /// the values of `a`. The remaining lanes contain the values of /// `b`. #[inline] pub fn select<T>(self, a: Simd<T>, b: Simd<T>) -> Simd<T> where T: sealed::SimdArray< NT = <[$elem_ty; $elem_count] as sealed::SimdArray>::NT, >, { use crate::llvm::simd_select; Simd(unsafe { simd_select(self.0, a.0, b.0) }) } } test_select!(bool, $id, $id, (false, true) | $test_tt); }; } macro_rules! test_select { ( $elem_ty:ident, $mask_ty:ident, $vec_ty:ident,($small:expr, $large:expr) | $test_tt:tt ) => { test_if! { $test_tt: paste::item! { pub mod [<$vec_ty _select>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn select() { let o = $small as $elem_ty; let t = $large as $elem_ty; let a = $vec_ty::splat(o); let b = $vec_ty::splat(t); let m = a.lt(b); assert_eq!(m.select(a, b), a); let m = b.lt(a); assert_eq!(m.select(b, a), a); let mut c = a; let mut d = b; let mut m_e = $mask_ty::splat(false); for i in 0..$vec_ty::lanes() { if i % 2 == 0 { let c_tmp = c.extract(i); c = c.replace(i, d.extract(i)); d = d.replace(i, c_tmp); } else { m_e = m_e.replace(i, true); } } let m = c.lt(d); assert_eq!(m_e, m); assert_eq!(m.select(c, d), a); } } } } }; } packed_simd-0.3.3/src/api/shuffle.rs010064400007650000024000000150611342460246600155530ustar0000000000000000//! Implements portable vector shuffles with immediate indices. // FIXME: comprehensive tests // https://github.com/rust-lang-nursery/packed_simd/issues/20 /// Shuffles vector elements. /// /// This macro returns a new vector that contains a shuffle of the elements in /// one (`shuffle!(vec, [indices...])`) or two (`shuffle!(vec0, vec1, /// [indices...])`) input vectors. /// /// The type of `vec0` and `vec1` must be equal, and the element type of the /// resulting vector is the element type of the input vector. /// /// The number of `indices` must be a power-of-two in range `[2, 64]`, since /// currently, the largest vector supported by the library has 64 lanes. The /// length of the resulting vector equals the number of indices provided.
/// /// The indices must be in range `[0, M * N)` where `M` is the number of input /// vectors (`1` or `2`) and `N` is the number of lanes of the input vectors. /// The indices `i` in range `[0, N)` refer to the `i`-th element of `vec0`, /// while the indices in range `[N, 2*N)` refer to the `i - N`-th element of /// `vec1`. /// /// # Examples /// /// Shuffling elements of two vectors: /// /// ``` /// # #[macro_use] /// # extern crate packed_simd; /// # use packed_simd::*; /// # fn main() { /// // Shuffle allows reordering the elements: /// let x = i32x4::new(1, 2, 3, 4); /// let y = i32x4::new(5, 6, 7, 8); /// let r = shuffle!(x, y, [4, 0, 5, 1]); /// assert_eq!(r, i32x4::new(5, 1, 6, 2)); /// /// // The resulting vector can also be smaller than the input: /// let r = shuffle!(x, y, [1, 6]); /// assert_eq!(r, i32x2::new(2, 7)); /// /// // Or larger: /// let r = shuffle!(x, y, [1, 3, 4, 2, 1, 7, 2, 2]); /// assert_eq!(r, i32x8::new(2, 4, 5, 3, 2, 8, 3, 3)); /// // At most 2 * the number of lanes in the input vector. /// # } /// ``` /// /// Shuffling elements of one vector: /// /// ``` /// # #[macro_use] /// # extern crate packed_simd; /// # use packed_simd::*; /// # fn main() { /// // Shuffle allows reordering the elements of a vector: /// let x = i32x4::new(1, 2, 3, 4); /// let r = shuffle!(x, [2, 1, 3, 0]); /// assert_eq!(r, i32x4::new(3, 2, 4, 1)); /// /// // The resulting vector can be smaller than the input: /// let r = shuffle!(x, [1, 3]); /// assert_eq!(r, i32x2::new(2, 4)); /// /// // Equal: /// let r = shuffle!(x, [1, 3, 2, 0]); /// assert_eq!(r, i32x4::new(2, 4, 3, 1)); /// /// // Or larger: /// let r = shuffle!(x, [1, 3, 2, 2, 1, 3, 2, 2]); /// assert_eq!(r, i32x8::new(2, 4, 3, 3, 2, 4, 3, 3)); /// // At most 2 * the number of lanes in the input vector. /// # } /// ``` #[macro_export] macro_rules!
shuffle { ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr]) => {{ #[allow(unused_unsafe)] unsafe { $crate::Simd($crate::__shuffle_vector2( $vec0.0, $vec1.0, [$l0, $l1], )) } }}; ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr]) => {{ #[allow(unused_unsafe)] unsafe { $crate::Simd($crate::__shuffle_vector4( $vec0.0, $vec1.0, [$l0, $l1, $l2, $l3], )) } }}; ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr, $l4:expr, $l5:expr, $l6:expr, $l7:expr]) => {{ #[allow(unused_unsafe)] unsafe { $crate::Simd($crate::__shuffle_vector8( $vec0.0, $vec1.0, [$l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7], )) } }}; ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr, $l4:expr, $l5:expr, $l6:expr, $l7:expr, $l8:expr, $l9:expr, $l10:expr, $l11:expr, $l12:expr, $l13:expr, $l14:expr, $l15:expr]) => {{ #[allow(unused_unsafe)] unsafe { $crate::Simd($crate::__shuffle_vector16( $vec0.0, $vec1.0, [ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, $l11, $l12, $l13, $l14, $l15, ], )) } }}; ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr, $l4:expr, $l5:expr, $l6:expr, $l7:expr, $l8:expr, $l9:expr, $l10:expr, $l11:expr, $l12:expr, $l13:expr, $l14:expr, $l15:expr, $l16:expr, $l17:expr, $l18:expr, $l19:expr, $l20:expr, $l21:expr, $l22:expr, $l23:expr, $l24:expr, $l25:expr, $l26:expr, $l27:expr, $l28:expr, $l29:expr, $l30:expr, $l31:expr]) => {{ #[allow(unused_unsafe)] unsafe { $crate::Simd($crate::__shuffle_vector32( $vec0.0, $vec1.0, [ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19, $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28, $l29, $l30, $l31, ], )) } }}; ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr, $l4:expr, $l5:expr, $l6:expr, $l7:expr, $l8:expr, $l9:expr, $l10:expr, $l11:expr, $l12:expr, $l13:expr, $l14:expr, $l15:expr, $l16:expr, $l17:expr, $l18:expr, $l19:expr, $l20:expr, $l21:expr, $l22:expr, $l23:expr, $l24:expr, $l25:expr, $l26:expr, $l27:expr, $l28:expr, $l29:expr, $l30:expr, $l31:expr, $l32:expr, $l33:expr, $l34:expr, $l35:expr, $l36:expr, $l37:expr, $l38:expr, $l39:expr, $l40:expr, $l41:expr, $l42:expr, $l43:expr, $l44:expr, $l45:expr, $l46:expr, $l47:expr, $l48:expr, $l49:expr, $l50:expr, $l51:expr, $l52:expr, $l53:expr, $l54:expr, $l55:expr, $l56:expr, $l57:expr, $l58:expr, $l59:expr, $l60:expr, $l61:expr, $l62:expr, $l63:expr]) => {{ #[allow(unused_unsafe)] unsafe { $crate::Simd($crate::__shuffle_vector64( $vec0.0, $vec1.0, [ $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19, $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28, $l29, $l30, $l31, $l32, $l33, $l34, $l35, $l36, $l37, $l38, $l39, $l40, $l41, $l42, $l43, $l44, $l45, $l46, $l47, $l48, $l49, $l50, $l51, $l52, $l53, $l54, $l55, $l56, $l57, $l58, $l59, $l60, $l61, $l62, $l63, ], )) } }}; ($vec:expr, [$($l:expr),*]) => { match $vec { v => shuffle!(v, v, [$($l),*]) } }; } packed_simd-0.3.3/src/api/shuffle1_dyn.rs010064400007650000024000000146521342460246600165130ustar0000000000000000//! Shuffle vector elements according to a dynamic vector of indices. macro_rules! impl_shuffle1_dyn { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Shuffle vector elements according to `indices`. #[inline] pub fn shuffle1_dyn(self, indices: I) -> Self where Self: codegen::shuffle1_dyn::Shuffle1Dyn, { codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices) } } }; } macro_rules! 
test_shuffle1_dyn { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { test_if! { $test_tt: paste::item! { pub mod [<$id _shuffle1_dyn>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn shuffle1_dyn() { let increasing = { let mut v = $id::splat(0 as $elem_ty); for i in 0..$id::lanes() { v = v.replace(i, i as $elem_ty); } v }; let decreasing = { let mut v = $id::splat(0 as $elem_ty); for i in 0..$id::lanes() { v = v.replace( i, ($id::lanes() - 1 - i) as $elem_ty ); } v }; type Indices = < $id as codegen::shuffle1_dyn::Shuffle1Dyn >::Indices; let increasing_ids: Indices = increasing.cast(); let decreasing_ids: Indices = decreasing.cast(); assert_eq!( increasing.shuffle1_dyn(increasing_ids), increasing, "(i,i)=>i" ); assert_eq!( decreasing.shuffle1_dyn(increasing_ids), decreasing, "(d,i)=>d" ); assert_eq!( increasing.shuffle1_dyn(decreasing_ids), decreasing, "(i,d)=>d" ); assert_eq!( decreasing.shuffle1_dyn(decreasing_ids), increasing, "(d,d)=>i" ); for i in 0..$id::lanes() { let v_ids: Indices = $id::splat(i as $elem_ty).cast(); assert_eq!(increasing.shuffle1_dyn(v_ids), $id::splat(increasing.extract(i)) ); assert_eq!(decreasing.shuffle1_dyn(v_ids), $id::splat(decreasing.extract(i)) ); assert_eq!( $id::splat(i as $elem_ty) .shuffle1_dyn(increasing_ids), $id::splat(i as $elem_ty) ); assert_eq!( $id::splat(i as $elem_ty) .shuffle1_dyn(decreasing_ids), $id::splat(i as $elem_ty) ); } } } } } }; } macro_rules! test_shuffle1_dyn_mask { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { test_if! { $test_tt: paste::item! { pub mod [<$id _shuffle1_dyn>] { use super::*; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn shuffle1_dyn() { // alternating = [true, false, true, false, ...] let mut alternating = $id::splat(false); for i in 0..$id::lanes() { if i % 2 == 0 { alternating = alternating.replace(i, true); } } type Indices = < $id as codegen::shuffle1_dyn::Shuffle1Dyn >::Indices; // even = [0, 0, 2, 2, 4, 4, ..] let even = { let mut v = Indices::splat(0); for i in 0..$id::lanes() { if i % 2 == 0 { v = v.replace(i, (i as u8).into()); } else { v = v.replace(i, (i as u8 - 1).into()); } } v }; // odd = [1, 1, 3, 3, 5, 5, ...] let odd = { let mut v = Indices::splat(0); for i in 0..$id::lanes() { if i % 2 != 0 { v = v.replace(i, (i as u8).into()); } else { v = v.replace(i, (i as u8 + 1).into()); } } v }; assert_eq!( alternating.shuffle1_dyn(even), $id::splat(true) ); if $id::lanes() > 1 { assert_eq!( alternating.shuffle1_dyn(odd), $id::splat(false) ); } } } } } }; } packed_simd-0.3.3/src/api/slice.rs010064400007650000024000000001321332536563700152150ustar0000000000000000//! Slice from/to methods #[macro_use] mod from_slice; #[macro_use] mod write_to_slice; packed_simd-0.3.3/src/api/slice/from_slice.rs010064400007650000024000000223361342461154100173350ustar0000000000000000//! Implements methods to read a vector type from a slice. macro_rules! impl_slice_from_slice { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Instantiates a new vector with the values of the `slice`. /// /// # Panics /// /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned /// to an `align_of::()` boundary. 
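/// # Examples
///
/// A minimal sketch, assuming the `i32x4` instantiation of this
/// macro; `#[repr(align(16))]` is used here only to guarantee the
/// `mem::align_of::<Self>()` boundary for a 16-byte vector (the
/// required alignment is the vector type's, not necessarily 16):
///
/// ```
/// # use packed_simd::*;
/// #[repr(align(16))]
/// struct Aligned([i32; 4]);
/// let a = Aligned([1, 2, 3, 4]);
/// let v = i32x4::from_slice_aligned(&a.0);
/// assert_eq!(v, i32x4::new(1, 2, 3, 4));
/// ```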
#[inline] pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self { unsafe { assert!(slice.len() >= $elem_count); let target_ptr = slice.get_unchecked(0) as *const $elem_ty; assert_eq!( target_ptr .align_offset(crate::mem::align_of::<Self>()), 0 ); Self::from_slice_aligned_unchecked(slice) } } /// Instantiates a new vector with the values of the `slice`. /// /// # Panics /// /// If `slice.len() < Self::lanes()`. #[inline] pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self { unsafe { assert!(slice.len() >= $elem_count); Self::from_slice_unaligned_unchecked(slice) } } /// Instantiates a new vector with the values of the `slice`. /// /// # Precondition /// /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned /// to an `align_of::<Self>()` boundary, the behavior is undefined. #[inline] pub unsafe fn from_slice_aligned_unchecked( slice: &[$elem_ty], ) -> Self { debug_assert!(slice.len() >= $elem_count); let target_ptr = slice.get_unchecked(0) as *const $elem_ty; debug_assert_eq!( target_ptr.align_offset(crate::mem::align_of::<Self>()), 0 ); #[cfg_attr( feature = "cargo-clippy", allow(clippy::cast_ptr_alignment) )] *(target_ptr as *const Self) } /// Instantiates a new vector with the values of the `slice`. /// /// # Precondition /// /// If `slice.len() < Self::lanes()` the behavior is undefined. #[inline] pub unsafe fn from_slice_unaligned_unchecked( slice: &[$elem_ty], ) -> Self { use crate::mem::size_of; debug_assert!(slice.len() >= $elem_count); let target_ptr = slice.get_unchecked(0) as *const $elem_ty as *const u8; let mut x = Self::splat(0 as $elem_ty); let self_ptr = &mut x as *mut Self as *mut u8; crate::ptr::copy_nonoverlapping( target_ptr, self_ptr, size_of::<Self>(), ); x } } test_if! { $test_tt: paste::item! { pub mod [<$id _slice_from_slice>] { use super::*; use crate::iter::Iterator; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn from_slice_unaligned() { let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; unaligned[0] = 0 as $elem_ty; let vec = $id::from_slice_unaligned(&unaligned[1..]); for (index, &b) in unaligned.iter().enumerate() { if index == 0 { assert_eq!(b, 0 as $elem_ty); } else { assert_eq!(b, 42 as $elem_ty); assert_eq!(b, vec.extract(index - 1)); } } } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn from_slice_unaligned_fail() { let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; unaligned[0] = 0 as $elem_ty; // the slice is not large enough => panic let _vec = $id::from_slice_unaligned(&unaligned[2..]); } union A { data: [$elem_ty; 2 * $id::lanes()], _vec: $id, } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn from_slice_aligned() { let mut aligned = A { data: [0 as $elem_ty; 2 * $id::lanes()], }; for i in $id::lanes()..(2 * $id::lanes()) { unsafe { aligned.data[i] = 42 as $elem_ty; } } let vec = unsafe { $id::from_slice_aligned( &aligned.data[$id::lanes()..]
) }; for (index, &b) in unsafe { aligned.data.iter().enumerate() } { if index < $id::lanes() { assert_eq!(b, 0 as $elem_ty); } else { assert_eq!(b, 42 as $elem_ty); assert_eq!( b, vec.extract(index - $id::lanes()) ); } } } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn from_slice_aligned_fail_lanes() { let aligned = A { data: [0 as $elem_ty; 2 * $id::lanes()], }; let _vec = unsafe { $id::from_slice_aligned( &aligned.data[2 * $id::lanes()..] ) }; } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn from_slice_aligned_fail_align() { unsafe { let aligned = A { data: [0 as $elem_ty; 2 * $id::lanes()], }; // get a pointer to the front of data let ptr: *const $elem_ty = aligned.data.as_ptr() as *const $elem_ty; // offset pointer by one element let ptr = ptr.wrapping_add(1); if ptr.align_offset( crate::mem::align_of::<$id>() ) == 0 { // the pointer is properly aligned, so // from_slice_aligned won't fail here (e.g. this // can happen for i128x1). So we panic to make // the "should_fail" test pass: panic!("ok"); } // create a slice - this is safe, because the // elements of the slice exist, are properly // initialized, and properly aligned: let s: &[$elem_ty] = slice::from_raw_parts( ptr, $id::lanes() ); // this should always panic because the slice // alignment does not match the alignment // requirements for the vector type: let _vec = $id::from_slice_aligned(s); } } } } } }; } packed_simd-0.3.3/src/api/slice/write_to_slice.rs010064400007650000024000000227741342461154100202320ustar0000000000000000//! Implements methods to write a vector type to a slice. macro_rules! impl_slice_write_to_slice { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Writes the values of the vector to the `slice`. /// /// # Panics /// /// If `slice.len() < Self::lanes()` or `&slice[0]` is not /// aligned to an `align_of::<Self>()` boundary. #[inline] pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) { unsafe { assert!(slice.len() >= $elem_count); let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty; assert_eq!( target_ptr .align_offset(crate::mem::align_of::<Self>()), 0 ); self.write_to_slice_aligned_unchecked(slice); } } /// Writes the values of the vector to the `slice`. /// /// # Panics /// /// If `slice.len() < Self::lanes()`. #[inline] pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) { unsafe { assert!(slice.len() >= $elem_count); self.write_to_slice_unaligned_unchecked(slice); } } /// Writes the values of the vector to the `slice`. /// /// # Precondition /// /// If `slice.len() < Self::lanes()` or `&slice[0]` is not /// aligned to an `align_of::<Self>()` boundary, the behavior is /// undefined.
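/// # Examples
///
/// A minimal sketch, assuming the `i32x4` instantiation of this
/// macro; the caller, not the function, upholds the length and
/// alignment preconditions (`#[repr(align(16))]` matches
/// `mem::align_of::<i32x4>()` and is an illustration, not part of
/// the API):
///
/// ```
/// # use packed_simd::*;
/// #[repr(align(16))]
/// struct Aligned([i32; 4]);
/// let mut a = Aligned([0; 4]);
/// unsafe {
///     i32x4::splat(7).write_to_slice_aligned_unchecked(&mut a.0);
/// }
/// assert_eq!(a.0, [7, 7, 7, 7]);
/// ```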
#[inline] pub unsafe fn write_to_slice_aligned_unchecked( self, slice: &mut [$elem_ty], ) { debug_assert!(slice.len() >= $elem_count); let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty; debug_assert_eq!( target_ptr.align_offset(crate::mem::align_of::<Self>()), 0 ); #[cfg_attr( feature = "cargo-clippy", allow(clippy::cast_ptr_alignment) )] *(target_ptr as *mut Self) = self; } /// Writes the values of the vector to the `slice`. /// /// # Precondition /// /// If `slice.len() < Self::lanes()` the behavior is undefined. #[inline] pub unsafe fn write_to_slice_unaligned_unchecked( self, slice: &mut [$elem_ty], ) { debug_assert!(slice.len() >= $elem_count); let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; let self_ptr = &self as *const Self as *const u8; crate::ptr::copy_nonoverlapping( self_ptr, target_ptr, crate::mem::size_of::<Self>(), ); } } test_if! { $test_tt: paste::item! { pub mod [<$id _slice_write_to_slice>] { use super::*; use crate::iter::Iterator; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn write_to_slice_unaligned() { let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; let vec = $id::splat(42 as $elem_ty); vec.write_to_slice_unaligned(&mut unaligned[1..]); for (index, &b) in unaligned.iter().enumerate() { if index == 0 { assert_eq!(b, 0 as $elem_ty); } else { assert_eq!(b, 42 as $elem_ty); assert_eq!(b, vec.extract(index - 1)); } } } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn write_to_slice_unaligned_fail() { let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; let vec = $id::splat(42 as $elem_ty); vec.write_to_slice_unaligned(&mut unaligned[2..]); } union A { data: [$elem_ty; 2 * $id::lanes()], _vec: $id, } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn write_to_slice_aligned() { let mut aligned = A { data: [0 as $elem_ty; 2 * $id::lanes()], }; let vec = $id::splat(42 as $elem_ty); unsafe { vec.write_to_slice_aligned( &mut aligned.data[$id::lanes()..] ); for (idx, &b) in aligned.data.iter().enumerate() { if idx < $id::lanes() { assert_eq!(b, 0 as $elem_ty); } else { assert_eq!(b, 42 as $elem_ty); assert_eq!( b, vec.extract(idx - $id::lanes()) ); } } } } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn write_to_slice_aligned_fail_lanes() { let mut aligned = A { data: [0 as $elem_ty; 2 * $id::lanes()], }; let vec = $id::splat(42 as $elem_ty); unsafe { vec.write_to_slice_aligned( &mut aligned.data[2 * $id::lanes()..]
) }; } // FIXME: wasm-bindgen-test does not support #[should_panic] // #[cfg_attr(not(target_arch = "wasm32"), test)] // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] #[cfg(not(target_arch = "wasm32"))] #[test] #[should_panic] fn write_to_slice_aligned_fail_align() { unsafe { let mut aligned = A { data: [0 as $elem_ty; 2 * $id::lanes()], }; // get a pointer to the front of data let ptr: *mut $elem_ty = aligned.data.as_mut_ptr() as *mut $elem_ty; // offset pointer by one element let ptr = ptr.wrapping_add(1); if ptr.align_offset(crate::mem::align_of::<$id>()) == 0 { // the pointer is properly aligned, so // write_to_slice_aligned won't fail here (e.g. // this can happen for i128x1). So we panic to // make the "should_fail" test pass: panic!("ok"); } // create a slice - this is safe, because the // elements of the slice exist, are properly // initialized, and properly aligned: let s: &mut [$elem_ty] = slice::from_raw_parts_mut(ptr, $id::lanes()); // this should always panic because the slice // alignment does not match the alignment // requirements for the vector type: let vec = $id::splat(42 as $elem_ty); vec.write_to_slice_aligned(s); } } } } } }; } packed_simd-0.3.3/src/api/swap_bytes.rs010064400007650000024000000156711342460246600163060ustar0000000000000000//! Horizontal swap bytes macro_rules! impl_swap_bytes { ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { impl $id { /// Reverses the byte order of the vector. #[inline] pub fn swap_bytes(self) -> Self { super::codegen::swap_bytes::SwapBytes::swap_bytes(self) } /// Converts self to little endian from the target's endianness. /// /// On little endian this is a no-op. On big endian the bytes are /// swapped. #[inline] pub fn to_le(self) -> Self { #[cfg(target_endian = "little")] { self } #[cfg(not(target_endian = "little"))] { self.swap_bytes() } } /// Converts self to big endian from the target's endianness. /// /// On big endian this is a no-op. On little endian the bytes are /// swapped. #[inline] pub fn to_be(self) -> Self { #[cfg(target_endian = "big")] { self } #[cfg(not(target_endian = "big"))] { self.swap_bytes() } } /// Converts a vector from little endian to the target's endianness. /// /// On little endian this is a no-op. On big endian the bytes are /// swapped. #[inline] pub fn from_le(x: Self) -> Self { #[cfg(target_endian = "little")] { x } #[cfg(not(target_endian = "little"))] { x.swap_bytes() } } /// Converts a vector from big endian to the target's endianness. /// /// On big endian this is a no-op. On little endian the bytes are /// swapped. #[inline] pub fn from_be(x: Self) -> Self { #[cfg(target_endian = "big")] { x } #[cfg(not(target_endian = "big"))] { x.swap_bytes() } } } test_if! { $test_tt: paste::item_with_macros! { pub mod [<$id _swap_bytes>] { use super::*; const BYTES: [u8; 64] = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, ]; macro_rules! swap { ($func: ident) => {{ // catch possible future >512 vectors assert!(mem::size_of::<$id>() <= 64); let mut actual = BYTES; let elems: &mut [$elem_ty] = unsafe { slice::from_raw_parts_mut( actual.as_mut_ptr() as *mut $elem_ty, $id::lanes(), ) }; let vec = $id::from_slice_unaligned(elems); $id::$func(vec).write_to_slice_unaligned(elems); actual }}; } macro_rules! 
test_swap { ($func: ident) => {{ let actual = swap!($func); let expected = BYTES.iter().rev() .skip(64 - crate::mem::size_of::<$id>()); assert!(actual.iter().zip(expected) .all(|(x, y)| x == y)); }}; } macro_rules! test_no_swap { ($func: ident) => {{ let actual = swap!($func); let expected = BYTES.iter() .take(mem::size_of::<$id>()); assert!(actual.iter().zip(expected) .all(|(x, y)| x == y)); }}; } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn swap_bytes() { test_swap!(swap_bytes); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn to_le() { #[cfg(target_endian = "little")] { test_no_swap!(to_le); } #[cfg(not(target_endian = "little"))] { test_swap!(to_le); } } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn to_be() { #[cfg(target_endian = "big")] { test_no_swap!(to_be); } #[cfg(not(target_endian = "big"))] { test_swap!(to_be); } } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn from_le() { #[cfg(target_endian = "little")] { test_no_swap!(from_le); } #[cfg(not(target_endian = "little"))] { test_swap!(from_le); } } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn from_be() { #[cfg(target_endian = "big")] { test_no_swap!(from_be); } #[cfg(not(target_endian = "big"))] { test_swap!(from_be); } } } } } }; } packed_simd-0.3.3/src/codegen.rs010064400007650000024000000023231342461154100147410ustar0000000000000000//! Code-generation utilities crate mod bit_manip; crate mod llvm; crate mod math; crate mod reductions; crate mod shuffle; crate mod shuffle1_dyn; crate mod swap_bytes; macro_rules! impl_simd_array { ([$elem_ty:ident; $elem_count:expr]: $tuple_id:ident | $($elem_tys:ident),*) => { #[derive(Copy, Clone)] #[repr(simd)] pub struct $tuple_id($(crate $elem_tys),*); //^^^^^^^ leaked through SimdArray impl crate::sealed::SimdArray for [$elem_ty; $elem_count] { type Tuple = $tuple_id; type T = $elem_ty; const N: usize = $elem_count; type NT = [u32; $elem_count]; } impl crate::sealed::Simd for $tuple_id { type Element = $elem_ty; const LANES: usize = $elem_count; type LanesType = [u32; $elem_count]; } } } crate mod pointer_sized_int; crate mod v16; crate use self::v16::*; crate mod v32; crate use self::v32::*; crate mod v64; crate use self::v64::*; crate mod v128; crate use self::v128::*; crate mod v256; crate use self::v256::*; crate mod v512; crate use self::v512::*; crate mod vSize; crate use self::vSize::*; crate mod vPtr; crate use self::vPtr::*; packed_simd-0.3.3/src/codegen/bit_manip.rs010064400007650000024000000326161342461154100167130ustar0000000000000000//! LLVM bit manipulation intrinsics. 
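//!
//! These bindings wrap LLVM's generic vector versions of the `ctpop`,
//! `ctlz`, and `cttz` intrinsics, which apply the scalar operations
//! `count_ones`, `leading_zeros`, and `trailing_zeros` lane-wise (the
//! scalar fallback below does exactly that). A minimal sketch of the
//! scalar semantics being vectorized, shown on a plain `u8` for clarity:
//!
//! ```
//! assert_eq!(0b0110_u8.count_ones(), 2); // ctpop
//! assert_eq!(0b0110_u8.leading_zeros(), 5); // ctlz
//! assert_eq!(0b0110_u8.trailing_zeros(), 1); // cttz
//! ```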
#![rustfmt::skip] use crate::*; #[allow(improper_ctypes, dead_code)] extern "C" { #[link_name = "llvm.ctlz.v2i8"] fn ctlz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2; #[link_name = "llvm.ctlz.v4i8"] fn ctlz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4; #[link_name = "llvm.ctlz.v8i8"] fn ctlz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8; #[link_name = "llvm.ctlz.v16i8"] fn ctlz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16; #[link_name = "llvm.ctlz.v32i8"] fn ctlz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32; #[link_name = "llvm.ctlz.v64i8"] fn ctlz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64; #[link_name = "llvm.ctlz.v2i16"] fn ctlz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2; #[link_name = "llvm.ctlz.v4i16"] fn ctlz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4; #[link_name = "llvm.ctlz.v8i16"] fn ctlz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8; #[link_name = "llvm.ctlz.v16i16"] fn ctlz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16; #[link_name = "llvm.ctlz.v32i16"] fn ctlz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32; #[link_name = "llvm.ctlz.v2i32"] fn ctlz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2; #[link_name = "llvm.ctlz.v4i32"] fn ctlz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4; #[link_name = "llvm.ctlz.v8i32"] fn ctlz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8; #[link_name = "llvm.ctlz.v16i32"] fn ctlz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16; #[link_name = "llvm.ctlz.v2i64"] fn ctlz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2; #[link_name = "llvm.ctlz.v4i64"] fn ctlz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4; #[link_name = "llvm.ctlz.v8i64"] fn ctlz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8; #[link_name = "llvm.ctlz.v1i128"] fn ctlz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1; #[link_name = "llvm.ctlz.v2i128"] fn ctlz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2; #[link_name = "llvm.ctlz.v4i128"] fn ctlz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4; #[link_name = "llvm.cttz.v2i8"] fn cttz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2; #[link_name = "llvm.cttz.v4i8"] fn cttz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4; #[link_name = "llvm.cttz.v8i8"] fn cttz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8; #[link_name = "llvm.cttz.v16i8"] fn cttz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16; #[link_name = "llvm.cttz.v32i8"] fn cttz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32; #[link_name = "llvm.cttz.v64i8"] fn cttz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64; #[link_name = "llvm.cttz.v2i16"] fn cttz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2; #[link_name = "llvm.cttz.v4i16"] fn cttz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4; #[link_name = "llvm.cttz.v8i16"] fn cttz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8; #[link_name = "llvm.cttz.v16i16"] fn cttz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16; #[link_name = "llvm.cttz.v32i16"] fn cttz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32; #[link_name = "llvm.cttz.v2i32"] fn cttz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2; #[link_name = "llvm.cttz.v4i32"] fn cttz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4; #[link_name = "llvm.cttz.v8i32"] fn cttz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8; #[link_name = "llvm.cttz.v16i32"] fn cttz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16; #[link_name = "llvm.cttz.v2i64"] fn cttz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2; #[link_name = "llvm.cttz.v4i64"] fn cttz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4; #[link_name = "llvm.cttz.v8i64"] fn cttz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8; #[link_name = 
"llvm.cttz.v1i128"] fn cttz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1; #[link_name = "llvm.cttz.v2i128"] fn cttz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2; #[link_name = "llvm.cttz.v4i128"] fn cttz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4; #[link_name = "llvm.ctpop.v2i8"] fn ctpop_u8x2(x: u8x2) -> u8x2; #[link_name = "llvm.ctpop.v4i8"] fn ctpop_u8x4(x: u8x4) -> u8x4; #[link_name = "llvm.ctpop.v8i8"] fn ctpop_u8x8(x: u8x8) -> u8x8; #[link_name = "llvm.ctpop.v16i8"] fn ctpop_u8x16(x: u8x16) -> u8x16; #[link_name = "llvm.ctpop.v32i8"] fn ctpop_u8x32(x: u8x32) -> u8x32; #[link_name = "llvm.ctpop.v64i8"] fn ctpop_u8x64(x: u8x64) -> u8x64; #[link_name = "llvm.ctpop.v2i16"] fn ctpop_u16x2(x: u16x2) -> u16x2; #[link_name = "llvm.ctpop.v4i16"] fn ctpop_u16x4(x: u16x4) -> u16x4; #[link_name = "llvm.ctpop.v8i16"] fn ctpop_u16x8(x: u16x8) -> u16x8; #[link_name = "llvm.ctpop.v16i16"] fn ctpop_u16x16(x: u16x16) -> u16x16; #[link_name = "llvm.ctpop.v32i16"] fn ctpop_u16x32(x: u16x32) -> u16x32; #[link_name = "llvm.ctpop.v2i32"] fn ctpop_u32x2(x: u32x2) -> u32x2; #[link_name = "llvm.ctpop.v4i32"] fn ctpop_u32x4(x: u32x4) -> u32x4; #[link_name = "llvm.ctpop.v8i32"] fn ctpop_u32x8(x: u32x8) -> u32x8; #[link_name = "llvm.ctpop.v16i32"] fn ctpop_u32x16(x: u32x16) -> u32x16; #[link_name = "llvm.ctpop.v2i64"] fn ctpop_u64x2(x: u64x2) -> u64x2; #[link_name = "llvm.ctpop.v4i64"] fn ctpop_u64x4(x: u64x4) -> u64x4; #[link_name = "llvm.ctpop.v8i64"] fn ctpop_u64x8(x: u64x8) -> u64x8; #[link_name = "llvm.ctpop.v1i128"] fn ctpop_u128x1(x: u128x1) -> u128x1; #[link_name = "llvm.ctpop.v2i128"] fn ctpop_u128x2(x: u128x2) -> u128x2; #[link_name = "llvm.ctpop.v4i128"] fn ctpop_u128x4(x: u128x4) -> u128x4; } crate trait BitManip { fn ctpop(self) -> Self; fn ctlz(self) -> Self; fn cttz(self) -> Self; } macro_rules! impl_bit_manip { (inner: $ty:ident, $scalar:ty, $uty:ident, $ctpop:ident, $ctlz:ident, $cttz:ident) => { // FIXME: several LLVM intrinsics break on s390x https://github.com/rust-lang-nursery/packed_simd/issues/192 #[cfg(target_arch = "s390x")] impl_bit_manip! { scalar: $ty, $scalar } #[cfg(not(target_arch = "s390x"))] impl BitManip for $ty { #[inline] fn ctpop(self) -> Self { let y: $uty = self.cast(); unsafe { $ctpop(y).cast() } } #[inline] fn ctlz(self) -> Self { let y: $uty = self.cast(); // the ctxx intrinsics need compile-time constant // `is_zero_undef` unsafe { $ctlz(y, false).cast() } } #[inline] fn cttz(self) -> Self { let y: $uty = self.cast(); unsafe { $cttz(y, false).cast() } } } }; (sized_inner: $ty:ident, $scalar:ty, $uty:ident) => { #[cfg(target_arch = "s390x")] impl_bit_manip! 
{ scalar: $ty, $scalar } #[cfg(not(target_arch = "s390x"))] impl BitManip for $ty { #[inline] fn ctpop(self) -> Self { let y: $uty = self.cast(); $uty::ctpop(y).cast() } #[inline] fn ctlz(self) -> Self { let y: $uty = self.cast(); $uty::ctlz(y).cast() } #[inline] fn cttz(self) -> Self { let y: $uty = self.cast(); $uty::cttz(y).cast() } } }; (scalar: $ty:ident, $scalar:ty) => { impl BitManip for $ty { #[inline] fn ctpop(self) -> Self { let mut ones = self; for i in 0..Self::lanes() { ones = ones .replace(i, self.extract(i).count_ones() as $scalar); } ones } #[inline] fn ctlz(self) -> Self { let mut lz = self; for i in 0..Self::lanes() { lz = lz.replace( i, self.extract(i).leading_zeros() as $scalar, ); } lz } #[inline] fn cttz(self) -> Self { let mut tz = self; for i in 0..Self::lanes() { tz = tz.replace( i, self.extract(i).trailing_zeros() as $scalar, ); } tz } } }; ($uty:ident, $uscalar:ty, $ity:ident, $iscalar:ty, $ctpop:ident, $ctlz:ident, $cttz:ident) => { impl_bit_manip! { inner: $uty, $uscalar, $uty, $ctpop, $ctlz, $cttz } impl_bit_manip! { inner: $ity, $iscalar, $uty, $ctpop, $ctlz, $cttz } }; (sized: $usize:ident, $uscalar:ty, $isize:ident, $iscalar:ty, $ty:ident) => { impl_bit_manip! { sized_inner: $usize, $uscalar, $ty } impl_bit_manip! { sized_inner: $isize, $iscalar, $ty } }; } impl_bit_manip! { u8x2 , u8, i8x2, i8, ctpop_u8x2, ctlz_u8x2, cttz_u8x2 } impl_bit_manip! { u8x4 , u8, i8x4, i8, ctpop_u8x4, ctlz_u8x4, cttz_u8x4 } #[cfg(not(target_arch = "aarch64"))] // see below impl_bit_manip! { u8x8 , u8, i8x8, i8, ctpop_u8x8, ctlz_u8x8, cttz_u8x8 } impl_bit_manip! { u8x16 , u8, i8x16, i8, ctpop_u8x16, ctlz_u8x16, cttz_u8x16 } impl_bit_manip! { u8x32 , u8, i8x32, i8, ctpop_u8x32, ctlz_u8x32, cttz_u8x32 } impl_bit_manip! { u8x64 , u8, i8x64, i8, ctpop_u8x64, ctlz_u8x64, cttz_u8x64 } impl_bit_manip! { u16x2 , u16, i16x2, i16, ctpop_u16x2, ctlz_u16x2, cttz_u16x2 } impl_bit_manip! { u16x4 , u16, i16x4, i16, ctpop_u16x4, ctlz_u16x4, cttz_u16x4 } impl_bit_manip! { u16x8 , u16, i16x8, i16, ctpop_u16x8, ctlz_u16x8, cttz_u16x8 } impl_bit_manip! { u16x16 , u16, i16x16, i16, ctpop_u16x16, ctlz_u16x16, cttz_u16x16 } impl_bit_manip! { u16x32 , u16, i16x32, i16, ctpop_u16x32, ctlz_u16x32, cttz_u16x32 } impl_bit_manip! { u32x2 , u32, i32x2, i32, ctpop_u32x2, ctlz_u32x2, cttz_u32x2 } impl_bit_manip! { u32x4 , u32, i32x4, i32, ctpop_u32x4, ctlz_u32x4, cttz_u32x4 } impl_bit_manip! { u32x8 , u32, i32x8, i32, ctpop_u32x8, ctlz_u32x8, cttz_u32x8 } impl_bit_manip! { u32x16 , u32, i32x16, i32, ctpop_u32x16, ctlz_u32x16, cttz_u32x16 } impl_bit_manip! { u64x2 , u64, i64x2, i64, ctpop_u64x2, ctlz_u64x2, cttz_u64x2 } impl_bit_manip! { u64x4 , u64, i64x4, i64, ctpop_u64x4, ctlz_u64x4, cttz_u64x4 } impl_bit_manip! { u64x8 , u64, i64x8, i64, ctpop_u64x8, ctlz_u64x8, cttz_u64x8 } impl_bit_manip! { u128x1 , u128, i128x1, i128, ctpop_u128x1, ctlz_u128x1, cttz_u128x1 } impl_bit_manip! { u128x2 , u128, i128x2, i128, ctpop_u128x2, ctlz_u128x2, cttz_u128x2 } impl_bit_manip! 
{ u128x4 , u128, i128x4, i128, ctpop_u128x4, ctlz_u128x4, cttz_u128x4 } #[cfg(target_arch = "aarch64")] impl BitManip for u8x8 { #[inline] fn ctpop(self) -> Self { let y: u8x8 = self.cast(); unsafe { ctpop_u8x8(y).cast() } } #[inline] fn ctlz(self) -> Self { let y: u8x8 = self.cast(); unsafe { ctlz_u8x8(y, false).cast() } } #[inline] fn cttz(self) -> Self { // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191 // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 // intrinsics let mut tz = self; for i in 0..Self::lanes() { tz = tz.replace(i, self.extract(i).trailing_zeros() as u8); } tz } } #[cfg(target_arch = "aarch64")] impl BitManip for i8x8 { #[inline] fn ctpop(self) -> Self { let y: u8x8 = self.cast(); unsafe { ctpop_u8x8(y).cast() } } #[inline] fn ctlz(self) -> Self { let y: u8x8 = self.cast(); unsafe { ctlz_u8x8(y, false).cast() } } #[inline] fn cttz(self) -> Self { // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191 // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64 // intrinsics let mut tz = self; for i in 0..Self::lanes() { tz = tz.replace(i, self.extract(i).trailing_zeros() as i8); } tz } } cfg_if! { if #[cfg(target_pointer_width = "8")] { impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u8x2 } impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u8x4 } impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u8x8 } } else if #[cfg(target_pointer_width = "16")] { impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u16x2 } impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u16x4 } impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u16x8 } } else if #[cfg(target_pointer_width = "32")] { impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u32x2 } impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u32x4 } impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u32x8 } } else if #[cfg(target_pointer_width = "64")] { impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u64x2 } impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u64x4 } impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u64x8 } } else { compile_error!("unsupported target_pointer_width"); } } packed_simd-0.3.3/src/codegen/llvm.rs010064400007650000024000000071471342467506300157350ustar0000000000000000//! LLVM's platform intrinsics #![allow(dead_code)] use crate::sealed::Shuffle; #[allow(unused_imports)] // FIXME: spurious warning? use crate::sealed::Simd; // Shuffle intrinsics: expanded in users' crates, therefore public. extern "platform-intrinsic" { // FIXME: Passing this intrinsics an `idx` array with an index that is // out-of-bounds will produce a monomorphization-time error. 
    // https://github.com/rust-lang-nursery/packed_simd/issues/21
    pub fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U
    where
        T: Simd,
        <T as Simd>::Element: Shuffle<[u32; 2], Output = U>;

    pub fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U
    where
        T: Simd,
        <T as Simd>::Element: Shuffle<[u32; 4], Output = U>;

    pub fn simd_shuffle8<T, U>(x: T, y: T, idx: [u32; 8]) -> U
    where
        T: Simd,
        <T as Simd>::Element: Shuffle<[u32; 8], Output = U>;

    pub fn simd_shuffle16<T, U>(x: T, y: T, idx: [u32; 16]) -> U
    where
        T: Simd,
        <T as Simd>::Element: Shuffle<[u32; 16], Output = U>;

    pub fn simd_shuffle32<T, U>(x: T, y: T, idx: [u32; 32]) -> U
    where
        T: Simd,
        <T as Simd>::Element: Shuffle<[u32; 32], Output = U>;

    pub fn simd_shuffle64<T, U>(x: T, y: T, idx: [u32; 64]) -> U
    where
        T: Simd,
        <T as Simd>::Element: Shuffle<[u32; 64], Output = U>;
}

pub use self::simd_shuffle16 as __shuffle_vector16;
pub use self::simd_shuffle2 as __shuffle_vector2;
pub use self::simd_shuffle32 as __shuffle_vector32;
pub use self::simd_shuffle4 as __shuffle_vector4;
pub use self::simd_shuffle64 as __shuffle_vector64;
pub use self::simd_shuffle8 as __shuffle_vector8;

extern "platform-intrinsic" {
    crate fn simd_eq<T, U>(x: T, y: T) -> U;
    crate fn simd_ne<T, U>(x: T, y: T) -> U;
    crate fn simd_lt<T, U>(x: T, y: T) -> U;
    crate fn simd_le<T, U>(x: T, y: T) -> U;
    crate fn simd_gt<T, U>(x: T, y: T) -> U;
    crate fn simd_ge<T, U>(x: T, y: T) -> U;

    crate fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
    crate fn simd_extract<T, U>(x: T, idx: u32) -> U;

    crate fn simd_cast<T, U>(x: T) -> U;

    crate fn simd_add<T>(x: T, y: T) -> T;
    crate fn simd_sub<T>(x: T, y: T) -> T;
    crate fn simd_mul<T>(x: T, y: T) -> T;
    crate fn simd_div<T>(x: T, y: T) -> T;
    crate fn simd_rem<T>(x: T, y: T) -> T;
    crate fn simd_shl<T>(x: T, y: T) -> T;
    crate fn simd_shr<T>(x: T, y: T) -> T;
    crate fn simd_and<T>(x: T, y: T) -> T;
    crate fn simd_or<T>(x: T, y: T) -> T;
    crate fn simd_xor<T>(x: T, y: T) -> T;

    crate fn simd_reduce_add_unordered<T, U>(x: T) -> U;
    crate fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
    crate fn simd_reduce_add_ordered<T, U>(x: T, acc: U) -> U;
    crate fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
    crate fn simd_reduce_min<T, U>(x: T) -> U;
    crate fn simd_reduce_max<T, U>(x: T) -> U;
    crate fn simd_reduce_min_nanless<T, U>(x: T) -> U;
    crate fn simd_reduce_max_nanless<T, U>(x: T) -> U;
    crate fn simd_reduce_and<T, U>(x: T) -> U;
    crate fn simd_reduce_or<T, U>(x: T) -> U;
    crate fn simd_reduce_xor<T, U>(x: T) -> U;
    crate fn simd_reduce_all<T>(x: T) -> bool;
    crate fn simd_reduce_any<T>(x: T) -> bool;

    crate fn simd_select<M, T>(m: M, a: T, b: T) -> T;

    crate fn simd_fmin<T>(a: T, b: T) -> T;
    crate fn simd_fmax<T>(a: T, b: T) -> T;

    crate fn simd_fsqrt<T>(a: T) -> T;
    crate fn simd_fma<T>(a: T, b: T, c: T) -> T;

    crate fn simd_gather<T, P, M>(value: T, pointers: P, mask: M) -> T;
    crate fn simd_scatter<T, P, M>(value: T, pointers: P, mask: M);

    crate fn simd_bitmask<T, U>(value: T) -> U;
}
packed_simd-0.3.3/src/codegen/math.rs010064400007650000024000000000571332536563700157100ustar0000000000000000//! Vertical math operations
crate mod float;
packed_simd-0.3.3/src/codegen/math/float.rs010064400007650000024000000005621342460246600170100ustar0000000000000000//! Vertical floating-point math operations.
#![cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))]

#[macro_use]
crate mod macros;
crate mod abs;
crate mod cos;
crate mod cos_pi;
crate mod exp;
crate mod ln;
crate mod mul_add;
crate mod mul_adde;
crate mod powf;
crate mod sin;
crate mod sin_cos_pi;
crate mod sin_pi;
crate mod sqrt;
crate mod sqrte;
packed_simd-0.3.3/src/codegen/math/float/abs.rs010064400007650000024000000072431342460246600175600ustar0000000000000000//! Vertical floating-point `fabs`
#![allow(unused)] // FIXME 64-bit 1 elem vectors fabs
use crate::*;

crate trait Abs {
    fn abs(self) -> Self;
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.fabs.v2f32"]
    fn fabs_v2f32(x: f32x2) -> f32x2;
    #[link_name = "llvm.fabs.v4f32"]
    fn fabs_v4f32(x: f32x4) -> f32x4;
    #[link_name = "llvm.fabs.v8f32"]
    fn fabs_v8f32(x: f32x8) -> f32x8;
    #[link_name = "llvm.fabs.v16f32"]
    fn fabs_v16f32(x: f32x16) -> f32x16;
    /* FIXME 64-bit single elem vectors
    #[link_name = "llvm.fabs.v1f64"]
    fn fabs_v1f64(x: f64x1) -> f64x1;
    */
    #[link_name = "llvm.fabs.v2f64"]
    fn fabs_v2f64(x: f64x2) -> f64x2;
    #[link_name = "llvm.fabs.v4f64"]
    fn fabs_v4f64(x: f64x4) -> f64x4;
    #[link_name = "llvm.fabs.v8f64"]
    fn fabs_v8f64(x: f64x8) -> f64x8;

    #[link_name = "llvm.fabs.f32"]
    fn fabs_f32(x: f32) -> f32;
    #[link_name = "llvm.fabs.f64"]
    fn fabs_f64(x: f64) -> f64;
}

gen_unary_impl_table!(Abs, abs);

cfg_if! {
    if #[cfg(target_arch = "s390x")] {
        // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
        impl_unary!(f32x2[f32; 2]: fabs_f32);
        impl_unary!(f32x4[f32; 4]: fabs_f32);
        impl_unary!(f32x8[f32; 8]: fabs_f32);
        impl_unary!(f32x16[f32; 16]: fabs_f32);

        impl_unary!(f64x2[f64; 2]: fabs_f64);
        impl_unary!(f64x4[f64; 4]: fabs_f64);
        impl_unary!(f64x8[f64; 8]: fabs_f64);
    } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
        use sleef_sys::*;
        cfg_if! {
            if #[cfg(target_feature = "avx2")] {
                impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_avx2128);
                impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx2);
                impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx2);

                impl_unary!(f32x4: Sleef_fabsf4_avx2128);
                impl_unary!(f32x8: Sleef_fabsf8_avx2);
                impl_unary!(f64x2: Sleef_fabsd2_avx2128);
                impl_unary!(f64x4: Sleef_fabsd4_avx2);
            } else if #[cfg(target_feature = "avx")] {
                impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4);
                impl_unary!(f32x16[h => f32x8]: Sleef_fabsf8_avx);
                impl_unary!(f64x8[h => f64x4]: Sleef_fabsd4_avx);

                impl_unary!(f32x4: Sleef_fabsf4_sse4);
                impl_unary!(f32x8: Sleef_fabsf8_avx);
                impl_unary!(f64x2: Sleef_fabsd2_sse4);
                impl_unary!(f64x4: Sleef_fabsd4_avx);
            } else if #[cfg(target_feature = "sse4.2")] {
                impl_unary!(f32x2[t => f32x4]: Sleef_fabsf4_sse4);
                impl_unary!(f32x16[q => f32x4]: Sleef_fabsf4_sse4);
                impl_unary!(f64x8[q => f64x2]: Sleef_fabsd2_sse4);

                impl_unary!(f32x4: Sleef_fabsf4_sse4);
                impl_unary!(f32x8[h => f32x4]: Sleef_fabsf4_sse4);
                impl_unary!(f64x2: Sleef_fabsd2_sse4);
                impl_unary!(f64x4[h => f64x2]: Sleef_fabsd2_sse4);
            } else {
                impl_unary!(f32x2[f32; 2]: fabs_f32);
                impl_unary!(f32x16: fabs_v16f32);
                impl_unary!(f64x8: fabs_v8f64);

                impl_unary!(f32x4: fabs_v4f32);
                impl_unary!(f32x8: fabs_v8f32);
                impl_unary!(f64x2: fabs_v2f64);
                impl_unary!(f64x4: fabs_v4f64);
            }
        }
    } else {
        impl_unary!(f32x2[f32; 2]: fabs_f32);
        impl_unary!(f32x4: fabs_v4f32);
        impl_unary!(f32x8: fabs_v8f32);
        impl_unary!(f32x16: fabs_v16f32);

        impl_unary!(f64x2: fabs_v2f64);
        impl_unary!(f64x4: fabs_v4f64);
        impl_unary!(f64x8: fabs_v8f64);
    }
}
packed_simd-0.3.3/src/codegen/math/float/cos.rs010064400007650000024000000072401342460246600175740ustar0000000000000000//!
Vertical floating-point `cos` #![allow(unused)] // FIXME 64-bit 1 elem vector cos use crate::*; crate trait Cos { fn cos(self) -> Self; } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.cos.v2f32"] fn cos_v2f32(x: f32x2) -> f32x2; #[link_name = "llvm.cos.v4f32"] fn cos_v4f32(x: f32x4) -> f32x4; #[link_name = "llvm.cos.v8f32"] fn cos_v8f32(x: f32x8) -> f32x8; #[link_name = "llvm.cos.v16f32"] fn cos_v16f32(x: f32x16) -> f32x16; /* FIXME 64-bit cosgle elem vectors #[link_name = "llvm.cos.v1f64"] fn cos_v1f64(x: f64x1) -> f64x1; */ #[link_name = "llvm.cos.v2f64"] fn cos_v2f64(x: f64x2) -> f64x2; #[link_name = "llvm.cos.v4f64"] fn cos_v4f64(x: f64x4) -> f64x4; #[link_name = "llvm.cos.v8f64"] fn cos_v8f64(x: f64x8) -> f64x8; #[link_name = "llvm.cos.f32"] fn cos_f32(x: f32) -> f32; #[link_name = "llvm.cos.f64"] fn cos_f64(x: f64) -> f64; } gen_unary_impl_table!(Cos, cos); cfg_if! { if #[cfg(target_arch = "s390x")] { // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 impl_unary!(f32x2[f32; 2]: cos_f32); impl_unary!(f32x4[f32; 4]: cos_f32); impl_unary!(f32x8[f32; 8]: cos_f32); impl_unary!(f32x16[f32; 16]: cos_f32); impl_unary!(f64x2[f64; 2]: cos_f64); impl_unary!(f64x4[f64; 4]: cos_f64); impl_unary!(f64x8[f64; 8]: cos_f64); } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { use sleef_sys::*; cfg_if! { if #[cfg(target_feature = "avx2")] { impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10avx2128); impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx2); impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx2); impl_unary!(f32x4: Sleef_cosf4_u10avx2128); impl_unary!(f32x8: Sleef_cosf8_u10avx2); impl_unary!(f64x2: Sleef_cosd2_u10avx2128); impl_unary!(f64x4: Sleef_cosd4_u10avx2); } else if #[cfg(target_feature = "avx")] { impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4); impl_unary!(f32x16[h => f32x8]: Sleef_cosf8_u10avx); impl_unary!(f64x8[h => f64x4]: Sleef_cosd4_u10avx); impl_unary!(f32x4: Sleef_cosf4_u10sse4); impl_unary!(f32x8: Sleef_cosf8_u10avx); impl_unary!(f64x2: Sleef_cosd2_u10sse4); impl_unary!(f64x4: Sleef_cosd4_u10avx); } else if #[cfg(target_feature = "sse4.2")] { impl_unary!(f32x2[t => f32x4]: Sleef_cosf4_u10sse4); impl_unary!(f32x16[q => f32x4]: Sleef_cosf4_u10sse4); impl_unary!(f64x8[q => f64x2]: Sleef_cosd2_u10sse4); impl_unary!(f32x4: Sleef_cosf4_u10sse4); impl_unary!(f32x8[h => f32x4]: Sleef_cosf4_u10sse4); impl_unary!(f64x2: Sleef_cosd2_u10sse4); impl_unary!(f64x4[h => f64x2]: Sleef_cosd2_u10sse4); } else { impl_unary!(f32x2[f32; 2]: cos_f32); impl_unary!(f32x16: cos_v16f32); impl_unary!(f64x8: cos_v8f64); impl_unary!(f32x4: cos_v4f32); impl_unary!(f32x8: cos_v8f32); impl_unary!(f64x2: cos_v2f64); impl_unary!(f64x4: cos_v4f64); } } } else { impl_unary!(f32x2[f32; 2]: cos_f32); impl_unary!(f32x4: cos_v4f32); impl_unary!(f32x8: cos_v8f32); impl_unary!(f32x16: cos_v16f32); impl_unary!(f64x2: cos_v2f64); impl_unary!(f64x4: cos_v4f64); impl_unary!(f64x8: cos_v8f64); } } packed_simd-0.3.3/src/codegen/math/float/cos_pi.rs010064400007650000024000000054061342460246600202660ustar0000000000000000//! Vertical floating-point `cos` #![allow(unused)] // FIXME 64-bit 1 elem vectors cos_pi use crate::*; crate trait CosPi { fn cos_pi(self) -> Self; } gen_unary_impl_table!(CosPi, cos_pi); macro_rules! impl_def { ($vid:ident, $PI:path) => { impl CosPi for $vid { #[inline] fn cos_pi(self) -> Self { (self * Self::splat($PI)).cos() } } }; } macro_rules! impl_def32 { ($vid:ident) => { impl_def!($vid, crate::f32::consts::PI); }; } macro_rules! 
impl_def64 { ($vid:ident) => { impl_def!($vid, crate::f64::consts::PI); }; } cfg_if! { if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { use sleef_sys::*; cfg_if! { if #[cfg(target_feature = "avx2")] { impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05avx2128); impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx2); impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx2); impl_unary!(f32x4: Sleef_cospif4_u05avx2128); impl_unary!(f32x8: Sleef_cospif8_u05avx2); impl_unary!(f64x2: Sleef_cospid2_u05avx2128); impl_unary!(f64x4: Sleef_cospid4_u05avx2); } else if #[cfg(target_feature = "avx")] { impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4); impl_unary!(f32x16[h => f32x8]: Sleef_cospif8_u05avx); impl_unary!(f64x8[h => f64x4]: Sleef_cospid4_u05avx); impl_unary!(f32x4: Sleef_cospif4_u05sse4); impl_unary!(f32x8: Sleef_cospif8_u05avx); impl_unary!(f64x2: Sleef_cospid2_u05sse4); impl_unary!(f64x4: Sleef_cospid4_u05avx); } else if #[cfg(target_feature = "sse4.2")] { impl_unary!(f32x2[t => f32x4]: Sleef_cospif4_u05sse4); impl_unary!(f32x16[q => f32x4]: Sleef_cospif4_u05sse4); impl_unary!(f64x8[q => f64x2]: Sleef_cospid2_u05sse4); impl_unary!(f32x4: Sleef_cospif4_u05sse4); impl_unary!(f32x8[h => f32x4]: Sleef_cospif4_u05sse4); impl_unary!(f64x2: Sleef_cospid2_u05sse4); impl_unary!(f64x4[h => f64x2]: Sleef_cospid2_u05sse4); } else { impl_def32!(f32x2); impl_def32!(f32x4); impl_def32!(f32x8); impl_def32!(f32x16); impl_def64!(f64x2); impl_def64!(f64x4); impl_def64!(f64x8); } } } else { impl_def32!(f32x2); impl_def32!(f32x4); impl_def32!(f32x8); impl_def32!(f32x16); impl_def64!(f64x2); impl_def64!(f64x4); impl_def64!(f64x8); } } packed_simd-0.3.3/src/codegen/math/float/exp.rs010064400007650000024000000102571342460246600176060ustar0000000000000000//! Vertical floating-point `exp` #![allow(unused)] // FIXME 64-bit expgle elem vectors misexpg use crate::*; crate trait Exp { fn exp(self) -> Self; } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.exp.v2f32"] fn exp_v2f32(x: f32x2) -> f32x2; #[link_name = "llvm.exp.v4f32"] fn exp_v4f32(x: f32x4) -> f32x4; #[link_name = "llvm.exp.v8f32"] fn exp_v8f32(x: f32x8) -> f32x8; #[link_name = "llvm.exp.v16f32"] fn exp_v16f32(x: f32x16) -> f32x16; /* FIXME 64-bit expgle elem vectors #[link_name = "llvm.exp.v1f64"] fn exp_v1f64(x: f64x1) -> f64x1; */ #[link_name = "llvm.exp.v2f64"] fn exp_v2f64(x: f64x2) -> f64x2; #[link_name = "llvm.exp.v4f64"] fn exp_v4f64(x: f64x4) -> f64x4; #[link_name = "llvm.exp.v8f64"] fn exp_v8f64(x: f64x8) -> f64x8; #[link_name = "llvm.exp.f32"] fn exp_f32(x: f32) -> f32; #[link_name = "llvm.exp.f64"] fn exp_f64(x: f64) -> f64; } gen_unary_impl_table!(Exp, exp); cfg_if! { if #[cfg(target_arch = "s390x")] { // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 impl_unary!(f32x2[f32; 2]: exp_f32); impl_unary!(f32x4[f32; 4]: exp_f32); impl_unary!(f32x8[f32; 8]: exp_f32); impl_unary!(f32x16[f32; 16]: exp_f32); impl_unary!(f64x2[f64; 2]: exp_f64); impl_unary!(f64x4[f64; 4]: exp_f64); impl_unary!(f64x8[f64; 8]: exp_f64); } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { use sleef_sys::*; cfg_if! 
{ if #[cfg(target_feature = "avx2")] { impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10avx2128); impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx2); impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx2); impl_unary!(f32x4: Sleef_expf4_u10avx2128); impl_unary!(f32x8: Sleef_expf8_u10avx2); impl_unary!(f64x2: Sleef_expd2_u10avx2128); impl_unary!(f64x4: Sleef_expd4_u10avx2); } else if #[cfg(target_feature = "avx")] { impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); impl_unary!(f32x16[h => f32x8]: Sleef_expf8_u10avx); impl_unary!(f64x8[h => f64x4]: Sleef_expd4_u10avx); impl_unary!(f32x4: Sleef_expf4_u10sse4); impl_unary!(f32x8: Sleef_expf8_u10avx); impl_unary!(f64x2: Sleef_expd2_u10sse4); impl_unary!(f64x4: Sleef_expd4_u10avx); } else if #[cfg(target_feature = "sse4.2")] { impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse4); impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse4); impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse4); impl_unary!(f32x4: Sleef_expf4_u10sse4); impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse4); impl_unary!(f64x2: Sleef_expd2_u10sse4); impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse4); } else if #[cfg(target_feature = "sse2")] { impl_unary!(f32x2[t => f32x4]: Sleef_expf4_u10sse2); impl_unary!(f32x16[q => f32x4]: Sleef_expf4_u10sse2); impl_unary!(f64x8[q => f64x2]: Sleef_expd2_u10sse2); impl_unary!(f32x4: Sleef_expf4_u10sse2); impl_unary!(f32x8[h => f32x4]: Sleef_expf4_u10sse2); impl_unary!(f64x2: Sleef_expd2_u10sse2); impl_unary!(f64x4[h => f64x2]: Sleef_expd2_u10sse2); } else { impl_unary!(f32x2[f32; 2]: exp_f32); impl_unary!(f32x16: exp_v16f32); impl_unary!(f64x8: exp_v8f64); impl_unary!(f32x4: exp_v4f32); impl_unary!(f32x8: exp_v8f32); impl_unary!(f64x2: exp_v2f64); impl_unary!(f64x4: exp_v4f64); } } } else { impl_unary!(f32x2[f32; 2]: exp_f32); impl_unary!(f32x4: exp_v4f32); impl_unary!(f32x8: exp_v8f32); impl_unary!(f32x16: exp_v16f32); impl_unary!(f64x2: exp_v2f64); impl_unary!(f64x4: exp_v4f64); impl_unary!(f64x8: exp_v8f64); } } packed_simd-0.3.3/src/codegen/math/float/ln.rs010064400007650000024000000102101342460246600174100ustar0000000000000000//! Vertical floating-point `ln` #![allow(unused)] // FIXME 64-bit lngle elem vectors mislng use crate::*; crate trait Ln { fn ln(self) -> Self; } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.log.v2f32"] fn ln_v2f32(x: f32x2) -> f32x2; #[link_name = "llvm.log.v4f32"] fn ln_v4f32(x: f32x4) -> f32x4; #[link_name = "llvm.log.v8f32"] fn ln_v8f32(x: f32x8) -> f32x8; #[link_name = "llvm.log.v16f32"] fn ln_v16f32(x: f32x16) -> f32x16; /* FIXME 64-bit lngle elem vectors #[link_name = "llvm.log.v1f64"] fn ln_v1f64(x: f64x1) -> f64x1; */ #[link_name = "llvm.log.v2f64"] fn ln_v2f64(x: f64x2) -> f64x2; #[link_name = "llvm.log.v4f64"] fn ln_v4f64(x: f64x4) -> f64x4; #[link_name = "llvm.log.v8f64"] fn ln_v8f64(x: f64x8) -> f64x8; #[link_name = "llvm.log.f32"] fn ln_f32(x: f32) -> f32; #[link_name = "llvm.log.f64"] fn ln_f64(x: f64) -> f64; } gen_unary_impl_table!(Ln, ln); cfg_if! { if #[cfg(target_arch = "s390x")] { // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 impl_unary!(f32x2[f32; 2]: ln_f32); impl_unary!(f32x4[f32; 4]: ln_f32); impl_unary!(f32x8[f32; 8]: ln_f32); impl_unary!(f32x16[f32; 16]: ln_f32); impl_unary!(f64x2[f64; 2]: ln_f64); impl_unary!(f64x4[f64; 4]: ln_f64); impl_unary!(f64x8[f64; 8]: ln_f64); } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { use sleef_sys::*; cfg_if! 
{ if #[cfg(target_feature = "avx2")] { impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10avx2128); impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx2); impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx2); impl_unary!(f32x4: Sleef_logf4_u10avx2128); impl_unary!(f32x8: Sleef_logf8_u10avx2); impl_unary!(f64x2: Sleef_logd2_u10avx2128); impl_unary!(f64x4: Sleef_logd4_u10avx2); } else if #[cfg(target_feature = "avx")] { impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4); impl_unary!(f32x16[h => f32x8]: Sleef_logf8_u10avx); impl_unary!(f64x8[h => f64x4]: Sleef_logd4_u10avx); impl_unary!(f32x4: Sleef_logf4_u10sse4); impl_unary!(f32x8: Sleef_logf8_u10avx); impl_unary!(f64x2: Sleef_logd2_u10sse4); impl_unary!(f64x4: Sleef_logd4_u10avx); } else if #[cfg(target_feature = "sse4.2")] { impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse4); impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse4); impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse4); impl_unary!(f32x4: Sleef_logf4_u10sse4); impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse4); impl_unary!(f64x2: Sleef_logd2_u10sse4); impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse4); } else if #[cfg(target_feature = "sse2")] { impl_unary!(f32x2[t => f32x4]: Sleef_logf4_u10sse2); impl_unary!(f32x16[q => f32x4]: Sleef_logf4_u10sse2); impl_unary!(f64x8[q => f64x2]: Sleef_logd2_u10sse2); impl_unary!(f32x4: Sleef_logf4_u10sse2); impl_unary!(f32x8[h => f32x4]: Sleef_logf4_u10sse2); impl_unary!(f64x2: Sleef_logd2_u10sse2); impl_unary!(f64x4[h => f64x2]: Sleef_logd2_u10sse2); } else { impl_unary!(f32x2[f32; 2]: ln_f32); impl_unary!(f32x16: ln_v16f32); impl_unary!(f64x8: ln_v8f64); impl_unary!(f32x4: ln_v4f32); impl_unary!(f32x8: ln_v8f32); impl_unary!(f64x2: ln_v2f64); impl_unary!(f64x4: ln_v4f64); } } } else { impl_unary!(f32x2[f32; 2]: ln_f32); impl_unary!(f32x4: ln_v4f32); impl_unary!(f32x8: ln_v8f32); impl_unary!(f32x16: ln_v16f32); impl_unary!(f64x2: ln_v2f64); impl_unary!(f64x4: ln_v4f64); impl_unary!(f64x8: ln_v8f64); } } packed_simd-0.3.3/src/codegen/math/float/macros.rs010064400007650000024000000465671342460246600203130ustar0000000000000000//! Utility macros #![allow(unused)] macro_rules! impl_unary_ { // implementation mapping 1:1 (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, $fun:ident) => { impl $trait_id for $vec_id { #[inline] fn $trait_method(self) -> Self { unsafe { use crate::mem::transmute; transmute($fun(transmute(self))) } } } }; // implementation mapping 1:1 for when `$fun` is a generic function // like some of the fp math rustc intrinsics (e.g. `fn fun(x: T) -> T`). 
(gen | $trait_id:ident, $trait_method:ident, $vec_id:ident, $fun:ident) => { impl $trait_id for $vec_id { #[inline] fn $trait_method(self) -> Self { unsafe { use crate::mem::transmute; transmute($fun(self.0)) } } } }; (scalar | $trait_id:ident, $trait_method:ident, $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { impl $trait_id for $vec_id { #[inline] fn $trait_method(self) -> Self { unsafe { union U { vec: $vec_id, scalars: [$sid; $scount], } let mut scalars = U { vec: self }.scalars; for i in &mut scalars { *i = $fun(*i); } U { scalars }.vec } } } }; // implementation calling fun twice on each of the vector halves: (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, $vech_id:ident, $fun:ident) => { impl $trait_id for $vec_id { #[inline] fn $trait_method(self) -> Self { unsafe { use crate::mem::transmute; union U { vec: $vec_id, halves: [$vech_id; 2], } let mut halves = U { vec: self }.halves; *halves.get_unchecked_mut(0) = transmute($fun(transmute(*halves.get_unchecked(0)))); *halves.get_unchecked_mut(1) = transmute($fun(transmute(*halves.get_unchecked(1)))); U { halves }.vec } } } }; // implementation calling fun four times on each of the vector quarters: (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, $vecq_id:ident, $fun:ident) => { impl $trait_id for $vec_id { #[inline] fn $trait_method(self) -> Self { unsafe { use crate::mem::transmute; union U { vec: $vec_id, quarters: [$vecq_id; 4], } let mut quarters = U { vec: self }.quarters; *quarters.get_unchecked_mut(0) = transmute($fun(transmute(*quarters.get_unchecked(0)))); *quarters.get_unchecked_mut(1) = transmute($fun(transmute(*quarters.get_unchecked(1)))); *quarters.get_unchecked_mut(2) = transmute($fun(transmute(*quarters.get_unchecked(2)))); *quarters.get_unchecked_mut(3) = transmute($fun(transmute(*quarters.get_unchecked(3)))); U { quarters }.vec } } } }; // implementation calling fun once on a vector twice as large: (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, $vect_id:ident, $fun:ident) => { impl $trait_id for $vec_id { #[inline] fn $trait_method(self) -> Self { unsafe { use crate::mem::{transmute, uninitialized}; union U { vec: [$vec_id; 2], twice: $vect_id, } let twice = U { vec: [self, uninitialized()] }.twice; let twice = transmute($fun(transmute(twice))); *(U { twice }.vec.get_unchecked(0)) } } } }; } macro_rules! gen_unary_impl_table { ($trait_id:ident, $trait_method:ident) => { macro_rules! impl_unary { ($vid:ident: $fun:ident) => { impl_unary_!(vec | $trait_id, $trait_method, $vid, $fun); }; ($vid:ident[g]: $fun:ident) => { impl_unary_!(gen | $trait_id, $trait_method, $vid, $fun); }; ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { impl_unary_!( scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun ); }; ($vid:ident[s]: $fun:ident) => { impl_unary_!(scalar | $trait_id, $trait_method, $vid, $fun); }; ($vid:ident[h => $vid_h:ident]: $fun:ident) => { impl_unary_!( halves | $trait_id, $trait_method, $vid, $vid_h, $fun ); }; ($vid:ident[q => $vid_q:ident]: $fun:ident) => { impl_unary_!( quarter | $trait_id, $trait_method, $vid, $vid_q, $fun ); }; ($vid:ident[t => $vid_t:ident]: $fun:ident) => { impl_unary_!( twice | $trait_id, $trait_method, $vid, $vid_t, $fun ); }; } }; } macro_rules! 
impl_tertiary_ {
    // implementation mapping 1:1
    (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self, z: Self) -> Self {
                unsafe {
                    use crate::mem::transmute;
                    transmute($fun(
                        transmute(self),
                        transmute(y),
                        transmute(z),
                    ))
                }
            }
        }
    };
    (scalar | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     [$sid:ident; $scount:expr], $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self, z: Self) -> Self {
                unsafe {
                    union U {
                        vec: $vec_id,
                        scalars: [$sid; $scount],
                    }
                    let mut x = U { vec: self }.scalars;
                    let y = U { vec: y }.scalars;
                    let z = U { vec: z }.scalars;
                    // Apply `$fun` lane-wise, iterating the three scalar
                    // arrays in lock-step.
                    for (x, (y, z)) in x.iter_mut().zip(y.iter().zip(&z)) {
                        *x = $fun(*x, *y, *z);
                    }
                    U { scalars: x }.vec
                }
            }
        }
    };
    // implementation calling fun twice on each of the vector halves:
    (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $vech_id:ident, $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self, z: Self) -> Self {
                unsafe {
                    use crate::mem::transmute;
                    union U {
                        vec: $vec_id,
                        halves: [$vech_id; 2],
                    }

                    let mut x_halves = U { vec: self }.halves;
                    let y_halves = U { vec: y }.halves;
                    let z_halves = U { vec: z }.halves;

                    *x_halves.get_unchecked_mut(0) = transmute($fun(
                        transmute(*x_halves.get_unchecked(0)),
                        transmute(*y_halves.get_unchecked(0)),
                        transmute(*z_halves.get_unchecked(0)),
                    ));
                    *x_halves.get_unchecked_mut(1) = transmute($fun(
                        transmute(*x_halves.get_unchecked(1)),
                        transmute(*y_halves.get_unchecked(1)),
                        transmute(*z_halves.get_unchecked(1)),
                    ));

                    U { halves: x_halves }.vec
                }
            }
        }
    };
    // implementation calling fun four times on each of the vector quarters:
    (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $vecq_id:ident, $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self, z: Self) -> Self {
                unsafe {
                    use crate::mem::transmute;
                    union U {
                        vec: $vec_id,
                        quarters: [$vecq_id; 4],
                    }

                    let mut x_quarters = U { vec: self }.quarters;
                    let y_quarters = U { vec: y }.quarters;
                    let z_quarters = U { vec: z }.quarters;

                    *x_quarters.get_unchecked_mut(0) = transmute($fun(
                        transmute(*x_quarters.get_unchecked(0)),
                        transmute(*y_quarters.get_unchecked(0)),
                        transmute(*z_quarters.get_unchecked(0)),
                    ));
                    *x_quarters.get_unchecked_mut(1) = transmute($fun(
                        transmute(*x_quarters.get_unchecked(1)),
                        transmute(*y_quarters.get_unchecked(1)),
                        transmute(*z_quarters.get_unchecked(1)),
                    ));
                    *x_quarters.get_unchecked_mut(2) = transmute($fun(
                        transmute(*x_quarters.get_unchecked(2)),
                        transmute(*y_quarters.get_unchecked(2)),
                        transmute(*z_quarters.get_unchecked(2)),
                    ));
                    *x_quarters.get_unchecked_mut(3) = transmute($fun(
                        transmute(*x_quarters.get_unchecked(3)),
                        transmute(*y_quarters.get_unchecked(3)),
                        transmute(*z_quarters.get_unchecked(3)),
                    ));

                    U { quarters: x_quarters }.vec
                }
            }
        }
    };
    // implementation calling fun once on a vector twice as large:
    (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident,
     $vect_id:ident, $fun:ident) => {
        impl $trait_id for $vec_id {
            #[inline]
            fn $trait_method(self, y: Self, z: Self) -> Self {
                unsafe {
                    use crate::mem::{transmute, uninitialized};
                    union U {
                        vec: [$vec_id; 2],
                        twice: $vect_id,
                    }

                    let x_twice = U { vec: [self, uninitialized()] }.twice;
                    let y_twice = U { vec: [y, uninitialized()] }.twice;
                    let z_twice = U { vec: [z, uninitialized()] }.twice;
                    let twice: $vect_id = transmute($fun(
                        transmute(x_twice),
                        transmute(y_twice),
                        transmute(z_twice),
                    ));

                    *(U { twice }.vec.get_unchecked(0))
                }
            }
        }
    };
}
macro_rules!
gen_tertiary_impl_table { ($trait_id:ident, $trait_method:ident) => { macro_rules! impl_tertiary { ($vid:ident: $fun:ident) => { impl_tertiary_!(vec | $trait_id, $trait_method, $vid, $fun); }; ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { impl_tertiary_!( scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun ); }; ($vid:ident[s]: $fun:ident) => { impl_tertiary_!(scalar | $trait_id, $trait_method, $vid, $fun); }; ($vid:ident[h => $vid_h:ident]: $fun:ident) => { impl_tertiary_!( halves | $trait_id, $trait_method, $vid, $vid_h, $fun ); }; ($vid:ident[q => $vid_q:ident]: $fun:ident) => { impl_tertiary_!( quarter | $trait_id, $trait_method, $vid, $vid_q, $fun ); }; ($vid:ident[t => $vid_t:ident]: $fun:ident) => { impl_tertiary_!( twice | $trait_id, $trait_method, $vid, $vid_t, $fun ); }; } }; } macro_rules! impl_binary_ { // implementation mapping 1:1 (vec | $trait_id:ident, $trait_method:ident, $vec_id:ident, $fun:ident) => { impl $trait_id for $vec_id { #[inline] fn $trait_method(self, y: Self) -> Self { unsafe { use crate::mem::transmute; transmute($fun(transmute(self), transmute(y))) } } } }; (scalar | $trait_id:ident, $trait_method:ident, $vec_id:ident, [$sid:ident; $scount:expr], $fun:ident) => { impl $trait_id for $vec_id { #[inline] fn $trait_method(self, y: Self) -> Self { unsafe { union U { vec: $vec_id, scalars: [$sid; $scount], } let mut x = U { vec: self }.scalars; let y = U { vec: y }.scalars; for (x, y) in x.iter_mut().zip(&y) { *x = $fun(*x, *y); } U { scalars: x }.vec } } } }; // implementation calling fun twice on each of the vector halves: (halves | $trait_id:ident, $trait_method:ident, $vec_id:ident, $vech_id:ident, $fun:ident) => { impl $trait_id for $vec_id { #[inline] fn $trait_method(self, y: Self) -> Self { unsafe { use crate::mem::transmute; union U { vec: $vec_id, halves: [$vech_id; 2], } let mut x_halves = U { vec: self }.halves; let y_halves = U { vec: y }.halves; *x_halves.get_unchecked_mut(0) = transmute($fun( transmute(*x_halves.get_unchecked(0)), transmute(*y_halves.get_unchecked(0)), )); *x_halves.get_unchecked_mut(1) = transmute($fun( transmute(*x_halves.get_unchecked(1)), transmute(*y_halves.get_unchecked(1)), )); U { halves: x_halves }.vec } } } }; // implementation calling fun four times on each of the vector quarters: (quarter | $trait_id:ident, $trait_method:ident, $vec_id:ident, $vecq_id:ident, $fun:ident) => { impl $trait_id for $vec_id { #[inline] fn $trait_method(self, y: Self) -> Self { unsafe { use crate::mem::transmute; union U { vec: $vec_id, quarters: [$vecq_id; 4], } let mut x_quarters = U { vec: self }.quarters; let y_quarters = U { vec: y }.quarters; *x_quarters.get_unchecked_mut(0) = transmute($fun( transmute(*x_quarters.get_unchecked(0)), transmute(*y_quarters.get_unchecked(0)), )); *x_quarters.get_unchecked_mut(1) = transmute($fun( transmute(*x_quarters.get_unchecked(1)), transmute(*y_quarters.get_unchecked(1)), )); *x_quarters.get_unchecked_mut(2) = transmute($fun( transmute(*x_quarters.get_unchecked(2)), transmute(*y_quarters.get_unchecked(2)), )); *x_quarters.get_unchecked_mut(3) = transmute($fun( transmute(*x_quarters.get_unchecked(3)), transmute(*y_quarters.get_unchecked(3)), )); U { quarters: x_quarters }.vec } } } }; // implementation calling fun once on a vector twice as large: (twice | $trait_id:ident, $trait_method:ident, $vec_id:ident, $vect_id:ident, $fun:ident) => { impl $trait_id for $vec_id { #[inline] fn $trait_method(self, y: Self) -> Self { unsafe { use crate::mem::{transmute, uninitialized}; union U { vec: 
[$vec_id; 2], twice: $vect_id, } let x_twice = U { vec: [self, uninitialized()] }.twice; let y_twice = U { vec: [y, uninitialized()] }.twice; let twice: $vect_id = transmute($fun( transmute(x_twice), transmute(y_twice), )); *(U { twice }.vec.get_unchecked(0)) } } } }; } macro_rules! gen_binary_impl_table { ($trait_id:ident, $trait_method:ident) => { macro_rules! impl_binary { ($vid:ident: $fun:ident) => { impl_binary_!(vec | $trait_id, $trait_method, $vid, $fun); }; ($vid:ident[$sid:ident; $sc:expr]: $fun:ident) => { impl_binary_!( scalar | $trait_id, $trait_method, $vid, [$sid; $sc], $fun ); }; ($vid:ident[s]: $fun:ident) => { impl_binary_!(scalar | $trait_id, $trait_method, $vid, $fun); }; ($vid:ident[h => $vid_h:ident]: $fun:ident) => { impl_binary_!( halves | $trait_id, $trait_method, $vid, $vid_h, $fun ); }; ($vid:ident[q => $vid_q:ident]: $fun:ident) => { impl_binary_!( quarter | $trait_id, $trait_method, $vid, $vid_q, $fun ); }; ($vid:ident[t => $vid_t:ident]: $fun:ident) => { impl_binary_!( twice | $trait_id, $trait_method, $vid, $vid_t, $fun ); }; } }; } packed_simd-0.3.3/src/codegen/math/float/mul_add.rs010064400007650000024000000100611342460246600204100ustar0000000000000000//! Vertical floating-point `mul_add` #![allow(unused)] use crate::*; // FIXME: 64-bit 1 element mul_add crate trait MulAdd { fn mul_add(self, y: Self, z: Self) -> Self; } #[cfg(not(target_arch = "s390x"))] #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.fma.v2f32"] fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2; #[link_name = "llvm.fma.v4f32"] fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4; #[link_name = "llvm.fma.v8f32"] fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8; #[link_name = "llvm.fma.v16f32"] fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16; /* FIXME 64-bit single elem vectors #[link_name = "llvm.fma.v1f64"] fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1; */ #[link_name = "llvm.fma.v2f64"] fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2; #[link_name = "llvm.fma.v4f64"] fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4; #[link_name = "llvm.fma.v8f64"] fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8; } gen_tertiary_impl_table!(MulAdd, mul_add); cfg_if! { if #[cfg(target_arch = "s390x")] { // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 macro_rules! impl_broken { ($id:ident) => { impl MulAdd for $id { #[inline] fn mul_add(self, y: Self, z: Self) -> Self { self * y + z } } }; } impl_broken!(f32x2); impl_broken!(f32x4); impl_broken!(f32x8); impl_broken!(f32x16); impl_broken!(f64x2); impl_broken!(f64x4); impl_broken!(f64x8); } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { use sleef_sys::*; cfg_if! 
{ if #[cfg(target_feature = "avx2")] { impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_avx2128); impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx2); impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx2); impl_tertiary!(f32x4: Sleef_fmaf4_avx2128); impl_tertiary!(f32x8: Sleef_fmaf8_avx2); impl_tertiary!(f64x2: Sleef_fmad2_avx2128); impl_tertiary!(f64x4: Sleef_fmad4_avx2); } else if #[cfg(target_feature = "avx")] { impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4); impl_tertiary!(f32x16[h => f32x8]: Sleef_fmaf8_avx); impl_tertiary!(f64x8[h => f64x4]: Sleef_fmad4_avx); impl_tertiary!(f32x4: Sleef_fmaf4_sse4); impl_tertiary!(f32x8: Sleef_fmaf8_avx); impl_tertiary!(f64x2: Sleef_fmad2_sse4); impl_tertiary!(f64x4: Sleef_fmad4_avx); } else if #[cfg(target_feature = "sse4.2")] { impl_tertiary!(f32x2[t => f32x4]: Sleef_fmaf4_sse4); impl_tertiary!(f32x16[q => f32x4]: Sleef_fmaf4_sse4); impl_tertiary!(f64x8[q => f64x2]: Sleef_fmad2_sse4); impl_tertiary!(f32x4: Sleef_fmaf4_sse4); impl_tertiary!(f32x8[h => f32x4]: Sleef_fmaf4_sse4); impl_tertiary!(f64x2: Sleef_fmad2_sse4); impl_tertiary!(f64x4[h => f64x2]: Sleef_fmad2_sse4); } else { impl_tertiary!(f32x2: fma_v2f32); impl_tertiary!(f32x16: fma_v16f32); impl_tertiary!(f64x8: fma_v8f64); impl_tertiary!(f32x4: fma_v4f32); impl_tertiary!(f32x8: fma_v8f32); impl_tertiary!(f64x2: fma_v2f64); impl_tertiary!(f64x4: fma_v4f64); } } } else { impl_tertiary!(f32x2: fma_v2f32); impl_tertiary!(f32x4: fma_v4f32); impl_tertiary!(f32x8: fma_v8f32); impl_tertiary!(f32x16: fma_v16f32); // impl_tertiary!(f64x1: fma_v1f64); // FIXME 64-bit fmagle elem vectors impl_tertiary!(f64x2: fma_v2f64); impl_tertiary!(f64x4: fma_v4f64); impl_tertiary!(f64x8: fma_v8f64); } } packed_simd-0.3.3/src/codegen/math/float/mul_adde.rs010064400007650000024000000043031342460246600205570ustar0000000000000000//! Approximation for floating-point `mul_add` use crate::*; // FIXME: 64-bit 1 element mul_adde crate trait MulAddE { fn mul_adde(self, y: Self, z: Self) -> Self; } #[cfg(not(target_arch = "s390x"))] #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.fmuladd.v2f32"] fn fmuladd_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2; #[link_name = "llvm.fmuladd.v4f32"] fn fmuladd_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4; #[link_name = "llvm.fmuladd.v8f32"] fn fmuladd_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8; #[link_name = "llvm.fmuladd.v16f32"] fn fmuladd_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16; /* FIXME 64-bit single elem vectors #[link_name = "llvm.fmuladd.v1f64"] fn fmuladd_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1; */ #[link_name = "llvm.fmuladd.v2f64"] fn fmuladd_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2; #[link_name = "llvm.fmuladd.v4f64"] fn fmuladd_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4; #[link_name = "llvm.fmuladd.v8f64"] fn fmuladd_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8; } macro_rules! 
impl_mul_adde { ($id:ident : $fn:ident) => { impl MulAddE for $id { #[inline] fn mul_adde(self, y: Self, z: Self) -> Self { #[cfg(not(target_arch = "s390x"))] { use crate::mem::transmute; unsafe { transmute($fn( transmute(self), transmute(y), transmute(z), )) } } #[cfg(target_arch = "s390x")] { // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 self * y + z } } } }; } impl_mul_adde!(f32x2: fmuladd_v2f32); impl_mul_adde!(f32x4: fmuladd_v4f32); impl_mul_adde!(f32x8: fmuladd_v8f32); impl_mul_adde!(f32x16: fmuladd_v16f32); // impl_mul_adde!(f64x1: fma_v1f64); // FIXME 64-bit fmagle elem vectors impl_mul_adde!(f64x2: fmuladd_v2f64); impl_mul_adde!(f64x4: fmuladd_v4f64); impl_mul_adde!(f64x8: fmuladd_v8f64); packed_simd-0.3.3/src/codegen/math/float/powf.rs010064400007650000024000000105621342460246600177640ustar0000000000000000//! Vertical floating-point `powf` #![allow(unused)] // FIXME 64-bit powfgle elem vectors mispowfg use crate::*; crate trait Powf { fn powf(self, x: Self) -> Self; } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.pow.v2f32"] fn powf_v2f32(x: f32x2, y: f32x2) -> f32x2; #[link_name = "llvm.pow.v4f32"] fn powf_v4f32(x: f32x4, y: f32x4) -> f32x4; #[link_name = "llvm.pow.v8f32"] fn powf_v8f32(x: f32x8, y: f32x8) -> f32x8; #[link_name = "llvm.pow.v16f32"] fn powf_v16f32(x: f32x16, y: f32x16) -> f32x16; /* FIXME 64-bit powfgle elem vectors #[link_name = "llvm.pow.v1f64"] fn powf_v1f64(x: f64x1, y: f64x1) -> f64x1; */ #[link_name = "llvm.pow.v2f64"] fn powf_v2f64(x: f64x2, y: f64x2) -> f64x2; #[link_name = "llvm.pow.v4f64"] fn powf_v4f64(x: f64x4, y: f64x4) -> f64x4; #[link_name = "llvm.pow.v8f64"] fn powf_v8f64(x: f64x8, y: f64x8) -> f64x8; #[link_name = "llvm.pow.f32"] fn powf_f32(x: f32, y: f32) -> f32; #[link_name = "llvm.pow.f64"] fn powf_f64(x: f64, y: f64) -> f64; } gen_binary_impl_table!(Powf, powf); cfg_if! { if #[cfg(target_arch = "s390x")] { // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 impl_binary!(f32x2[f32; 2]: powf_f32); impl_binary!(f32x4[f32; 4]: powf_f32); impl_binary!(f32x8[f32; 8]: powf_f32); impl_binary!(f32x16[f32; 16]: powf_f32); impl_binary!(f64x2[f64; 2]: powf_f64); impl_binary!(f64x4[f64; 4]: powf_f64); impl_binary!(f64x8[f64; 8]: powf_f64); } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { use sleef_sys::*; cfg_if! 
{ if #[cfg(target_feature = "avx2")] { impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10avx2128); impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx2); impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx2); impl_binary!(f32x4: Sleef_powf4_u10avx2128); impl_binary!(f32x8: Sleef_powf8_u10avx2); impl_binary!(f64x2: Sleef_powd2_u10avx2128); impl_binary!(f64x4: Sleef_powd4_u10avx2); } else if #[cfg(target_feature = "avx")] { impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4); impl_binary!(f32x16[h => f32x8]: Sleef_powf8_u10avx); impl_binary!(f64x8[h => f64x4]: Sleef_powd4_u10avx); impl_binary!(f32x4: Sleef_powf4_u10sse4); impl_binary!(f32x8: Sleef_powf8_u10avx); impl_binary!(f64x2: Sleef_powd2_u10sse4); impl_binary!(f64x4: Sleef_powd4_u10avx); } else if #[cfg(target_feature = "sse4.2")] { impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse4); impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse4); impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse4); impl_binary!(f32x4: Sleef_powf4_u10sse4); impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse4); impl_binary!(f64x2: Sleef_powd2_u10sse4); impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse4); } else if #[cfg(target_feature = "sse2")] { impl_binary!(f32x2[t => f32x4]: Sleef_powf4_u10sse2); impl_binary!(f32x16[q => f32x4]: Sleef_powf4_u10sse2); impl_binary!(f64x8[q => f64x2]: Sleef_powd2_u10sse2); impl_binary!(f32x4: Sleef_powf4_u10sse2); impl_binary!(f32x8[h => f32x4]: Sleef_powf4_u10sse2); impl_binary!(f64x2: Sleef_powd2_u10sse2); impl_binary!(f64x4[h => f64x2]: Sleef_powd2_u10sse2); } else { impl_binary!(f32x2[f32; 2]: powf_f32); impl_binary!(f32x4: powf_v4f32); impl_binary!(f32x8: powf_v8f32); impl_binary!(f32x16: powf_v16f32); impl_binary!(f64x2: powf_v2f64); impl_binary!(f64x4: powf_v4f64); impl_binary!(f64x8: powf_v8f64); } } } else { impl_binary!(f32x2[f32; 2]: powf_f32); impl_binary!(f32x4: powf_v4f32); impl_binary!(f32x8: powf_v8f32); impl_binary!(f32x16: powf_v16f32); impl_binary!(f64x2: powf_v2f64); impl_binary!(f64x4: powf_v4f64); impl_binary!(f64x8: powf_v8f64); } } packed_simd-0.3.3/src/codegen/math/float/sin.rs010064400007650000024000000072411342460246600176020ustar0000000000000000//! Vertical floating-point `sin` #![allow(unused)] // FIXME 64-bit 1 elem vectors sin use crate::*; crate trait Sin { fn sin(self) -> Self; } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.sin.v2f32"] fn sin_v2f32(x: f32x2) -> f32x2; #[link_name = "llvm.sin.v4f32"] fn sin_v4f32(x: f32x4) -> f32x4; #[link_name = "llvm.sin.v8f32"] fn sin_v8f32(x: f32x8) -> f32x8; #[link_name = "llvm.sin.v16f32"] fn sin_v16f32(x: f32x16) -> f32x16; /* FIXME 64-bit single elem vectors #[link_name = "llvm.sin.v1f64"] fn sin_v1f64(x: f64x1) -> f64x1; */ #[link_name = "llvm.sin.v2f64"] fn sin_v2f64(x: f64x2) -> f64x2; #[link_name = "llvm.sin.v4f64"] fn sin_v4f64(x: f64x4) -> f64x4; #[link_name = "llvm.sin.v8f64"] fn sin_v8f64(x: f64x8) -> f64x8; #[link_name = "llvm.sin.f32"] fn sin_f32(x: f32) -> f32; #[link_name = "llvm.sin.f64"] fn sin_f64(x: f64) -> f64; } gen_unary_impl_table!(Sin, sin); cfg_if! { if #[cfg(target_arch = "s390x")] { // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 impl_unary!(f32x2[f32; 2]: sin_f32); impl_unary!(f32x4[f32; 4]: sin_f32); impl_unary!(f32x8[f32; 8]: sin_f32); impl_unary!(f32x16[f32; 16]: sin_f32); impl_unary!(f64x2[f64; 2]: sin_f64); impl_unary!(f64x4[f64; 4]: sin_f64); impl_unary!(f64x8[f64; 8]: sin_f64); } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { use sleef_sys::*; cfg_if! 
{ if #[cfg(target_feature = "avx2")] { impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10avx2128); impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx2); impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx2); impl_unary!(f32x4: Sleef_sinf4_u10avx2128); impl_unary!(f32x8: Sleef_sinf8_u10avx2); impl_unary!(f64x2: Sleef_sind2_u10avx2128); impl_unary!(f64x4: Sleef_sind4_u10avx2); } else if #[cfg(target_feature = "avx")] { impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4); impl_unary!(f32x16[h => f32x8]: Sleef_sinf8_u10avx); impl_unary!(f64x8[h => f64x4]: Sleef_sind4_u10avx); impl_unary!(f32x4: Sleef_sinf4_u10sse4); impl_unary!(f32x8: Sleef_sinf8_u10avx); impl_unary!(f64x2: Sleef_sind2_u10sse4); impl_unary!(f64x4: Sleef_sind4_u10avx); } else if #[cfg(target_feature = "sse4.2")] { impl_unary!(f32x2[t => f32x4]: Sleef_sinf4_u10sse4); impl_unary!(f32x16[q => f32x4]: Sleef_sinf4_u10sse4); impl_unary!(f64x8[q => f64x2]: Sleef_sind2_u10sse4); impl_unary!(f32x4: Sleef_sinf4_u10sse4); impl_unary!(f32x8[h => f32x4]: Sleef_sinf4_u10sse4); impl_unary!(f64x2: Sleef_sind2_u10sse4); impl_unary!(f64x4[h => f64x2]: Sleef_sind2_u10sse4); } else { impl_unary!(f32x2[f32; 2]: sin_f32); impl_unary!(f32x16: sin_v16f32); impl_unary!(f64x8: sin_v8f64); impl_unary!(f32x4: sin_v4f32); impl_unary!(f32x8: sin_v8f32); impl_unary!(f64x2: sin_v2f64); impl_unary!(f64x4: sin_v4f64); } } } else { impl_unary!(f32x2[f32; 2]: sin_f32); impl_unary!(f32x4: sin_v4f32); impl_unary!(f32x8: sin_v8f32); impl_unary!(f32x16: sin_v16f32); impl_unary!(f64x2: sin_v2f64); impl_unary!(f64x4: sin_v4f64); impl_unary!(f64x8: sin_v8f64); } } packed_simd-0.3.3/src/codegen/math/float/sin_cos_pi.rs010064400007650000024000000150531342460246600211360ustar0000000000000000//! Vertical floating-point `sin_cos` #![allow(unused)] // FIXME 64-bit 1 elem vectors sin_cos use crate::*; crate trait SinCosPi: Sized { type Output; fn sin_cos_pi(self) -> Self::Output; } macro_rules! impl_def { ($vid:ident, $PI:path) => { impl SinCosPi for $vid { type Output = (Self, Self); #[inline] fn sin_cos_pi(self) -> Self::Output { let v = self * Self::splat($PI); (v.sin(), v.cos()) } } }; } macro_rules! impl_def32 { ($vid:ident) => { impl_def!($vid, crate::f32::consts::PI); }; } macro_rules! impl_def64 { ($vid:ident) => { impl_def!($vid, crate::f64::consts::PI); }; } macro_rules! 
impl_unary_t { ($vid:ident: $fun:ident) => { impl SinCosPi for $vid { type Output = (Self, Self); fn sin_cos_pi(self) -> Self::Output { unsafe { use crate::mem::transmute; transmute($fun(transmute(self))) } } } }; ($vid:ident[t => $vid_t:ident]: $fun:ident) => { impl SinCosPi for $vid { type Output = (Self, Self); fn sin_cos_pi(self) -> Self::Output { unsafe { use crate::mem::{transmute, uninitialized}; union U { vec: [$vid; 2], twice: $vid_t, } let twice = U { vec: [self, uninitialized()] }.twice; let twice = transmute($fun(transmute(twice))); union R { twice: ($vid_t, $vid_t), vecs: ([$vid; 2], [$vid; 2]), } let r = R { twice }.vecs; (*r.0.get_unchecked(0), *r.0.get_unchecked(1)) } } } }; ($vid:ident[h => $vid_h:ident]: $fun:ident) => { impl SinCosPi for $vid { type Output = (Self, Self); fn sin_cos_pi(self) -> Self::Output { unsafe { use crate::mem::transmute; union U { vec: $vid, halves: [$vid_h; 2], } let halves = U { vec: self }.halves; let res_0: ($vid_h, $vid_h) = transmute($fun(transmute(*halves.get_unchecked(0)))); let res_1: ($vid_h, $vid_h) = transmute($fun(transmute(*halves.get_unchecked(1)))); union R { result: ($vid, $vid), halves: ([$vid_h; 2], [$vid_h; 2]), } R { halves: ([res_0.0, res_1.0], [res_0.1, res_1.1]) } .result } } } }; ($vid:ident[q => $vid_q:ident]: $fun:ident) => { impl SinCosPi for $vid { type Output = (Self, Self); fn sin_cos_pi(self) -> Self::Output { unsafe { use crate::mem::transmute; union U { vec: $vid, quarters: [$vid_q; 4], } let quarters = U { vec: self }.quarters; let res_0: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(0)))); let res_1: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(1)))); let res_2: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(2)))); let res_3: ($vid_q, $vid_q) = transmute($fun(transmute(*quarters.get_unchecked(3)))); union R { result: ($vid, $vid), quarters: ([$vid_q; 4], [$vid_q; 4]), } R { quarters: ( [res_0.0, res_1.0, res_2.0, res_3.0], [res_0.1, res_1.1, res_2.1, res_3.1], ), } .result } } } }; } cfg_if! { if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { use sleef_sys::*; cfg_if! 
{ if #[cfg(target_feature = "avx2")] { impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05avx2128); impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx2); impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx2); impl_unary_t!(f32x4: Sleef_sincospif4_u05avx2128); impl_unary_t!(f32x8: Sleef_sincospif8_u05avx2); impl_unary_t!(f64x2: Sleef_sincospid2_u05avx2128); impl_unary_t!(f64x4: Sleef_sincospid4_u05avx2); } else if #[cfg(target_feature = "avx")] { impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4); impl_unary_t!(f32x16[h => f32x8]: Sleef_sincospif8_u05avx); impl_unary_t!(f64x8[h => f64x4]: Sleef_sincospid4_u05avx); impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4); impl_unary_t!(f32x8: Sleef_sincospif8_u05avx); impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4); impl_unary_t!(f64x4: Sleef_sincospid4_u05avx); } else if #[cfg(target_feature = "sse4.2")] { impl_unary_t!(f32x2[t => f32x4]: Sleef_sincospif4_u05sse4); impl_unary_t!(f32x16[q => f32x4]: Sleef_sincospif4_u05sse4); impl_unary_t!(f64x8[q => f64x2]: Sleef_sincospid2_u05sse4); impl_unary_t!(f32x4: Sleef_sincospif4_u05sse4); impl_unary_t!(f32x8[h => f32x4]: Sleef_sincospif4_u05sse4); impl_unary_t!(f64x2: Sleef_sincospid2_u05sse4); impl_unary_t!(f64x4[h => f64x2]: Sleef_sincospid2_u05sse4); } else { impl_def32!(f32x2); impl_def32!(f32x4); impl_def32!(f32x8); impl_def32!(f32x16); impl_def64!(f64x2); impl_def64!(f64x4); impl_def64!(f64x8); } } } else { impl_def32!(f32x2); impl_def32!(f32x4); impl_def32!(f32x8); impl_def32!(f32x16); impl_def64!(f64x2); impl_def64!(f64x4); impl_def64!(f64x8); } } packed_simd-0.3.3/src/codegen/math/float/sin_pi.rs010064400007650000024000000054111342460246600202670ustar0000000000000000//! Vertical floating-point `sin_pi` #![allow(unused)] // FIXME 64-bit 1 elem vectors sin_pi use crate::*; crate trait SinPi { fn sin_pi(self) -> Self; } gen_unary_impl_table!(SinPi, sin_pi); macro_rules! impl_def { ($vid:ident, $PI:path) => { impl SinPi for $vid { #[inline] fn sin_pi(self) -> Self { (self * Self::splat($PI)).sin() } } }; } macro_rules! impl_def32 { ($vid:ident) => { impl_def!($vid, crate::f32::consts::PI); }; } macro_rules! impl_def64 { ($vid:ident) => { impl_def!($vid, crate::f64::consts::PI); }; } cfg_if! { if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { use sleef_sys::*; cfg_if! 
{ if #[cfg(target_feature = "avx2")] { impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05avx2128); impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx2); impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx2); impl_unary!(f32x4: Sleef_sinpif4_u05avx2128); impl_unary!(f32x8: Sleef_sinpif8_u05avx2); impl_unary!(f64x2: Sleef_sinpid2_u05avx2128); impl_unary!(f64x4: Sleef_sinpid4_u05avx2); } else if #[cfg(target_feature = "avx")] { impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4); impl_unary!(f32x16[h => f32x8]: Sleef_sinpif8_u05avx); impl_unary!(f64x8[h => f64x4]: Sleef_sinpid4_u05avx); impl_unary!(f32x4: Sleef_sinpif4_u05sse4); impl_unary!(f32x8: Sleef_sinpif8_u05avx); impl_unary!(f64x2: Sleef_sinpid2_u05sse4); impl_unary!(f64x4: Sleef_sinpid4_u05avx); } else if #[cfg(target_feature = "sse4.2")] { impl_unary!(f32x2[t => f32x4]: Sleef_sinpif4_u05sse4); impl_unary!(f32x16[q => f32x4]: Sleef_sinpif4_u05sse4); impl_unary!(f64x8[q => f64x2]: Sleef_sinpid2_u05sse4); impl_unary!(f32x4: Sleef_sinpif4_u05sse4); impl_unary!(f32x8[h => f32x4]: Sleef_sinpif4_u05sse4); impl_unary!(f64x2: Sleef_sinpid2_u05sse4); impl_unary!(f64x4[h => f64x2]: Sleef_sinpid2_u05sse4); } else { impl_def32!(f32x2); impl_def32!(f32x4); impl_def32!(f32x8); impl_def32!(f32x16); impl_def64!(f64x2); impl_def64!(f64x4); impl_def64!(f64x8); } } } else { impl_def32!(f32x2); impl_def32!(f32x4); impl_def32!(f32x8); impl_def32!(f32x16); impl_def64!(f64x2); impl_def64!(f64x4); impl_def64!(f64x8); } } packed_simd-0.3.3/src/codegen/math/float/sqrt.rs010064400007650000024000000072471342460246600200100ustar0000000000000000//! Vertical floating-point `sqrt` #![allow(unused)] // FIXME 64-bit 1 elem vectors sqrt use crate::*; crate trait Sqrt { fn sqrt(self) -> Self; } #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.sqrt.v2f32"] fn sqrt_v2f32(x: f32x2) -> f32x2; #[link_name = "llvm.sqrt.v4f32"] fn sqrt_v4f32(x: f32x4) -> f32x4; #[link_name = "llvm.sqrt.v8f32"] fn sqrt_v8f32(x: f32x8) -> f32x8; #[link_name = "llvm.sqrt.v16f32"] fn sqrt_v16f32(x: f32x16) -> f32x16; /* FIXME 64-bit sqrtgle elem vectors #[link_name = "llvm.sqrt.v1f64"] fn sqrt_v1f64(x: f64x1) -> f64x1; */ #[link_name = "llvm.sqrt.v2f64"] fn sqrt_v2f64(x: f64x2) -> f64x2; #[link_name = "llvm.sqrt.v4f64"] fn sqrt_v4f64(x: f64x4) -> f64x4; #[link_name = "llvm.sqrt.v8f64"] fn sqrt_v8f64(x: f64x8) -> f64x8; #[link_name = "llvm.sqrt.f32"] fn sqrt_f32(x: f32) -> f32; #[link_name = "llvm.sqrt.f64"] fn sqrt_f64(x: f64) -> f64; } gen_unary_impl_table!(Sqrt, sqrt); cfg_if! { if #[cfg(target_arch = "s390x")] { // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14 impl_unary!(f32x2[f32; 2]: sqrt_f32); impl_unary!(f32x4[f32; 4]: sqrt_f32); impl_unary!(f32x8[f32; 8]: sqrt_f32); impl_unary!(f32x16[f32; 16]: sqrt_f32); impl_unary!(f64x2[f64; 2]: sqrt_f64); impl_unary!(f64x4[f64; 4]: sqrt_f64); impl_unary!(f64x8[f64; 8]: sqrt_f64); } else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { use sleef_sys::*; cfg_if! 
{ if #[cfg(target_feature = "avx2")] { impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_avx2128); impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx2); impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx2); impl_unary!(f32x4: Sleef_sqrtf4_avx2128); impl_unary!(f32x8: Sleef_sqrtf8_avx2); impl_unary!(f64x2: Sleef_sqrtd2_avx2128); impl_unary!(f64x4: Sleef_sqrtd4_avx2); } else if #[cfg(target_feature = "avx")] { impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4); impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_avx); impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_avx); impl_unary!(f32x4: Sleef_sqrtf4_sse4); impl_unary!(f32x8: Sleef_sqrtf8_avx); impl_unary!(f64x2: Sleef_sqrtd2_sse4); impl_unary!(f64x4: Sleef_sqrtd4_avx); } else if #[cfg(target_feature = "sse4.2")] { impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_sse4); impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_sse4); impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_sse4); impl_unary!(f32x4: Sleef_sqrtf4_sse4); impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_sse4); impl_unary!(f64x2: Sleef_sqrtd2_sse4); impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_sse4); } else { impl_unary!(f32x2[f32; 2]: sqrt_f32); impl_unary!(f32x16: sqrt_v16f32); impl_unary!(f64x8: sqrt_v8f64); impl_unary!(f32x4: sqrt_v4f32); impl_unary!(f32x8: sqrt_v8f32); impl_unary!(f64x2: sqrt_v2f64); impl_unary!(f64x4: sqrt_v4f64); } } } else { impl_unary!(f32x2[f32; 2]: sqrt_f32); impl_unary!(f32x4: sqrt_v4f32); impl_unary!(f32x8: sqrt_v8f32); impl_unary!(f32x16: sqrt_v16f32); impl_unary!(f64x2: sqrt_v2f64); impl_unary!(f64x4: sqrt_v4f64); impl_unary!(f64x8: sqrt_v8f64); } } packed_simd-0.3.3/src/codegen/math/float/sqrte.rs010064400007650000024000000050461342460246600201500ustar0000000000000000//! Vertical floating-point `sqrte` (square-root estimate) #![allow(unused)] // FIXME 64-bit 1 elem vectors sqrte use crate::llvm::simd_fsqrt; use crate::*; crate trait Sqrte { fn sqrte(self) -> Self; } gen_unary_impl_table!(Sqrte, sqrte); cfg_if! { if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] { use sleef_sys::*; cfg_if!
{ if #[cfg(target_feature = "avx2")] { impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35avx2128); impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx2); impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx2); impl_unary!(f32x4: Sleef_sqrtf4_u35avx2128); impl_unary!(f32x8: Sleef_sqrtf8_u35avx2); impl_unary!(f64x2: Sleef_sqrtd2_u35avx2128); impl_unary!(f64x4: Sleef_sqrtd4_u35avx2); } else if #[cfg(target_feature = "avx")] { impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4); impl_unary!(f32x16[h => f32x8]: Sleef_sqrtf8_u35avx); impl_unary!(f64x8[h => f64x4]: Sleef_sqrtd4_u35avx); impl_unary!(f32x4: Sleef_sqrtf4_u35sse4); impl_unary!(f32x8: Sleef_sqrtf8_u35avx); impl_unary!(f64x2: Sleef_sqrtd2_u35sse4); impl_unary!(f64x4: Sleef_sqrtd4_u35avx); } else if #[cfg(target_feature = "sse4.2")] { impl_unary!(f32x2[t => f32x4]: Sleef_sqrtf4_u35sse4); impl_unary!(f32x16[q => f32x4]: Sleef_sqrtf4_u35sse4); impl_unary!(f64x8[q => f64x2]: Sleef_sqrtd2_u35sse4); impl_unary!(f32x4: Sleef_sqrtf4_u35sse4); impl_unary!(f32x8[h => f32x4]: Sleef_sqrtf4_u35sse4); impl_unary!(f64x2: Sleef_sqrtd2_u35sse4); impl_unary!(f64x4[h => f64x2]: Sleef_sqrtd2_u35sse4); } else { impl_unary!(f32x2[g]: simd_fsqrt); impl_unary!(f32x16[g]: simd_fsqrt); impl_unary!(f64x8[g]: simd_fsqrt); impl_unary!(f32x4[g]: simd_fsqrt); impl_unary!(f32x8[g]: simd_fsqrt); impl_unary!(f64x2[g]: simd_fsqrt); impl_unary!(f64x4[g]: simd_fsqrt); } } } else { impl_unary!(f32x2[g]: simd_fsqrt); impl_unary!(f32x4[g]: simd_fsqrt); impl_unary!(f32x8[g]: simd_fsqrt); impl_unary!(f32x16[g]: simd_fsqrt); impl_unary!(f64x2[g]: simd_fsqrt); impl_unary!(f64x4[g]: simd_fsqrt); impl_unary!(f64x8[g]: simd_fsqrt); } } packed_simd-0.3.3/src/codegen/pointer_sized_int.rs010064400007650000024000000015201342460246600204760ustar0000000000000000//! Provides `isize` and `usize` use cfg_if::cfg_if; cfg_if! { if #[cfg(target_pointer_width = "8")] { crate type isize_ = i8; crate type usize_ = u8; } else if #[cfg(target_pointer_width = "16")] { crate type isize_ = i16; crate type usize_ = u16; } else if #[cfg(target_pointer_width = "32")] { crate type isize_ = i32; crate type usize_ = u32; } else if #[cfg(target_pointer_width = "64")] { crate type isize_ = i64; crate type usize_ = u64; } else if #[cfg(target_pointer_width = "128")] { crate type isize_ = i128; crate type usize_ = u128; } else { compile_error!("unsupported target_pointer_width"); } } packed_simd-0.3.3/src/codegen/reductions.rs010064400007650000024000000000201332536563700171240ustar0000000000000000crate mod mask; packed_simd-0.3.3/src/codegen/reductions/mask.rs010064400007650000024000000033101342636765500200700ustar0000000000000000//! Code generation workaround for `all()` mask horizontal reduction. //! //! Works around [LLVM bug 36702]. //! //! [LLVM bug 36702]: https://bugs.llvm.org/show_bug.cgi?id=36702 #![allow(unused_macros)] use crate::*; crate trait All: crate::marker::Sized { unsafe fn all(self) -> bool; } crate trait Any: crate::marker::Sized { unsafe fn any(self) -> bool; } #[macro_use] mod fallback_impl; cfg_if!
{ if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { #[macro_use] mod x86; } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon", any(feature = "core_arch", libcore_neon)))] { #[macro_use] mod arm; } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { #[macro_use] mod aarch64; } else { #[macro_use] mod fallback; } } impl_mask_reductions!(m8x2); impl_mask_reductions!(m8x4); impl_mask_reductions!(m8x8); impl_mask_reductions!(m8x16); impl_mask_reductions!(m8x32); impl_mask_reductions!(m8x64); impl_mask_reductions!(m16x2); impl_mask_reductions!(m16x4); impl_mask_reductions!(m16x8); impl_mask_reductions!(m16x16); impl_mask_reductions!(m16x32); impl_mask_reductions!(m32x2); impl_mask_reductions!(m32x4); impl_mask_reductions!(m32x8); impl_mask_reductions!(m32x16); // FIXME: 64-bit single element vector // impl_mask_reductions!(m64x1); impl_mask_reductions!(m64x2); impl_mask_reductions!(m64x4); impl_mask_reductions!(m64x8); impl_mask_reductions!(m128x1); impl_mask_reductions!(m128x2); impl_mask_reductions!(m128x4); impl_mask_reductions!(msizex2); impl_mask_reductions!(msizex4); impl_mask_reductions!(msizex8); packed_simd-0.3.3/src/codegen/reductions/mask/aarch64.rs010064400007650000024000000043721342460246600213170ustar0000000000000000//! Mask reductions implementation for `aarch64` targets /// 128-bit wide vectors macro_rules! aarch64_128_neon_impl { ($id:ident, $vmin:ident, $vmax:ident) => { impl All for $id { #[inline] #[target_feature(enable = "neon")] unsafe fn all(self) -> bool { use crate::arch::aarch64::$vmin; $vmin(crate::mem::transmute(self)) != 0 } } impl Any for $id { #[inline] #[target_feature(enable = "neon")] unsafe fn any(self) -> bool { use crate::arch::aarch64::$vmax; $vmax(crate::mem::transmute(self)) != 0 } } } } /// 64-bit wide vectors macro_rules! aarch64_64_neon_impl { ($id:ident, $vec128:ident) => { impl All for $id { #[inline] #[target_feature(enable = "neon")] unsafe fn all(self) -> bool { // Duplicates the 64-bit vector into a 128-bit one and // calls all on that. union U { halves: ($id, $id), vec: $vec128, } U { halves: (self, self), }.vec.all() } } impl Any for $id { #[inline] #[target_feature(enable = "neon")] unsafe fn any(self) -> bool { union U { halves: ($id, $id), vec: $vec128, } U { halves: (self, self), }.vec.any() } } }; } /// Mask reduction implementation for `aarch64` targets macro_rules! impl_mask_reductions { // 64-bit wide masks (m8x8) => { aarch64_64_neon_impl!(m8x8, m8x16); }; (m16x4) => { aarch64_64_neon_impl!(m16x4, m16x8); }; (m32x2) => { aarch64_64_neon_impl!(m32x2, m32x4); }; // 128-bit wide masks (m8x16) => { aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8); }; (m16x8) => { aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16); }; (m32x4) => { aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32); }; // Fallback to LLVM's default code-generation: ($id:ident) => { fallback_impl!($id); }; } packed_simd-0.3.3/src/codegen/reductions/mask/arm.rs010064400007650000024000000035731342636765500206620ustar0000000000000000//! Mask reductions implementation for `arm` targets /// Implementation for ARM + v7 + NEON for 64-bit or 128-bit wide vectors with /// more than two elements. macro_rules! 
arm_128_v7_neon_impl { ($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => { impl All for $id { #[inline] #[target_feature(enable = "v7,neon")] unsafe fn all(self) -> bool { use crate::arch::arm::$vpmin; use crate::mem::transmute; union U { halves: ($half, $half), vec: $id, } let halves = U { vec: self }.halves; let h: $half = transmute($vpmin( transmute(halves.0), transmute(halves.1), )); h.all() } } impl Any for $id { #[inline] #[target_feature(enable = "v7,neon")] unsafe fn any(self) -> bool { use crate::arch::arm::$vpmax; use crate::mem::transmute; union U { halves: ($half, $half), vec: $id, } let halves = U { vec: self }.halves; let h: $half = transmute($vpmax( transmute(halves.0), transmute(halves.1), )); h.any() } } }; } /// Mask reduction implementation for `arm` targets macro_rules! impl_mask_reductions { // 128-bit wide masks (m8x16) => { arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); }; (m16x8) => { arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); }; (m32x4) => { arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32); }; // Fallback to LLVM's default code-generation: ($id:ident) => { fallback_impl!($id); }; } packed_simd-0.3.3/src/codegen/reductions/mask/fallback.rs010064400007650000024000000002521342460246600216170ustar0000000000000000//! Default mask reduction implementations. /// Default mask reduction implementation macro_rules! impl_mask_reductions { ($id:ident) => { fallback_impl!($id); }; } packed_simd-0.3.3/src/codegen/reductions/mask/fallback_impl.rs010064400007650000024000000147261342460246600226530ustar0000000000000000//! Default implementation of a mask reduction for any target. macro_rules! fallback_to_other_impl { ($id:ident, $other:ident) => { impl All for $id { #[inline] unsafe fn all(self) -> bool { let m: $other = crate::mem::transmute(self); m.all() } } impl Any for $id { #[inline] unsafe fn any(self) -> bool { let m: $other = crate::mem::transmute(self); m.any() } } }; } /// Fallback implementation. macro_rules! 
fallback_impl { // 16-bit wide masks: (m8x2) => { impl All for m8x2 { #[inline] unsafe fn all(self) -> bool { let i: u16 = crate::mem::transmute(self); i == u16::max_value() } } impl Any for m8x2 { #[inline] unsafe fn any(self) -> bool { let i: u16 = crate::mem::transmute(self); i != 0 } } }; // 32-bit wide masks (m8x4) => { impl All for m8x4 { #[inline] unsafe fn all(self) -> bool { let i: u32 = crate::mem::transmute(self); i == u32::max_value() } } impl Any for m8x4 { #[inline] unsafe fn any(self) -> bool { let i: u32 = crate::mem::transmute(self); i != 0 } } }; (m16x2) => { fallback_to_other_impl!(m16x2, m8x4); }; // 64-bit wide masks: (m8x8) => { impl All for m8x8 { #[inline] unsafe fn all(self) -> bool { let i: u64 = crate::mem::transmute(self); i == u64::max_value() } } impl Any for m8x8 { #[inline] unsafe fn any(self) -> bool { let i: u64 = crate::mem::transmute(self); i != 0 } } }; (m16x4) => { fallback_to_other_impl!(m16x4, m8x8); }; (m32x2) => { fallback_to_other_impl!(m32x2, m16x4); }; // FIXME: 64x1 mask // 128-bit wide masks: (m8x16) => { impl All for m8x16 { #[inline] unsafe fn all(self) -> bool { let i: u128 = crate::mem::transmute(self); i == u128::max_value() } } impl Any for m8x16 { #[inline] unsafe fn any(self) -> bool { let i: u128 = crate::mem::transmute(self); i != 0 } } }; (m16x8) => { fallback_to_other_impl!(m16x8, m8x16); }; (m32x4) => { fallback_to_other_impl!(m32x4, m16x8); }; (m64x2) => { fallback_to_other_impl!(m64x2, m32x4); }; (m128x1) => { fallback_to_other_impl!(m128x1, m64x2); }; // 256-bit wide masks (m8x32) => { impl All for m8x32 { #[inline] unsafe fn all(self) -> bool { let i: [u128; 2] = crate::mem::transmute(self); let o: [u128; 2] = [u128::max_value(); 2]; i == o } } impl Any for m8x32 { #[inline] unsafe fn any(self) -> bool { let i: [u128; 2] = crate::mem::transmute(self); let o: [u128; 2] = [0; 2]; i != o } } }; (m16x16) => { fallback_to_other_impl!(m16x16, m8x32); }; (m32x8) => { fallback_to_other_impl!(m32x8, m16x16); }; (m64x4) => { fallback_to_other_impl!(m64x4, m32x8); }; (m128x2) => { fallback_to_other_impl!(m128x2, m64x4); }; // 512-bit wide masks (m8x64) => { impl All for m8x64 { #[inline] unsafe fn all(self) -> bool { let i: [u128; 4] = crate::mem::transmute(self); let o: [u128; 4] = [u128::max_value(); 4]; i == o } } impl Any for m8x64 { #[inline] unsafe fn any(self) -> bool { let i: [u128; 4] = crate::mem::transmute(self); let o: [u128; 4] = [0; 4]; i != o } } }; (m16x32) => { fallback_to_other_impl!(m16x32, m8x64); }; (m32x16) => { fallback_to_other_impl!(m32x16, m16x32); }; (m64x8) => { fallback_to_other_impl!(m64x8, m32x16); }; (m128x4) => { fallback_to_other_impl!(m128x4, m64x8); }; // Masks with pointer-sized elements (msizex2) => { cfg_if! { if #[cfg(target_pointer_width = "64")] { fallback_to_other_impl!(msizex2, m64x2); } else if #[cfg(target_pointer_width = "32")] { fallback_to_other_impl!(msizex2, m32x2); } else { compile_error!("unsupported target_pointer_width"); } } }; (msizex4) => { cfg_if! { if #[cfg(target_pointer_width = "64")] { fallback_to_other_impl!(msizex4, m64x4); } else if #[cfg(target_pointer_width = "32")] { fallback_to_other_impl!(msizex4, m32x4); } else { compile_error!("unsupported target_pointer_width"); } } }; (msizex8) => { cfg_if! { if #[cfg(target_pointer_width = "64")] { fallback_to_other_impl!(msizex8, m64x8); } else if #[cfg(target_pointer_width = "32")] { fallback_to_other_impl!(msizex8, m32x8); } else { compile_error!("unsupported target_pointer_width"); } } }; } macro_rules!
recurse_half { ($vid:ident, $vid_h:ident) => { impl All for $vid { #[inline] unsafe fn all(self) -> bool { union U { halves: ($vid_h, $vid_h), vec: $vid, } let halves = U { vec: self }.halves; halves.0.all() && halves.1.all() } } impl Any for $vid { #[inline] unsafe fn any(self) -> bool { union U { halves: ($vid_h, $vid_h), vec: $vid, } let halves = U { vec: self }.halves; halves.0.any() || halves.1.any() } } }; } packed_simd-0.3.3/src/codegen/reductions/mask/x86.rs010064400007650000024000000123741342461154100205070ustar0000000000000000//! Mask reductions implementation for `x86` and `x86_64` targets #[cfg(target_feature = "sse")] #[macro_use] mod sse; #[cfg(target_feature = "sse2")] #[macro_use] mod sse2; #[cfg(target_feature = "avx")] #[macro_use] mod avx; #[cfg(target_feature = "avx2")] #[macro_use] mod avx2; /// x86 64-bit m8x8 implementation macro_rules! x86_m8x8_impl { ($id:ident) => { cfg_if! { if #[cfg(all(target_arch = "x86_64", target_feature = "sse"))] { x86_m8x8_sse_impl!($id); } else { fallback_impl!($id); } } }; } /// x86 128-bit m8x16 implementation macro_rules! x86_m8x16_impl { ($id:ident) => { cfg_if! { if #[cfg(target_feature = "sse2")] { x86_m8x16_sse2_impl!($id); } else { fallback_impl!($id); } } }; } /// x86 128-bit m32x4 implementation macro_rules! x86_m32x4_impl { ($id:ident) => { cfg_if! { if #[cfg(target_feature = "sse")] { x86_m32x4_sse_impl!($id); } else { fallback_impl!($id); } } }; } /// x86 128-bit m64x2 implementation macro_rules! x86_m64x2_impl { ($id:ident) => { cfg_if! { if #[cfg(target_feature = "sse2")] { x86_m64x2_sse2_impl!($id); } else if #[cfg(target_feature = "sse")] { x86_m32x4_sse_impl!($id); } else { fallback_impl!($id); } } }; } /// x86 256-bit m8x32 implementation macro_rules! x86_m8x32_impl { ($id:ident, $half_id:ident) => { cfg_if! { if #[cfg(target_feature = "avx2")] { x86_m8x32_avx2_impl!($id); } else if #[cfg(target_feature = "avx")] { x86_m8x32_avx_impl!($id); } else if #[cfg(target_feature = "sse2")] { recurse_half!($id, $half_id); } else { fallback_impl!($id); } } }; } /// x86 256-bit m32x8 implementation macro_rules! x86_m32x8_impl { ($id:ident, $half_id:ident) => { cfg_if! { if #[cfg(target_feature = "avx")] { x86_m32x8_avx_impl!($id); } else if #[cfg(target_feature = "sse")] { recurse_half!($id, $half_id); } else { fallback_impl!($id); } } }; } /// x86 256-bit m64x4 implementation macro_rules! x86_m64x4_impl { ($id:ident, $half_id:ident) => { cfg_if! { if #[cfg(target_feature = "avx")] { x86_m64x4_avx_impl!($id); } else if #[cfg(target_feature = "sse")] { recurse_half!($id, $half_id); } else { fallback_impl!($id); } } }; } /// Fallback implementation. macro_rules! x86_intr_impl { ($id:ident) => { impl All for $id { #[inline] unsafe fn all(self) -> bool { use crate::llvm::simd_reduce_all; simd_reduce_all(self.0) } } impl Any for $id { #[inline] unsafe fn any(self) -> bool { use crate::llvm::simd_reduce_any; simd_reduce_any(self.0) } } }; } /// Mask reduction implementation for `x86` and `x86_64` targets macro_rules! 
impl_mask_reductions { // 64-bit wide masks (m8x8) => { x86_m8x8_impl!(m8x8); }; (m16x4) => { x86_m8x8_impl!(m16x4); }; (m32x2) => { x86_m8x8_impl!(m32x2); }; // 128-bit wide masks (m8x16) => { x86_m8x16_impl!(m8x16); }; (m16x8) => { x86_m8x16_impl!(m16x8); }; (m32x4) => { x86_m32x4_impl!(m32x4); }; (m64x2) => { x86_m64x2_impl!(m64x2); }; (m128x1) => { x86_intr_impl!(m128x1); }; // 256-bit wide masks: (m8x32) => { x86_m8x32_impl!(m8x32, m8x16); }; (m16x16) => { x86_m8x32_impl!(m16x16, m16x8); }; (m32x8) => { x86_m32x8_impl!(m32x8, m32x4); }; (m64x4) => { x86_m64x4_impl!(m64x4, m64x2); }; (m128x2) => { x86_intr_impl!(m128x2); }; (msizex2) => { cfg_if! { if #[cfg(target_pointer_width = "64")] { fallback_to_other_impl!(msizex2, m64x2); } else if #[cfg(target_pointer_width = "32")] { fallback_to_other_impl!(msizex2, m32x2); } else { compile_error!("unsupported target_pointer_width"); } } }; (msizex4) => { cfg_if! { if #[cfg(target_pointer_width = "64")] { fallback_to_other_impl!(msizex4, m64x4); } else if #[cfg(target_pointer_width = "32")] { fallback_to_other_impl!(msizex4, m32x4); } else { compile_error!("unsupported target_pointer_width"); } } }; (msizex8) => { cfg_if! { if #[cfg(target_pointer_width = "64")] { fallback_to_other_impl!(msizex8, m64x8); } else if #[cfg(target_pointer_width = "32")] { fallback_to_other_impl!(msizex8, m32x8); } else { compile_error!("unsupported target_pointer_width"); } } }; // Fallback to LLVM's default code-generation: ($id:ident) => { fallback_impl!($id); }; } packed_simd-0.3.3/src/codegen/reductions/mask/x86/avx.rs010064400007650000024000000073421342460246600213120ustar0000000000000000//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX` /// `x86`/`x86_64` 256-bit `AVX` implementation /// FIXME: it might be faster here to do two `_mm_movmask_epi8` #[cfg(target_feature = "avx")] macro_rules! x86_m8x32_avx_impl { ($id:ident) => { impl All for $id { #[inline] #[target_feature(enable = "avx")] unsafe fn all(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm256_testc_si256; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm256_testc_si256; _mm256_testc_si256( crate::mem::transmute(self), crate::mem::transmute($id::splat(true)), ) != 0 } } impl Any for $id { #[inline] #[target_feature(enable = "avx")] unsafe fn any(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm256_testz_si256; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm256_testz_si256; _mm256_testz_si256( crate::mem::transmute(self), crate::mem::transmute(self), ) == 0 } } }; } /// `x86`/`x86_64` 256-bit m32x8 `AVX` implementation macro_rules! x86_m32x8_avx_impl { ($id:ident) => { impl All for $id { #[inline] #[target_feature(enable = "sse")] unsafe fn all(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm256_movemask_ps; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm256_movemask_ps; // _mm256_movemask_ps(a) creates a 8bit mask containing the // most significant bit of each lane of `a`. If all bits are // set, then all 8 lanes of the mask are true. _mm256_movemask_ps(crate::mem::transmute(self)) == 0b_1111_1111_i32 } } impl Any for $id { #[inline] #[target_feature(enable = "sse")] unsafe fn any(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm256_movemask_ps; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm256_movemask_ps; _mm256_movemask_ps(crate::mem::transmute(self)) != 0 } } }; } /// `x86`/`x86_64` 256-bit m64x4 `AVX` implementation macro_rules! 
x86_m64x4_avx_impl { ($id:ident) => { impl All for $id { #[inline] #[target_feature(enable = "sse")] unsafe fn all(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm256_movemask_pd; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm256_movemask_pd; // _mm256_movemask_pd(a) creates a 4bit mask containing the // most significant bit of each lane of `a`. If all bits are // set, then all 4 lanes of the mask are true. _mm256_movemask_pd(crate::mem::transmute(self)) == 0b_1111_i32 } } impl Any for $id { #[inline] #[target_feature(enable = "sse")] unsafe fn any(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm256_movemask_pd; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm256_movemask_pd; _mm256_movemask_pd(crate::mem::transmute(self)) != 0 } } }; } packed_simd-0.3.3/src/codegen/reductions/mask/x86/avx2.rs010064400007650000024000000025071342460246600213720ustar0000000000000000//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX2`. #![allow(unused)] /// x86/x86_64 256-bit m8x32 AVX2 implementation macro_rules! x86_m8x32_avx2_impl { ($id:ident) => { impl All for $id { #[inline] #[target_feature(enable = "sse2")] unsafe fn all(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm256_movemask_epi8; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm256_movemask_epi8; // _mm256_movemask_epi8(a) creates a 32bit mask containing the // most significant bit of each byte of `a`. If all // bits are set, then all 32 lanes of the mask are // true. _mm256_movemask_epi8(crate::mem::transmute(self)) == -1_i32 } } impl Any for $id { #[inline] #[target_feature(enable = "sse2")] unsafe fn any(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm256_movemask_epi8; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm256_movemask_epi8; _mm256_movemask_epi8(crate::mem::transmute(self)) != 0 } } }; } packed_simd-0.3.3/src/codegen/reductions/mask/x86/sse.rs010064400007650000024000000047431342461154100213020ustar0000000000000000//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE`. #![allow(unused)] /// `x86`/`x86_64` 128-bit `m32x4` `SSE` implementation macro_rules! x86_m32x4_sse_impl { ($id:ident) => { impl All for $id { #[inline] #[target_feature(enable = "sse")] unsafe fn all(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm_movemask_ps; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm_movemask_ps; // _mm_movemask_ps(a) creates a 4bit mask containing the // most significant bit of each lane of `a`. If all // bits are set, then all 4 lanes of the mask are // true. _mm_movemask_ps(crate::mem::transmute(self)) == 0b_1111_i32 } } impl Any for $id { #[inline] #[target_feature(enable = "sse")] unsafe fn any(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm_movemask_ps; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm_movemask_ps; _mm_movemask_ps(crate::mem::transmute(self)) != 0 } } }; } macro_rules! x86_m8x8_sse_impl { ($id:ident) => { impl All for $id { #[inline] #[target_feature(enable = "sse")] unsafe fn all(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm_movemask_pi8; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm_movemask_pi8; // _mm_movemask_pi8(a) creates an 8bit mask containing the most // significant bit of each byte of `a`. If all bits are set, // then all 8 lanes of the mask are true. 
_mm_movemask_pi8(crate::mem::transmute(self)) == u8::max_value() as i32 } } impl Any for $id { #[inline] #[target_feature(enable = "sse")] unsafe fn any(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm_movemask_pi8; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm_movemask_pi8; _mm_movemask_pi8(crate::mem::transmute(self)) != 0 } } }; } packed_simd-0.3.3/src/codegen/reductions/mask/x86/sse2.rs010064400007650000024000000050751342460246600213710ustar0000000000000000//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE2`. #![allow(unused)] /// `x86`/`x86_64` 128-bit m64x2 `SSE2` implementation macro_rules! x86_m64x2_sse2_impl { ($id:ident) => { impl All for $id { #[inline] #[target_feature(enable = "sse")] unsafe fn all(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm_movemask_pd; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm_movemask_pd; // _mm_movemask_pd(a) creates a 2bit mask containing the // most significant bit of each lane of `a`. If all // bits are set, then all 2 lanes of the mask are // true. _mm_movemask_pd(crate::mem::transmute(self)) == 0b_11_i32 } } impl Any for $id { #[inline] #[target_feature(enable = "sse")] unsafe fn any(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm_movemask_pd; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm_movemask_pd; _mm_movemask_pd(crate::mem::transmute(self)) != 0 } } }; } /// `x86`/`x86_64` 128-bit m8x16 `SSE2` implementation macro_rules! x86_m8x16_sse2_impl { ($id:ident) => { impl All for $id { #[inline] #[target_feature(enable = "sse2")] unsafe fn all(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm_movemask_epi8; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm_movemask_epi8; // _mm_movemask_epi8(a) creates a 16bit mask containing the // most significant bit of each byte of `a`. If all // bits are set, then all 16 lanes of the mask are // true. _mm_movemask_epi8(crate::mem::transmute(self)) == i32::from(u16::max_value()) } } impl Any for $id { #[inline] #[target_feature(enable = "sse2")] unsafe fn any(self) -> bool { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm_movemask_epi8; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm_movemask_epi8; _mm_movemask_epi8(crate::mem::transmute(self)) != 0 } } }; } packed_simd-0.3.3/src/codegen/shuffle.rs010064400007650000024000000161101342460246600164020ustar0000000000000000//! Implementations of the `ShuffleResult` trait for the different numbers of //! lanes and vector element types. 
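// Illustrative sketch (assumes the crate's public `shuffle!` macro and the
// `i8x4` alias are in scope; not part of the original module): the impls
// below only select the *output type* of a shuffle; the lane selection
// itself is done by the `shuffle!` macro. The mapping surfaces like this:
#[cfg(test)]
mod shuffle_output_type_sketch {
    #[test]
    fn four_indices_select_a_four_lane_output() {
        use crate::*;
        let v = i8x4::new(10, 20, 30, 40);
        // Four `u32` indices pick `<i8 as Shuffle<[u32; 4]>>::Output`,
        // i.e. `i8x4`; here the indices reverse the lanes:
        let r: i8x4 = shuffle!(v, [3, 2, 1, 0]);
        assert_eq!(r, i8x4::new(40, 30, 20, 10));
    }
}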
use crate::masks::*; use crate::sealed::Shuffle; impl Shuffle<[u32; 2]> for i8 { type Output = crate::codegen::i8x2; } impl Shuffle<[u32; 4]> for i8 { type Output = crate::codegen::i8x4; } impl Shuffle<[u32; 8]> for i8 { type Output = crate::codegen::i8x8; } impl Shuffle<[u32; 16]> for i8 { type Output = crate::codegen::i8x16; } impl Shuffle<[u32; 32]> for i8 { type Output = crate::codegen::i8x32; } impl Shuffle<[u32; 64]> for i8 { type Output = crate::codegen::i8x64; } impl Shuffle<[u32; 2]> for u8 { type Output = crate::codegen::u8x2; } impl Shuffle<[u32; 4]> for u8 { type Output = crate::codegen::u8x4; } impl Shuffle<[u32; 8]> for u8 { type Output = crate::codegen::u8x8; } impl Shuffle<[u32; 16]> for u8 { type Output = crate::codegen::u8x16; } impl Shuffle<[u32; 32]> for u8 { type Output = crate::codegen::u8x32; } impl Shuffle<[u32; 64]> for u8 { type Output = crate::codegen::u8x64; } impl Shuffle<[u32; 2]> for m8 { type Output = crate::codegen::m8x2; } impl Shuffle<[u32; 4]> for m8 { type Output = crate::codegen::m8x4; } impl Shuffle<[u32; 8]> for m8 { type Output = crate::codegen::m8x8; } impl Shuffle<[u32; 16]> for m8 { type Output = crate::codegen::m8x16; } impl Shuffle<[u32; 32]> for m8 { type Output = crate::codegen::m8x32; } impl Shuffle<[u32; 64]> for m8 { type Output = crate::codegen::m8x64; } impl Shuffle<[u32; 2]> for i16 { type Output = crate::codegen::i16x2; } impl Shuffle<[u32; 4]> for i16 { type Output = crate::codegen::i16x4; } impl Shuffle<[u32; 8]> for i16 { type Output = crate::codegen::i16x8; } impl Shuffle<[u32; 16]> for i16 { type Output = crate::codegen::i16x16; } impl Shuffle<[u32; 32]> for i16 { type Output = crate::codegen::i16x32; } impl Shuffle<[u32; 2]> for u16 { type Output = crate::codegen::u16x2; } impl Shuffle<[u32; 4]> for u16 { type Output = crate::codegen::u16x4; } impl Shuffle<[u32; 8]> for u16 { type Output = crate::codegen::u16x8; } impl Shuffle<[u32; 16]> for u16 { type Output = crate::codegen::u16x16; } impl Shuffle<[u32; 32]> for u16 { type Output = crate::codegen::u16x32; } impl Shuffle<[u32; 2]> for m16 { type Output = crate::codegen::m16x2; } impl Shuffle<[u32; 4]> for m16 { type Output = crate::codegen::m16x4; } impl Shuffle<[u32; 8]> for m16 { type Output = crate::codegen::m16x8; } impl Shuffle<[u32; 16]> for m16 { type Output = crate::codegen::m16x16; } impl Shuffle<[u32; 32]> for m16 { type Output = crate::codegen::m16x32; } impl Shuffle<[u32; 2]> for i32 { type Output = crate::codegen::i32x2; } impl Shuffle<[u32; 4]> for i32 { type Output = crate::codegen::i32x4; } impl Shuffle<[u32; 8]> for i32 { type Output = crate::codegen::i32x8; } impl Shuffle<[u32; 16]> for i32 { type Output = crate::codegen::i32x16; } impl Shuffle<[u32; 2]> for u32 { type Output = crate::codegen::u32x2; } impl Shuffle<[u32; 4]> for u32 { type Output = crate::codegen::u32x4; } impl Shuffle<[u32; 8]> for u32 { type Output = crate::codegen::u32x8; } impl Shuffle<[u32; 16]> for u32 { type Output = crate::codegen::u32x16; } impl Shuffle<[u32; 2]> for f32 { type Output = crate::codegen::f32x2; } impl Shuffle<[u32; 4]> for f32 { type Output = crate::codegen::f32x4; } impl Shuffle<[u32; 8]> for f32 { type Output = crate::codegen::f32x8; } impl Shuffle<[u32; 16]> for f32 { type Output = crate::codegen::f32x16; } impl Shuffle<[u32; 2]> for m32 { type Output = crate::codegen::m32x2; } impl Shuffle<[u32; 4]> for m32 { type Output = crate::codegen::m32x4; } impl Shuffle<[u32; 8]> for m32 { type Output = crate::codegen::m32x8; } impl Shuffle<[u32; 16]> for m32 { type Output = 
crate::codegen::m32x16; } /* FIXME: 64-bit single element vector impl Shuffle<[u32; 1]> for i64 { type Output = crate::codegen::i64x1; } */ impl Shuffle<[u32; 2]> for i64 { type Output = crate::codegen::i64x2; } impl Shuffle<[u32; 4]> for i64 { type Output = crate::codegen::i64x4; } impl Shuffle<[u32; 8]> for i64 { type Output = crate::codegen::i64x8; } /* FIXME: 64-bit single element vector impl Shuffle<[u32; 1]> for u64 { type Output = crate::codegen::u64x1; } */ impl Shuffle<[u32; 2]> for u64 { type Output = crate::codegen::u64x2; } impl Shuffle<[u32; 4]> for u64 { type Output = crate::codegen::u64x4; } impl Shuffle<[u32; 8]> for u64 { type Output = crate::codegen::u64x8; } /* FIXME: 64-bit single element vector impl Shuffle<[u32; 1]> for f64 { type Output = crate::codegen::f64x1; } */ impl Shuffle<[u32; 2]> for f64 { type Output = crate::codegen::f64x2; } impl Shuffle<[u32; 4]> for f64 { type Output = crate::codegen::f64x4; } impl Shuffle<[u32; 8]> for f64 { type Output = crate::codegen::f64x8; } /* FIXME: 64-bit single element vector impl Shuffle<[u32; 1]> for m64 { type Output = crate::codegen::m64x1; } */ impl Shuffle<[u32; 2]> for m64 { type Output = crate::codegen::m64x2; } impl Shuffle<[u32; 4]> for m64 { type Output = crate::codegen::m64x4; } impl Shuffle<[u32; 8]> for m64 { type Output = crate::codegen::m64x8; } impl Shuffle<[u32; 2]> for isize { type Output = crate::codegen::isizex2; } impl Shuffle<[u32; 4]> for isize { type Output = crate::codegen::isizex4; } impl Shuffle<[u32; 8]> for isize { type Output = crate::codegen::isizex8; } impl Shuffle<[u32; 2]> for usize { type Output = crate::codegen::usizex2; } impl Shuffle<[u32; 4]> for usize { type Output = crate::codegen::usizex4; } impl Shuffle<[u32; 8]> for usize { type Output = crate::codegen::usizex8; } impl<T> Shuffle<[u32; 2]> for *const T { type Output = crate::codegen::cptrx2<T>; } impl<T> Shuffle<[u32; 4]> for *const T { type Output = crate::codegen::cptrx4<T>; } impl<T> Shuffle<[u32; 8]> for *const T { type Output = crate::codegen::cptrx8<T>; } impl<T> Shuffle<[u32; 2]> for *mut T { type Output = crate::codegen::mptrx2<T>; } impl<T> Shuffle<[u32; 4]> for *mut T { type Output = crate::codegen::mptrx4<T>; } impl<T> Shuffle<[u32; 8]> for *mut T { type Output = crate::codegen::mptrx8<T>; } impl Shuffle<[u32; 2]> for msize { type Output = crate::codegen::msizex2; } impl Shuffle<[u32; 4]> for msize { type Output = crate::codegen::msizex4; } impl Shuffle<[u32; 8]> for msize { type Output = crate::codegen::msizex8; } impl Shuffle<[u32; 1]> for i128 { type Output = crate::codegen::i128x1; } impl Shuffle<[u32; 2]> for i128 { type Output = crate::codegen::i128x2; } impl Shuffle<[u32; 4]> for i128 { type Output = crate::codegen::i128x4; } impl Shuffle<[u32; 1]> for u128 { type Output = crate::codegen::u128x1; } impl Shuffle<[u32; 2]> for u128 { type Output = crate::codegen::u128x2; } impl Shuffle<[u32; 4]> for u128 { type Output = crate::codegen::u128x4; } impl Shuffle<[u32; 1]> for m128 { type Output = crate::codegen::m128x1; } impl Shuffle<[u32; 2]> for m128 { type Output = crate::codegen::m128x2; } impl Shuffle<[u32; 4]> for m128 { type Output = crate::codegen::m128x4; } packed_simd-0.3.3/src/codegen/shuffle1_dyn.rs010064400007650000024000000373001342636765500173530ustar0000000000000000//! Shuffle vector lanes with run-time indices. use crate::*; pub trait Shuffle1Dyn { type Indices; fn shuffle1_dyn(self, _: Self::Indices) -> Self; } // Fallback implementation macro_rules!
impl_fallback { ($id:ident) => { impl Shuffle1Dyn for $id { type Indices = Self; #[inline] fn shuffle1_dyn(self, indices: Self::Indices) -> Self { let mut result = Self::splat(0); for i in 0..$id::lanes() { result = result .replace(i, self.extract(indices.extract(i) as usize)); } result } } }; } macro_rules! impl_shuffle1_dyn { (u8x8) => { cfg_if! { if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "ssse3"))] { impl Shuffle1Dyn for u8x8 { type Indices = Self; #[inline] fn shuffle1_dyn(self, indices: Self::Indices) -> Self { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm_shuffle_pi8; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm_shuffle_pi8; unsafe { crate::mem::transmute( _mm_shuffle_pi8( crate::mem::transmute(self.0), crate::mem::transmute(indices.0) ) ) } } } } else if #[cfg(all( any( all(target_arch = "aarch64", target_feature = "neon"), all(target_arch = "arm", target_feature = "v7", target_feature = "neon") ), any(feature = "core_arch", libcore_neon) ) )] { impl Shuffle1Dyn for u8x8 { type Indices = Self; #[inline] fn shuffle1_dyn(self, indices: Self::Indices) -> Self { #[cfg(target_arch = "aarch64")] use crate::arch::aarch64::vtbl1_u8; #[cfg(target_arch = "arm")] use crate::arch::arm::vtbl1_u8; // This is safe because the binary is compiled with // neon enabled at compile-time and can therefore only // run on CPUs that have it enabled. unsafe { Simd(mem::transmute( vtbl1_u8(mem::transmute(self.0), crate::mem::transmute(indices.0)) )) } } } } else { impl_fallback!(u8x8); } } }; (u8x16) => { cfg_if! { if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "ssse3"))] { impl Shuffle1Dyn for u8x16 { type Indices = Self; #[inline] fn shuffle1_dyn(self, indices: Self::Indices) -> Self { #[cfg(target_arch = "x86")] use crate::arch::x86::_mm_shuffle_epi8; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm_shuffle_epi8; // This is safe because the binary is compiled with // ssse3 enabled at compile-time and can therefore only // run on CPUs that have it enabled. unsafe { Simd(mem::transmute( _mm_shuffle_epi8(mem::transmute(self.0), crate::mem::transmute(indices)) )) } } } } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon", any(feature = "core_arch", libcore_neon)))] { impl Shuffle1Dyn for u8x16 { type Indices = Self; #[inline] fn shuffle1_dyn(self, indices: Self::Indices) -> Self { use crate::arch::aarch64::vqtbl1q_u8; // This is safe because the binary is compiled with // neon enabled at compile-time and can therefore only // run on CPUs that have it enabled. unsafe { Simd(mem::transmute( vqtbl1q_u8(mem::transmute(self.0), crate::mem::transmute(indices.0)) )) } } } } else if #[cfg(all(target_arch = "arm", target_feature = "v7", target_feature = "neon", any(feature = "core_arch", libcore_neon)))] { impl Shuffle1Dyn for u8x16 { type Indices = Self; #[inline] fn shuffle1_dyn(self, indices: Self::Indices) -> Self { use crate::arch::arm::vtbl2_u8; // This is safe because the binary is compiled with // neon enabled at compile-time and can therefore only // run on CPUs that have it enabled.
unsafe { union U { j: u8x16, s: (u8x8, u8x8), } let (i0, i1) = U { j: indices }.s; let r0 = vtbl2_u8( mem::transmute(self), crate::mem::transmute(i0) ); let r1 = vtbl2_u8( mem::transmute(self), crate::mem::transmute(i1) ); let r = U { s: (r0, r1) }.j; Simd(mem::transmute(r)) } } } } else { impl_fallback!(u8x16); } } }; (u16x8) => { impl Shuffle1Dyn for u16x8 { type Indices = Self; #[inline] fn shuffle1_dyn(self, indices: Self::Indices) -> Self { let indices: u8x8 = (indices * 2).cast(); let indices: u8x16 = shuffle!( indices, [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7] ); let v = u8x16::new( 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 ); let indices = indices + v; unsafe { let s: u8x16 = crate::mem::transmute(self); crate::mem::transmute(s.shuffle1_dyn(indices)) } } } }; (u32x4) => { cfg_if! { if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx"))] { impl Shuffle1Dyn for u32x4 { type Indices = Self; #[inline] fn shuffle1_dyn(self, indices: Self::Indices) -> Self { #[cfg(target_arch = "x86")] use crate::arch::x86::{_mm_permutevar_ps}; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::{_mm_permutevar_ps}; unsafe { crate::mem::transmute( _mm_permutevar_ps( crate::mem::transmute(self.0), crate::mem::transmute(indices.0) ) ) } } } } else { impl Shuffle1Dyn for u32x4 { type Indices = Self; #[inline] fn shuffle1_dyn(self, indices: Self::Indices) -> Self { let indices: u8x4 = (indices * 4).cast(); let indices: u8x16 = shuffle!( indices, [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3] ); let v = u8x16::new( 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 ); let indices = indices + v; unsafe { let s: u8x16 = crate::mem::transmute(self); crate::mem::transmute(s.shuffle1_dyn(indices)) } } } } } }; (u64x2) => { cfg_if! { if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "avx"))] { impl Shuffle1Dyn for u64x2 { type Indices = Self; #[inline] fn shuffle1_dyn(self, indices: Self::Indices) -> Self { #[cfg(target_arch = "x86")] use crate::arch::x86::{_mm_permutevar_pd}; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::{_mm_permutevar_pd}; // _mm_permutevar_pd uses the _second_ bit of each // element to perform the selection, that is: 0b00 => 0, // 0b10 => 1: let indices = indices << 1; unsafe { crate::mem::transmute( _mm_permutevar_pd( crate::mem::transmute(self), crate::mem::transmute(indices) ) ) } } } } else { impl Shuffle1Dyn for u64x2 { type Indices = Self; #[inline] fn shuffle1_dyn(self, indices: Self::Indices) -> Self { let indices: u8x2 = (indices * 8).cast(); let indices: u8x16 = shuffle!( indices, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ); let v = u8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 ); let indices = indices + v; unsafe { let s: u8x16 = crate::mem::transmute(self); crate::mem::transmute(s.shuffle1_dyn(indices)) } } } } } }; (u128x1) => { impl Shuffle1Dyn for u128x1 { type Indices = Self; #[inline] fn shuffle1_dyn(self, _indices: Self::Indices) -> Self { self } } }; ($id:ident) => { impl_fallback!($id); } } impl_shuffle1_dyn!(u8x2); impl_shuffle1_dyn!(u8x4); impl_shuffle1_dyn!(u8x8); impl_shuffle1_dyn!(u8x16); impl_shuffle1_dyn!(u8x32); impl_shuffle1_dyn!(u8x64); impl_shuffle1_dyn!(u16x2); impl_shuffle1_dyn!(u16x4); impl_shuffle1_dyn!(u16x8); impl_shuffle1_dyn!(u16x16); impl_shuffle1_dyn!(u16x32); impl_shuffle1_dyn!(u32x2); impl_shuffle1_dyn!(u32x4); impl_shuffle1_dyn!(u32x8); impl_shuffle1_dyn!(u32x16); impl_shuffle1_dyn!(u64x2); impl_shuffle1_dyn!(u64x4); impl_shuffle1_dyn!(u64x8);
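// Illustrative usage sketch for the impls generated above and below
// (assumes the crate's public `Simd` API): unlike `shuffle!`, whose indices
// must be compile-time constants, `shuffle1_dyn` takes its indices as a
// run-time vector value:
#[cfg(test)]
mod shuffle1_dyn_usage_sketch {
    #[test]
    fn runtime_indices_select_lanes() {
        use crate::*;
        let v = u8x4::new(4, 3, 2, 1);
        let idx = u8x4::new(3, 2, 1, 0); // may be computed at run time
        // result lane `i` is `v.extract(idx.extract(i))`:
        assert_eq!(v.shuffle1_dyn(idx), u8x4::new(1, 2, 3, 4));
    }
}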
impl_shuffle1_dyn!(usizex2); impl_shuffle1_dyn!(usizex4); impl_shuffle1_dyn!(usizex8); impl_shuffle1_dyn!(u128x1); impl_shuffle1_dyn!(u128x2); impl_shuffle1_dyn!(u128x4); // Implementation for non-unsigned vector types macro_rules! impl_shuffle1_dyn_non_u { ($id:ident, $uid:ident) => { impl Shuffle1Dyn for $id { type Indices = $uid; #[inline] fn shuffle1_dyn(self, indices: Self::Indices) -> Self { unsafe { let u: $uid = crate::mem::transmute(self); crate::mem::transmute(u.shuffle1_dyn(indices)) } } } }; } impl_shuffle1_dyn_non_u!(i8x2, u8x2); impl_shuffle1_dyn_non_u!(i8x4, u8x4); impl_shuffle1_dyn_non_u!(i8x8, u8x8); impl_shuffle1_dyn_non_u!(i8x16, u8x16); impl_shuffle1_dyn_non_u!(i8x32, u8x32); impl_shuffle1_dyn_non_u!(i8x64, u8x64); impl_shuffle1_dyn_non_u!(i16x2, u16x2); impl_shuffle1_dyn_non_u!(i16x4, u16x4); impl_shuffle1_dyn_non_u!(i16x8, u16x8); impl_shuffle1_dyn_non_u!(i16x16, u16x16); impl_shuffle1_dyn_non_u!(i16x32, u16x32); impl_shuffle1_dyn_non_u!(i32x2, u32x2); impl_shuffle1_dyn_non_u!(i32x4, u32x4); impl_shuffle1_dyn_non_u!(i32x8, u32x8); impl_shuffle1_dyn_non_u!(i32x16, u32x16); impl_shuffle1_dyn_non_u!(i64x2, u64x2); impl_shuffle1_dyn_non_u!(i64x4, u64x4); impl_shuffle1_dyn_non_u!(i64x8, u64x8); impl_shuffle1_dyn_non_u!(isizex2, usizex2); impl_shuffle1_dyn_non_u!(isizex4, usizex4); impl_shuffle1_dyn_non_u!(isizex8, usizex8); impl_shuffle1_dyn_non_u!(i128x1, u128x1); impl_shuffle1_dyn_non_u!(i128x2, u128x2); impl_shuffle1_dyn_non_u!(i128x4, u128x4); impl_shuffle1_dyn_non_u!(m8x2, u8x2); impl_shuffle1_dyn_non_u!(m8x4, u8x4); impl_shuffle1_dyn_non_u!(m8x8, u8x8); impl_shuffle1_dyn_non_u!(m8x16, u8x16); impl_shuffle1_dyn_non_u!(m8x32, u8x32); impl_shuffle1_dyn_non_u!(m8x64, u8x64); impl_shuffle1_dyn_non_u!(m16x2, u16x2); impl_shuffle1_dyn_non_u!(m16x4, u16x4); impl_shuffle1_dyn_non_u!(m16x8, u16x8); impl_shuffle1_dyn_non_u!(m16x16, u16x16); impl_shuffle1_dyn_non_u!(m16x32, u16x32); impl_shuffle1_dyn_non_u!(m32x2, u32x2); impl_shuffle1_dyn_non_u!(m32x4, u32x4); impl_shuffle1_dyn_non_u!(m32x8, u32x8); impl_shuffle1_dyn_non_u!(m32x16, u32x16); impl_shuffle1_dyn_non_u!(m64x2, u64x2); impl_shuffle1_dyn_non_u!(m64x4, u64x4); impl_shuffle1_dyn_non_u!(m64x8, u64x8); impl_shuffle1_dyn_non_u!(msizex2, usizex2); impl_shuffle1_dyn_non_u!(msizex4, usizex4); impl_shuffle1_dyn_non_u!(msizex8, usizex8); impl_shuffle1_dyn_non_u!(m128x1, u128x1); impl_shuffle1_dyn_non_u!(m128x2, u128x2); impl_shuffle1_dyn_non_u!(m128x4, u128x4); impl_shuffle1_dyn_non_u!(f32x2, u32x2); impl_shuffle1_dyn_non_u!(f32x4, u32x4); impl_shuffle1_dyn_non_u!(f32x8, u32x8); impl_shuffle1_dyn_non_u!(f32x16, u32x16); impl_shuffle1_dyn_non_u!(f64x2, u64x2); impl_shuffle1_dyn_non_u!(f64x4, u64x4); impl_shuffle1_dyn_non_u!(f64x8, u64x8); // Implementation for pointer vector types macro_rules! impl_shuffle1_dyn_ptr { ($id:ident, $uid:ident) => { impl Shuffle1Dyn for $id { type Indices = $uid; #[inline] fn shuffle1_dyn(self, indices: Self::Indices) -> Self { unsafe { let u: $uid = crate::mem::transmute(self); crate::mem::transmute(u.shuffle1_dyn(indices)) } } } }; } impl_shuffle1_dyn_ptr!(cptrx2, usizex2); impl_shuffle1_dyn_ptr!(cptrx4, usizex4); impl_shuffle1_dyn_ptr!(cptrx8, usizex8); impl_shuffle1_dyn_ptr!(mptrx2, usizex2); impl_shuffle1_dyn_ptr!(mptrx4, usizex4); impl_shuffle1_dyn_ptr!(mptrx8, usizex8); packed_simd-0.3.3/src/codegen/swap_bytes.rs010064400007650000024000000131071342460246600171310ustar0000000000000000//! Horizontal swap bytes implementation.
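// Illustrative sketch (assumes the crate's public API): as implemented by the
// shuffles below, `swap_bytes` reverses the byte order of the *whole* vector,
// which is the same as reversing the lane order and byte-swapping each lane:
#[cfg(test)]
mod swap_bytes_sketch {
    #[test]
    fn reverses_vector_byte_order() {
        use crate::*;
        let x = u16x2::new(0x1122, 0x3344);
        // lane 0 becomes swap_bytes(old lane 1), and vice versa:
        assert_eq!(x.swap_bytes(), u16x2::new(0x4433, 0x2211));
    }
}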
// FIXME: investigate using `llvm.bswap` // https://github.com/rust-lang-nursery/packed_simd/issues/19 use crate::*; crate trait SwapBytes { fn swap_bytes(self) -> Self; } macro_rules! impl_swap_bytes { (v16: $($id:ident,)+) => { $( impl SwapBytes for $id { #[inline] fn swap_bytes(self) -> Self { unsafe { shuffle!(self, [1, 0]) } } } )+ }; (v32: $($id:ident,)+) => { $( impl SwapBytes for $id { #[inline] #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))] fn swap_bytes(self) -> Self { unsafe { let bytes: u8x4 = crate::mem::transmute(self); let result: u8x4 = shuffle!(bytes, [3, 2, 1, 0]); crate::mem::transmute(result) } } } )+ }; (v64: $($id:ident,)+) => { $( impl SwapBytes for $id { #[inline] #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))] fn swap_bytes(self) -> Self { unsafe { let bytes: u8x8 = crate::mem::transmute(self); let result: u8x8 = shuffle!( bytes, [7, 6, 5, 4, 3, 2, 1, 0] ); crate::mem::transmute(result) } } } )+ }; (v128: $($id:ident,)+) => { $( impl SwapBytes for $id { #[inline] #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))] fn swap_bytes(self) -> Self { unsafe { let bytes: u8x16 = crate::mem::transmute(self); let result: u8x16 = shuffle!(bytes, [ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]); crate::mem::transmute(result) } } } )+ }; (v256: $($id:ident,)+) => { $( impl SwapBytes for $id { #[inline] #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))] fn swap_bytes(self) -> Self { unsafe { let bytes: u8x32 = crate::mem::transmute(self); let result: u8x32 = shuffle!(bytes, [ 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]); crate::mem::transmute(result) } } } )+ }; (v512: $($id:ident,)+) => { $( impl SwapBytes for $id { #[inline] #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))] fn swap_bytes(self) -> Self { unsafe { let bytes: u8x64 = crate::mem::transmute(self); let result: u8x64 = shuffle!(bytes, [ 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]); crate::mem::transmute(result) } } } )+ }; } impl_swap_bytes!(v16: u8x2, i8x2,); impl_swap_bytes!(v32: u8x4, i8x4, u16x2, i16x2,); // FIXME: 64-bit single element vector impl_swap_bytes!( v64: u8x8, i8x8, u16x4, i16x4, u32x2, i32x2, /* u64x1, i64x1, */ ); impl_swap_bytes!( v128: u8x16, i8x16, u16x8, i16x8, u32x4, i32x4, u64x2, i64x2, u128x1, i128x1, ); impl_swap_bytes!( v256: u8x32, i8x32, u16x16, i16x16, u32x8, i32x8, u64x4, i64x4, u128x2, i128x2, ); impl_swap_bytes!( v512: u8x64, i8x64, u16x32, i16x32, u32x16, i32x16, u64x8, i64x8, u128x4, i128x4, ); cfg_if! 
{ if #[cfg(target_pointer_width = "8")] { impl_swap_bytes!(v16: isizex2, usizex2,); impl_swap_bytes!(v32: isizex4, usizex4,); impl_swap_bytes!(v64: isizex8, usizex8,); } else if #[cfg(target_pointer_width = "16")] { impl_swap_bytes!(v32: isizex2, usizex2,); impl_swap_bytes!(v64: isizex4, usizex4,); impl_swap_bytes!(v128: isizex8, usizex8,); } else if #[cfg(target_pointer_width = "32")] { impl_swap_bytes!(v64: isizex2, usizex2,); impl_swap_bytes!(v128: isizex4, usizex4,); impl_swap_bytes!(v256: isizex8, usizex8,); } else if #[cfg(target_pointer_width = "64")] { impl_swap_bytes!(v128: isizex2, usizex2,); impl_swap_bytes!(v256: isizex4, usizex4,); impl_swap_bytes!(v512: isizex8, usizex8,); } else { compile_error!("unsupported target_pointer_width"); } } packed_simd-0.3.3/src/codegen/v128.rs010064400007650000024000000023431342460246600154510ustar0000000000000000//! Internal 128-bit wide vector types use crate::masks::*; #[rustfmt::skip] impl_simd_array!( [i8; 16]: i8x16 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 ); #[rustfmt::skip] impl_simd_array!( [u8; 16]: u8x16 | u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 ); #[rustfmt::skip] impl_simd_array!( [m8; 16]: m8x16 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 ); impl_simd_array!([i16; 8]: i16x8 | i16, i16, i16, i16, i16, i16, i16, i16); impl_simd_array!([u16; 8]: u16x8 | u16, u16, u16, u16, u16, u16, u16, u16); impl_simd_array!([m16; 8]: m16x8 | i16, i16, i16, i16, i16, i16, i16, i16); impl_simd_array!([i32; 4]: i32x4 | i32, i32, i32, i32); impl_simd_array!([u32; 4]: u32x4 | u32, u32, u32, u32); impl_simd_array!([f32; 4]: f32x4 | f32, f32, f32, f32); impl_simd_array!([m32; 4]: m32x4 | i32, i32, i32, i32); impl_simd_array!([i64; 2]: i64x2 | i64, i64); impl_simd_array!([u64; 2]: u64x2 | u64, u64); impl_simd_array!([f64; 2]: f64x2 | f64, f64); impl_simd_array!([m64; 2]: m64x2 | i64, i64); impl_simd_array!([i128; 1]: i128x1 | i128); impl_simd_array!([u128; 1]: u128x1 | u128); impl_simd_array!([m128; 1]: m128x1 | i128); packed_simd-0.3.3/src/codegen/v16.rs010064400007650000024000000002731332536563700153730ustar0000000000000000//! Internal 16-bit wide vector types use crate::masks::*; impl_simd_array!([i8; 2]: i8x2 | i8, i8); impl_simd_array!([u8; 2]: u8x2 | u8, u8); impl_simd_array!([m8; 2]: m8x2 | i8, i8); packed_simd-0.3.3/src/codegen/v256.rs010064400007650000024000000035171342460246600154570ustar0000000000000000//! 
Internal 256-bit wide vector types use crate::masks::*; #[rustfmt::skip] impl_simd_array!( [i8; 32]: i8x32 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 ); #[rustfmt::skip] impl_simd_array!( [u8; 32]: u8x32 | u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 ); #[rustfmt::skip] impl_simd_array!( [m8; 32]: m8x32 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 ); #[rustfmt::skip] impl_simd_array!( [i16; 16]: i16x16 | i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 ); #[rustfmt::skip] impl_simd_array!( [u16; 16]: u16x16 | u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16 ); #[rustfmt::skip] impl_simd_array!( [m16; 16]: m16x16 | i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 ); impl_simd_array!([i32; 8]: i32x8 | i32, i32, i32, i32, i32, i32, i32, i32); impl_simd_array!([u32; 8]: u32x8 | u32, u32, u32, u32, u32, u32, u32, u32); impl_simd_array!([f32; 8]: f32x8 | f32, f32, f32, f32, f32, f32, f32, f32); impl_simd_array!([m32; 8]: m32x8 | i32, i32, i32, i32, i32, i32, i32, i32); impl_simd_array!([i64; 4]: i64x4 | i64, i64, i64, i64); impl_simd_array!([u64; 4]: u64x4 | u64, u64, u64, u64); impl_simd_array!([f64; 4]: f64x4 | f64, f64, f64, f64); impl_simd_array!([m64; 4]: m64x4 | i64, i64, i64, i64); impl_simd_array!([i128; 2]: i128x2 | i128, i128); impl_simd_array!([u128; 2]: u128x2 | u128, u128); impl_simd_array!([m128; 2]: m128x2 | i128, i128); packed_simd-0.3.3/src/codegen/v32.rs010064400007650000024000000005361332536563700153730ustar0000000000000000//! Internal 32-bit wide vector types use crate::masks::*; impl_simd_array!([i8; 4]: i8x4 | i8, i8, i8, i8); impl_simd_array!([u8; 4]: u8x4 | u8, u8, u8, u8); impl_simd_array!([m8; 4]: m8x4 | i8, i8, i8, i8); impl_simd_array!([i16; 2]: i16x2 | i16, i16); impl_simd_array!([u16; 2]: u16x2 | u16, u16); impl_simd_array!([m16; 2]: m16x2 | i16, i16); packed_simd-0.3.3/src/codegen/v512.rs010064400007650000024000000060121342460246600154430ustar0000000000000000//! 
Internal 512-bit wide vector types use crate::masks::*; #[rustfmt::skip] impl_simd_array!( [i8; 64]: i8x64 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 ); #[rustfmt::skip] impl_simd_array!( [u8; 64]: u8x64 | u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 ); #[rustfmt::skip] impl_simd_array!( [m8; 64]: m8x64 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 ); #[rustfmt::skip] impl_simd_array!( [i16; 32]: i16x32 | i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 ); #[rustfmt::skip] impl_simd_array!( [u16; 32]: u16x32 | u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16 ); #[rustfmt::skip] impl_simd_array!( [m16; 32]: m16x32 | i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 ); #[rustfmt::skip] impl_simd_array!( [i32; 16]: i32x16 | i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 ); #[rustfmt::skip] impl_simd_array!( [u32; 16]: u32x16 | u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32 ); #[rustfmt::skip] impl_simd_array!( [f32; 16]: f32x16 | f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32 ); #[rustfmt::skip] impl_simd_array!( [m32; 16]: m32x16 | i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 ); impl_simd_array!([i64; 8]: i64x8 | i64, i64, i64, i64, i64, i64, i64, i64); impl_simd_array!([u64; 8]: u64x8 | u64, u64, u64, u64, u64, u64, u64, u64); impl_simd_array!([f64; 8]: f64x8 | f64, f64, f64, f64, f64, f64, f64, f64); impl_simd_array!([m64; 8]: m64x8 | i64, i64, i64, i64, i64, i64, i64, i64); impl_simd_array!([i128; 4]: i128x4 | i128, i128, i128, i128); impl_simd_array!([u128; 4]: u128x4 | u128, u128, u128, u128); impl_simd_array!([m128; 4]: m128x4 | i128, i128, i128, i128); packed_simd-0.3.3/src/codegen/v64.rs010064400007650000024000000014121332536563700153720ustar0000000000000000//! 
Internal 64-bit wide vector types use crate::masks::*; impl_simd_array!([i8; 8]: i8x8 | i8, i8, i8, i8, i8, i8, i8, i8); impl_simd_array!([u8; 8]: u8x8 | u8, u8, u8, u8, u8, u8, u8, u8); impl_simd_array!([m8; 8]: m8x8 | i8, i8, i8, i8, i8, i8, i8, i8); impl_simd_array!([i16; 4]: i16x4 | i16, i16, i16, i16); impl_simd_array!([u16; 4]: u16x4 | u16, u16, u16, u16); impl_simd_array!([m16; 4]: m16x4 | i16, i16, i16, i16); impl_simd_array!([i32; 2]: i32x2 | i32, i32); impl_simd_array!([u32; 2]: u32x2 | u32, u32); impl_simd_array!([f32; 2]: f32x2 | f32, f32); impl_simd_array!([m32; 2]: m32x2 | i32, i32); impl_simd_array!([i64; 1]: i64x1 | i64); impl_simd_array!([u64; 1]: u64x1 | u64); impl_simd_array!([f64; 1]: f64x1 | f64); impl_simd_array!([m64; 1]: m64x1 | i64); packed_simd-0.3.3/src/codegen/vPtr.rs010064400007650000024000000021061342460246600157010ustar0000000000000000//! Pointer vector types macro_rules! impl_simd_ptr { ([$ptr_ty:ty; $elem_count:expr]: $tuple_id:ident | $ty:ident | $($tys:ty),*) => { #[derive(Copy, Clone)] #[repr(simd)] pub struct $tuple_id<$ty>($(crate $tys),*); //^^^^^^^ leaked through SimdArray impl<$ty> crate::sealed::SimdArray for [$ptr_ty; $elem_count] { type Tuple = $tuple_id<$ptr_ty>; type T = $ptr_ty; const N: usize = $elem_count; type NT = [u32; $elem_count]; } impl<$ty> crate::sealed::Simd for $tuple_id<$ptr_ty> { type Element = $ptr_ty; const LANES: usize = $elem_count; type LanesType = [u32; $elem_count]; } } } impl_simd_ptr!([*const T; 2]: cptrx2 | T | T, T); impl_simd_ptr!([*const T; 4]: cptrx4 | T | T, T, T, T); impl_simd_ptr!([*const T; 8]: cptrx8 | T | T, T, T, T, T, T, T, T); impl_simd_ptr!([*mut T; 2]: mptrx2 | T | T, T); impl_simd_ptr!([*mut T; 4]: mptrx4 | T | T, T, T, T); impl_simd_ptr!([*mut T; 8]: mptrx8 | T | T, T, T, T, T, T, T, T); packed_simd-0.3.3/src/codegen/vSize.rs010064400007650000024000000016351333454520600160520ustar0000000000000000//! Vector types with pointer-sized elements use crate::codegen::pointer_sized_int::{isize_, usize_}; use crate::masks::*; impl_simd_array!([isize; 2]: isizex2 | isize_, isize_); impl_simd_array!([usize; 2]: usizex2 | usize_, usize_); impl_simd_array!([msize; 2]: msizex2 | isize_, isize_); impl_simd_array!([isize; 4]: isizex4 | isize_, isize_, isize_, isize_); impl_simd_array!([usize; 4]: usizex4 | usize_, usize_, usize_, usize_); impl_simd_array!([msize; 4]: msizex4 | isize_, isize_, isize_, isize_); impl_simd_array!( [isize; 8]: isizex8 | isize_, isize_, isize_, isize_, isize_, isize_, isize_, isize_ ); impl_simd_array!( [usize; 8]: usizex8 | usize_, usize_, usize_, usize_, usize_, usize_, usize_, usize_ ); impl_simd_array!( [msize; 8]: msizex8 | isize_, isize_, isize_, isize_, isize_, isize_, isize_, isize_ ); packed_simd-0.3.3/src/lib.rs010064400007650000024000000244021342636765500141250ustar0000000000000000//! # Portable packed SIMD vectors //! //! This crate is proposed for stabilization as `std::packed_simd` in [RFC2366: //! `std::simd`](https://github.com/rust-lang/rfcs/pull/2366) . //! //! The examples available in the //! [`examples/`](https://github.com/rust-lang-nursery/packed_simd/tree/master/examples) //! sub-directory of the crate showcase how to use the library in practice. //! //! ## Table of contents //! //! - [Introduction](#introduction) //! - [Vector types](#vector-types) //! - [Conditional operations](#conditional-operations) //! - [Conversions](#conversions) //! - [Performance //! guide](https://rust-lang-nursery.github.io/packed_simd/perf-guide/) //! //! ## Introduction //! //! 
This crate exports [`Simd<[T; N]>`][`Simd`]: a packed vector of `N` //! elements of type `T`, as well as many type aliases for this type: for //! example, [`f32x4`], which is just an alias for `Simd<[f32; 4]>`. //! //! The operations on packed vectors are, by default, "vertical", that is, they //! are applied to each vector lane in isolation from the others: //! //! ``` //! # use packed_simd::*; //! let a = i32x4::new(1, 2, 3, 4); //! let b = i32x4::new(5, 6, 7, 8); //! assert_eq!(a + b, i32x4::new(6, 8, 10, 12)); //! ``` //! //! Many "horizontal" operations are also provided: //! //! ``` //! # use packed_simd::*; //! # let a = i32x4::new(1, 2, 3, 4); //! assert_eq!(a.wrapping_sum(), 10); //! ``` //! //! On virtually all architectures vertical operations are fast, while //! horizontal operations are, by comparison, much slower. That is, the //! most portably-efficient way of performing a reduction over a slice //! is to collect the results into a vector using vertical operations, //! and then perform a single horizontal operation at the end: //! //! ``` //! # use packed_simd::*; //! fn reduce(x: &[i32]) -> i32 { //! assert!(x.len() % 4 == 0); //! let mut sum = i32x4::splat(0); // [0, 0, 0, 0] //! for i in (0..x.len()).step_by(4) { //! sum += i32x4::from_slice_unaligned(&x[i..]); //! } //! sum.wrapping_sum() //! } //! //! let x = [0, 1, 2, 3, 4, 5, 6, 7]; //! assert_eq!(reduce(&x), 28); //! ``` //! //! ## Vector types //! //! The vector type aliases are named according to the following scheme: //! //! > `{element_type}x{number_of_lanes} == Simd<[element_type; //! number_of_lanes]>` //! //! where the following element types are supported: //! //! * `i{element_width}`: signed integer //! * `u{element_width}`: unsigned integer //! * `f{element_width}`: float //! * `m{element_width}`: mask (see below) //! * `*{const,mut} T`: `const` and `mut` pointers //! //! ## Basic operations //! //! ``` //! # use packed_simd::*; //! // Sets all elements to `0`: //! let a = i32x4::splat(0); //! //! // Reads a vector from a slice: //! let mut arr = [0, 0, 0, 1, 2, 3, 4, 5]; //! let b = i32x4::from_slice_unaligned(&arr); //! //! // Reads the 4th element of a vector: //! assert_eq!(b.extract(3), 1); //! //! // Returns a new vector where the 4th element is replaced with `1`: //! let a = a.replace(3, 1); //! assert_eq!(a, b); //! //! // Writes a vector to a slice: //! let a = a.replace(2, 1); //! a.write_to_slice_unaligned(&mut arr[4..]); //! assert_eq!(arr, [0, 0, 0, 1, 0, 0, 1, 1]); //! ``` //! //! ## Conditional operations //! //! One often needs to perform an operation on some lanes of the vector. Vector //! masks, like `m32x4`, allow selecting on which vector lanes an operation is //! to be performed: //! //! ``` //! # use packed_simd::*; //! let a = i32x4::new(1, 1, 2, 2); //! //! // Add `1` to the first two lanes of the vector. //! let m = m16x4::new(true, true, false, false); //! let a = m.select(a + 1, a); //! assert_eq!(a, i32x4::splat(2)); //! ``` //! //! The elements of a vector mask are either `true` or `false`. Here `true` //! means that a lane is "selected", while `false` means that a lane is not //! selected. //! //! All vector masks implement a `mask.select(a: T, b: T) -> T` method that //! works on all vectors that have the same number of lanes as the mask. The //! resulting vector contains the elements of `a` for those lanes for which the //! mask is `true`, and the elements of `b` otherwise. //! //! The example constructs a mask with the first two lanes set to `true` and
the last two lanes set to `false`. This selects the first two lanes of `a + //! 1` and the last two lanes of `a`, producing a vector where the first two //! lanes have been incremented by `1`. //! //! > note: mask `select` can be used on vector types that have the same number //! > of lanes as the mask. The example shows this by using [`m16x4`] instead //! > of [`m32x4`]. It is _typically_ more performant to use a mask element //! > width equal to the element width of the vectors being operated upon. //! > This is, however, not true for 512-bit wide vectors when targeting //! > AVX-512, where the most efficient masks use only 1 bit per element. //! //! All vertical comparison operations return masks: //! //! ``` //! # use packed_simd::*; //! let a = i32x4::new(1, 1, 3, 3); //! let b = i32x4::new(2, 2, 0, 0); //! //! // ge: >= (Greater Equal; see also lt, le, gt, eq, ne). //! let m = a.ge(i32x4::splat(2)); //! //! if m.any() { //! // all / any / none allow coherent control flow //! let d = m.select(a, b); //! assert_eq!(d, i32x4::new(2, 2, 3, 3)); //! } //! ``` //! //! ## Conversions //! //! * **lossless widening conversions**: [`From`]/[`Into`] are implemented for //! vectors with the same number of lanes when the conversion is value //! preserving (same as in `std`). //! //! * **safe bitwise conversions**: The cargo feature `into_bits` provides the //! `IntoBits/FromBits` traits (`x.into_bits()`). These perform safe bitwise //! `transmute`s when all bit patterns of the source type are valid bit //! patterns of the target type and are also implemented for the //! architecture-specific vector types of `std::arch`. For example, `let x: //! u8x8 = m8x8::splat(true).into_bits();` is provided because all `m8x8` bit //! patterns are valid `u8x8` bit patterns. However, the opposite is not //! true: not all `u8x8` bit patterns are valid `m8x8` bit patterns, so this //! operation cannot be performed safely using `x.into_bits()`; one needs to //! use `unsafe { crate::mem::transmute(x) }` for that, making sure that the //! value in the `u8x8` is a valid bit pattern of `m8x8`. //! //! * **numeric casts** (`as`): are performed using [`FromCast`]/[`Cast`] //! (`x.cast()`), just like `as`: //! //! * casting integer vectors whose lane types have the same size (e.g. //! `i32xN` -> `u32xN`) is a **no-op**, //! //! * casting from a larger integer to a smaller integer (e.g. `u32xN` -> //! `u8xN`) will **truncate**, //! //! * casting from a smaller integer to a larger integer (e.g. `u8xN` -> //! `u32xN`) will: //! * **zero-extend** if the source is unsigned, or //! * **sign-extend** if the source is signed, //! //! * casting from a float to an integer will **round the float towards //! zero**, //! //! * casting from an integer to float will produce the floating point //! representation of the integer, **rounding to nearest, ties to even**, //! //! * casting from an `f32` to an `f64` is perfect and lossless, //! //! * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. //! //! Numeric casts are not very "precise": depending on the types involved, //! they can be lossy or value-preserving.
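//!
//! The following short example illustrates a few of these conversion rules;
//! it uses only the vector type aliases and the [`Cast`] trait that this
//! crate exports:
//!
//! ```
//! # use packed_simd::*;
//! // Lossless widening conversion via `From`/`Into`:
//! let small = i8x4::new(-1, 2, -3, 4);
//! let wide: i16x4 = small.into();
//! assert_eq!(wide, i16x4::new(-1, 2, -3, 4));
//!
//! // Casting from a larger to a smaller integer truncates each lane,
//! // just like `as` does for the scalar types:
//! let x = i16x4::new(256, -1, 2, 3);
//! let y: u8x4 = x.cast();
//! assert_eq!(y, u8x4::new(0, 255, 2, 3));
//!
//! // Casting from a float to an integer rounds each lane towards zero:
//! let f = f32x4::new(1.9, -1.9, 2.5, -2.5);
//! let i: i32x4 = f.cast();
//! assert_eq!(i, i32x4::new(1, -1, 2, -2));
//! ```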
#![feature( repr_simd, const_fn, platform_intrinsics, stdsimd, aarch64_target_feature, arm_target_feature, link_llvm_intrinsics, core_intrinsics, stmt_expr_attributes, align_offset, mmx_target_feature, crate_visibility_modifier, custom_inner_attributes )] #![allow(non_camel_case_types, non_snake_case)] #![cfg_attr(test, feature(hashmap_internals))] #![cfg_attr( feature = "cargo-clippy", allow( clippy::cast_possible_truncation, clippy::cast_lossless, clippy::cast_possible_wrap, clippy::cast_precision_loss, // This lint is currently broken for generic code // See https://github.com/rust-lang/rust-clippy/issues/3410 clippy::use_self ) )] #![cfg_attr( feature = "cargo-clippy", deny(clippy::missing_inline_in_public_items) )] #![deny(warnings, rust_2018_idioms)] #![no_std] use cfg_if::cfg_if; cfg_if! { if #[cfg(feature = "core_arch")] { #[allow(unused_imports)] use core_arch as arch; } else { #[allow(unused_imports)] use core::arch; } } #[cfg(all(target_arch = "wasm32", test))] use wasm_bindgen_test::*; #[allow(unused_imports)] use core::{ /* arch (handled above), */ cmp, f32, f64, fmt, hash, hint, i128, i16, i32, i64, i8, intrinsics, isize, iter, marker, mem, ops, ptr, slice, u128, u16, u32, u64, u8, usize, }; #[macro_use] mod testing; #[macro_use] mod api; mod codegen; mod sealed; /// Packed SIMD vector type. /// /// # Examples /// /// ``` /// # use packed_simd::Simd; /// let v = Simd::<[i32; 4]>::new(0, 1, 2, 3); /// assert_eq!(v.extract(2), 2); /// ``` #[repr(transparent)] #[derive(Copy, Clone)] pub struct Simd<A: sealed::SimdArray>( // FIXME: this type should be private, // but it currently must be public for the // `shuffle!` macro to work: it needs to // access the internal `repr(simd)` type // to call the shuffle intrinsics. #[doc(hidden)] pub <A as sealed::SimdArray>::Tuple, ); /// Wrapper over `T` implementing a lexicographical order via the `PartialOrd` /// and/or `Ord` traits. #[repr(transparent)] #[derive(Copy, Clone, Debug)] #[cfg_attr( feature = "cargo-clippy", allow(clippy::missing_inline_in_public_items) )] pub struct LexicographicallyOrdered<T>(T); mod masks; pub use self::masks::*; mod v16; pub use self::v16::*; mod v32; pub use self::v32::*; mod v64; pub use self::v64::*; mod v128; pub use self::v128::*; mod v256; pub use self::v256::*; mod v512; pub use self::v512::*; mod vSize; pub use self::vSize::*; mod vPtr; pub use self::vPtr::*; pub use self::api::cast::*; #[cfg(feature = "into_bits")] pub use self::api::into_bits::*; // Re-export the shuffle intrinsics required by the `shuffle!` macro. #[doc(hidden)] pub use self::codegen::llvm::{ __shuffle_vector16, __shuffle_vector2, __shuffle_vector32, __shuffle_vector4, __shuffle_vector64, __shuffle_vector8, }; crate mod llvm { crate use crate::codegen::llvm::*; } packed_simd-0.3.3/src/masks.rs010064400007650000024000000066141342460246600144700ustar0000000000000000//! Mask types macro_rules!
impl_mask_ty { ($id:ident : $elem_ty:ident | #[$doc:meta]) => { #[$doc] #[derive(Copy, Clone)] pub struct $id($elem_ty); impl crate::sealed::Mask for $id { fn test(&self) -> bool { $id::test(self) } } impl $id { /// Instantiate a mask with value `x` #[inline] pub fn new(x: bool) -> Self { if x { $id(!0) } else { $id(0) } } /// Test if the mask is set #[inline] pub fn test(&self) -> bool { self.0 != 0 } } impl Default for $id { #[inline] fn default() -> Self { $id(0) } } #[cfg_attr(feature = "cargo-clippy", allow(clippy::partialeq_ne_impl))] impl PartialEq<$id> for $id { #[inline] fn eq(&self, other: &Self) -> bool { self.0 == other.0 } #[inline] fn ne(&self, other: &Self) -> bool { self.0 != other.0 } } impl Eq for $id {} impl PartialOrd<$id> for $id { #[inline] fn partial_cmp( &self, other: &Self, ) -> Option<crate::cmp::Ordering> { use crate::cmp::Ordering; if self == other { Some(Ordering::Equal) } else if self.0 > other.0 { // Note: // * false == 0_i // * true == !0_i == -1_i Some(Ordering::Less) } else { Some(Ordering::Greater) } } #[inline] fn lt(&self, other: &Self) -> bool { self.0 > other.0 } #[inline] fn gt(&self, other: &Self) -> bool { self.0 < other.0 } #[inline] fn le(&self, other: &Self) -> bool { self.0 >= other.0 } #[inline] fn ge(&self, other: &Self) -> bool { self.0 <= other.0 } } impl Ord for $id { #[inline] fn cmp(&self, other: &Self) -> crate::cmp::Ordering { match self.partial_cmp(other) { Some(x) => x, None => unsafe { crate::hint::unreachable_unchecked() }, } } } impl crate::hash::Hash for $id { #[inline] fn hash<H: crate::hash::Hasher>(&self, state: &mut H) { (self.0 != 0).hash(state); } } impl crate::fmt::Debug for $id { #[inline] fn fmt( &self, fmtter: &mut crate::fmt::Formatter<'_>, ) -> Result<(), crate::fmt::Error> { write!(fmtter, "{}({})", stringify!($id), self.0 != 0) } } }; } impl_mask_ty!(m8: i8 | /// 8-bit wide mask. ); impl_mask_ty!(m16: i16 | /// 16-bit wide mask. ); impl_mask_ty!(m32: i32 | /// 32-bit wide mask. ); impl_mask_ty!(m64: i64 | /// 64-bit wide mask. ); impl_mask_ty!(m128: i128 | /// 128-bit wide mask. ); impl_mask_ty!(msize: isize | /// isize-wide mask. ); packed_simd-0.3.3/src/sealed.rs010064400007650000024000000017671333454520600146070ustar0000000000000000//! Sealed traits /// Trait implemented by arrays that can be SIMD types. #[doc(hidden)] pub trait SimdArray { /// The corresponding `#[repr(simd)]` type. type Tuple: Copy + Clone; /// The element type of the vector. type T; /// The number of elements in the array. const N: usize; /// The type: `[u32; Self::N]`. type NT; } /// This trait is used to constrain the arguments /// and result type of the portable shuffles. #[doc(hidden)] pub trait Shuffle<Lanes> { // Lanes is a `[u32; N]` where `N` is the number of vector lanes /// The result type of the shuffle. type Output; } /// This trait is implemented by all SIMD vector types. #[doc(hidden)] pub trait Simd { /// Element type of the SIMD vector type Element; /// The number of elements in the SIMD vector. const LANES: usize; /// The type: `[u32; Self::LANES]`. type LanesType; } /// This trait is implemented by all mask types #[doc(hidden)] pub trait Mask { fn test(&self) -> bool; } packed_simd-0.3.3/src/testing.rs010064400007650000024000000001561333454520600150200ustar0000000000000000//! Testing macros and other utilities. #[macro_use] mod macros; #[cfg(test)] #[macro_use] crate mod utils; packed_simd-0.3.3/src/testing/macros.rs010064400007650000024000000023741333522741000163030ustar0000000000000000//! Testing macros macro_rules!
test_if { ($cfg_tt:tt: $it:item) => { #[cfg(any( // Test everything if: // // * tests are enabled, // * no features about exclusively testing // specific vector classes are enabled all(test, not(any( test_v16, test_v32, test_v64, test_v128, test_v256, test_v512, test_none, // disables all tests ))), // Test if: // // * tests are enabled // * a particular cfg token tree returns true all(test, $cfg_tt), ))] $it }; } #[cfg(test)] #[allow(unused)] macro_rules! ref_ { ($anything:tt) => { &$anything }; } #[cfg(test)] #[allow(unused)] macro_rules! ref_mut_ { ($anything:tt) => { &mut $anything }; } packed_simd-0.3.3/src/testing/utils.rs010064400007650000024000000104101342460246600161540ustar0000000000000000//! Testing utilities #![allow(dead_code)] use crate::{cmp::PartialOrd, fmt::Debug, LexicographicallyOrdered}; /// Tests PartialOrd for `a` and `b` where `a < b` is true. pub fn test_lt<T>( a: LexicographicallyOrdered<T>, b: LexicographicallyOrdered<T>, ) where LexicographicallyOrdered<T>: Debug + PartialOrd, { assert!(a < b, "{:?}, {:?}", a, b); assert!(b > a, "{:?}, {:?}", a, b); assert!(!(a == b), "{:?}, {:?}", a, b); assert!(a != b, "{:?}, {:?}", a, b); assert!(a <= b, "{:?}, {:?}", a, b); assert!(b >= a, "{:?}, {:?}", a, b); // Irreflexivity assert!(!(a < a), "{:?}, {:?}", a, b); assert!(!(b < b), "{:?}, {:?}", a, b); assert!(!(a > a), "{:?}, {:?}", a, b); assert!(!(b > b), "{:?}, {:?}", a, b); assert!(a <= a, "{:?}, {:?}", a, b); assert!(b <= b, "{:?}, {:?}", a, b); } /// Tests PartialOrd for `a` and `b` where `a <= b` is true. pub fn test_le<T>( a: LexicographicallyOrdered<T>, b: LexicographicallyOrdered<T>, ) where LexicographicallyOrdered<T>: Debug + PartialOrd, { assert!(a <= b, "{:?}, {:?}", a, b); assert!(b >= a, "{:?}, {:?}", a, b); assert!(a == b || a < b, "{:?}, {:?}", a, b); assert!(a == b || b > a, "{:?}, {:?}", a, b); if a == b { assert!(!(a < b), "{:?}, {:?}", a, b); assert!(!(b > a), "{:?}, {:?}", a, b); assert!(!(a != b), "{:?}, {:?}", a, b); } else { assert!(a != b, "{:?}, {:?}", a, b); test_lt(a, b); } } /// Test PartialOrd::partial_cmp for `a` and `b` returning `Ordering` pub fn test_cmp<T>( a: LexicographicallyOrdered<T>, b: LexicographicallyOrdered<T>, o: Option<crate::cmp::Ordering>, ) where LexicographicallyOrdered<T>: PartialOrd + Debug, T: Debug + crate::sealed::Simd + Copy + Clone, <T as crate::sealed::Simd>::Element: Default + Copy + Clone + PartialOrd, { assert!(T::LANES <= 64, "array length in these two arrays needs updating"); let mut arr_a: [T::Element; 64] = [Default::default(); 64]; let mut arr_b: [T::Element; 64] = [Default::default(); 64]; unsafe { crate::ptr::write_unaligned( arr_a.as_mut_ptr() as *mut LexicographicallyOrdered<T>, a, ) } unsafe { crate::ptr::write_unaligned( arr_b.as_mut_ptr() as *mut LexicographicallyOrdered<T>, b, ) } let expected = arr_a[0..T::LANES].partial_cmp(&arr_b[0..T::LANES]); let result = a.partial_cmp(&b); assert_eq!(expected, result, "{:?}, {:?}", a, b); assert_eq!(o, result, "{:?}, {:?}", a, b); match o { Some(crate::cmp::Ordering::Less) => { test_lt(a, b); test_le(a, b); } Some(crate::cmp::Ordering::Greater) => { test_lt(b, a); test_le(b, a); } Some(crate::cmp::Ordering::Equal) => { assert!(a == b, "{:?}, {:?}", a, b); assert!(!(a != b), "{:?}, {:?}", a, b); assert!(!(a < b), "{:?}, {:?}", a, b); assert!(!(b < a), "{:?}, {:?}", a, b); assert!(!(a > b), "{:?}, {:?}", a, b); assert!(!(b > a), "{:?}, {:?}", a, b); test_le(a, b); test_le(b, a); } None => { assert!(!(a == b), "{:?}, {:?}", a, b); assert!(!(a != b), "{:?}, {:?}", a, b); assert!(!(a < b), "{:?}, {:?}", a, b); assert!(!(a > b), "{:?}, {:?}", a, b);
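// `partial_cmp` returned `None` (unordered operands), so the
// reversed-operand and non-strict comparisons below must not hold either: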
assert!(!(b < a), "{:?}, {:?}", a, b); assert!(!(b > a), "{:?}, {:?}", a, b); assert!(!(a <= b), "{:?}, {:?}", a, b); assert!(!(b <= a), "{:?}, {:?}", a, b); assert!(!(a >= b), "{:?}, {:?}", a, b); assert!(!(b >= a), "{:?}, {:?}", a, b); } } } // Returns a tuple containing two distinct pointer values of the same type as // the element type of the Simd vector `$id`. #[allow(unused)] macro_rules! ptr_vals { ($id:ty) => { // expands to an expression #[allow(unused_unsafe)] unsafe { // all bits cleared let clear: <$id as sealed::Simd>::Element = crate::mem::zeroed(); // all bits set let set: <$id as sealed::Simd>::Element = crate::mem::transmute(-1_isize); (clear, set) } }; } packed_simd-0.3.3/src/v128.rs010064400007650000024000000060101342467506300140430ustar0000000000000000//! 128-bit wide vector types #![rustfmt::skip] use crate::*; impl_i!([i8; 16]: i8x16, m8x16 | i8, u16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | From: | /// A 128-bit vector with 16 `i8` lanes. ); impl_u!([u8; 16]: u8x16, m8x16 | u8, u16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | From: | /// A 128-bit vector with 16 `u8` lanes. ); impl_m!([m8; 16]: m8x16 | i8, u16 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | From: m16x16 | /// A 128-bit vector mask with 16 `m8` lanes. ); impl_i!([i16; 8]: i16x8, m16x8 | i16, u8 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | From: i8x8, u8x8 | /// A 128-bit vector with 8 `i16` lanes. ); impl_u!([u16; 8]: u16x8, m16x8 | u16, u8 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | From: u8x8 | /// A 128-bit vector with 8 `u16` lanes. ); impl_m!([m16; 8]: m16x8 | i16, u8 | test_v128 | x0, x1, x2, x3, x4, x5, x6, x7 | From: m8x8, m32x8 | /// A 128-bit vector mask with 8 `m16` lanes. ); impl_i!([i32; 4]: i32x4, m32x4 | i32, u8 | test_v128 | x0, x1, x2, x3 | From: i8x4, u8x4, i16x4, u16x4 | /// A 128-bit vector with 4 `i32` lanes. ); impl_u!([u32; 4]: u32x4, m32x4 | u32, u8 | test_v128 | x0, x1, x2, x3 | From: u8x4, u16x4 | /// A 128-bit vector with 4 `u32` lanes. ); impl_f!([f32; 4]: f32x4, m32x4 | f32 | test_v128 | x0, x1, x2, x3 | From: i8x4, u8x4, i16x4, u16x4 | /// A 128-bit vector with 4 `f32` lanes. ); impl_m!([m32; 4]: m32x4 | i32, u8 | test_v128 | x0, x1, x2, x3 | From: m8x4, m16x4, m64x4 | /// A 128-bit vector mask with 4 `m32` lanes. ); impl_i!([i64; 2]: i64x2, m64x2 | i64, u8 | test_v128 | x0, x1 | From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2 | /// A 128-bit vector with 2 `i64` lanes. ); impl_u!([u64; 2]: u64x2, m64x2 | u64, u8 | test_v128 | x0, x1 | From: u8x2, u16x2, u32x2 | /// A 128-bit vector with 2 `u64` lanes. ); impl_f!([f64; 2]: f64x2, m64x2 | f64 | test_v128 | x0, x1 | From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, f32x2 | /// A 128-bit vector with 2 `f64` lanes. ); impl_m!([m64; 2]: m64x2 | i64, u8 | test_v128 | x0, x1 | From: m8x2, m16x2, m32x2, m128x2 | /// A 128-bit vector mask with 2 `m64` lanes. ); impl_i!([i128; 1]: i128x1, m128x1 | i128, u8 | test_v128 | x0 | From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, i64x1, u64x1 */ | // FIXME: unary small vector types /// A 128-bit vector with 1 `i128` lane. ); impl_u!([u128; 1]: u128x1, m128x1 | u128, u8 | test_v128 | x0 | From: /*u8x1, u16x1, u32x1, u64x1 */ | // FIXME: unary small vector types /// A 128-bit vector with 1 `u128` lane. 
); impl_m!([m128; 1]: m128x1 | i128, u8 | test_v128 | x0 | From: /*m8x1, m16x1, m32x1, m64x1 */ | // FIXME: unary small vector types /// A 128-bit vector mask with 1 `m128` lane. ); packed_simd-0.3.3/src/v16.rs010064400007650000024000000006771342467506300137740ustar0000000000000000//! 16-bit wide vector types use crate::*; impl_i!([i8; 2]: i8x2, m8x2 | i8, u8 | test_v16 | x0, x1 | From: | /// A 16-bit vector with 2 `i8` lanes. ); impl_u!([u8; 2]: u8x2, m8x2 | u8, u8 | test_v16 | x0, x1 | From: | /// A 16-bit vector with 2 `u8` lanes. ); impl_m!([m8; 2]: m8x2 | i8, u8 | test_v16 | x0, x1 | From: m16x2, m32x2, m64x2, m128x2 | /// A 16-bit vector mask with 2 `m8` lanes. ); packed_simd-0.3.3/src/v256.rs010064400007650000024000000066071342467506300140610ustar0000000000000000//! 256-bit wide vector types #![rustfmt::skip] use crate::*; impl_i!([i8; 32]: i8x32, m8x32 | i8, u32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | From: | /// A 256-bit vector with 32 `i8` lanes. ); impl_u!([u8; 32]: u8x32, m8x32 | u8, u32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | From: | /// A 256-bit vector with 32 `u8` lanes. ); impl_m!([m8; 32]: m8x32 | i8, u32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | From: | /// A 256-bit vector mask with 32 `m8` lanes. ); impl_i!([i16; 16]: i16x16, m16x16 | i16, u16 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | From: i8x16, u8x16 | /// A 256-bit vector with 16 `i16` lanes. ); impl_u!([u16; 16]: u16x16, m16x16 | u16, u16 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | From: u8x16 | /// A 256-bit vector with 16 `u16` lanes. ); impl_m!([m16; 16]: m16x16 | i16, u16 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | From: m8x16 | /// A 256-bit vector mask with 16 `m16` lanes. ); impl_i!([i32; 8]: i32x8, m32x8 | i32, u8 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | From: i8x8, u8x8, i16x8, u16x8 | /// A 256-bit vector with 8 `i32` lanes. ); impl_u!([u32; 8]: u32x8, m32x8 | u32, u8 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | From: u8x8, u16x8 | /// A 256-bit vector with 8 `u32` lanes. ); impl_f!([f32; 8]: f32x8, m32x8 | f32 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | From: i8x8, u8x8, i16x8, u16x8 | /// A 256-bit vector with 8 `f32` lanes. ); impl_m!([m32; 8]: m32x8 | i32, u8 | test_v256 | x0, x1, x2, x3, x4, x5, x6, x7 | From: m8x8, m16x8 | /// A 256-bit vector mask with 8 `m32` lanes. ); impl_i!([i64; 4]: i64x4, m64x4 | i64, u8 | test_v256 | x0, x1, x2, x3 | From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4 | /// A 256-bit vector with 4 `i64` lanes. ); impl_u!([u64; 4]: u64x4, m64x4 | u64, u8 | test_v256 | x0, x1, x2, x3 | From: u8x4, u16x4, u32x4 | /// A 256-bit vector with 4 `u64` lanes. ); impl_f!([f64; 4]: f64x4, m64x4 | f64 | test_v256 | x0, x1, x2, x3 | From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, f32x4 | /// A 256-bit vector with 4 `f64` lanes. ); impl_m!([m64; 4]: m64x4 | i64, u8 | test_v256 | x0, x1, x2, x3 | From: m8x4, m16x4, m32x4 | /// A 256-bit vector mask with 4 `m64` lanes. 
); impl_i!([i128; 2]: i128x2, m128x2 | i128, u8 | test_v256 | x0, x1 | From: i8x2, u8x2, i16x2, u16x2, i32x2, u32x2, i64x2, u64x2 | /// A 256-bit vector with 2 `i128` lanes. ); impl_u!([u128; 2]: u128x2, m128x2 | u128, u8 | test_v256 | x0, x1 | From: u8x2, u16x2, u32x2, u64x2 | /// A 256-bit vector with 2 `u128` lanes. ); impl_m!([m128; 2]: m128x2 | i128, u8 | test_v256 | x0, x1 | From: m8x2, m16x2, m32x2, m64x2 | /// A 256-bit vector mask with 2 `m128` lanes. ); packed_simd-0.3.3/src/v32.rs010064400007650000024000000015771342467506300137720ustar0000000000000000//! 32-bit wide vector types use crate::*; impl_i!([i8; 4]: i8x4, m8x4 | i8, u8 | test_v32 | x0, x1, x2, x3 | From: | /// A 32-bit vector with 4 `i8` lanes. ); impl_u!([u8; 4]: u8x4, m8x4 | u8, u8 | test_v32 | x0, x1, x2, x3 | From: | /// A 32-bit vector with 4 `u8` lanes. ); impl_m!([m8; 4]: m8x4 | i8, u8 | test_v32 | x0, x1, x2, x3 | From: m16x4, m32x4, m64x4 | /// A 32-bit vector mask with 4 `m8` lanes. ); impl_i!([i16; 2]: i16x2, m16x2 | i16, u8 | test_v32 | x0, x1 | From: i8x2, u8x2 | /// A 32-bit vector with 2 `i16` lanes. ); impl_u!([u16; 2]: u16x2, m16x2 | u16, u8 | test_v32 | x0, x1 | From: u8x2 | /// A 32-bit vector with 2 `u16` lanes. ); impl_m!([m16; 2]: m16x2 | i16, u8 | test_v32 | x0, x1 | From: m8x2, m32x2, m64x2, m128x2 | /// A 32-bit vector mask with 2 `m16` lanes. ); packed_simd-0.3.3/src/v512.rs010064400007650000024000000107141342467506300140460ustar0000000000000000//! 512-bit wide vector types #![rustfmt::skip] use crate::*; impl_i!([i8; 64]: i8x64, m8x64 | i8, u64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | From: | /// A 512-bit vector with 64 `i8` lanes. ); impl_u!([u8; 64]: u8x64, m8x64 | u8, u64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | From: | /// A 512-bit vector with 64 `u8` lanes. ); impl_m!([m8; 64]: m8x64 | i8, u64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | From: | /// A 512-bit vector mask with 64 `m8` lanes. ); impl_i!([i16; 32]: i16x32, m16x32 | i16, u32 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | From: i8x32, u8x32 | /// A 512-bit vector with 32 `i16` lanes. ); impl_u!([u16; 32]: u16x32, m16x32 | u16, u32 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | From: u8x32 | /// A 512-bit vector with 32 `u16` lanes. 
); impl_m!([m16; 32]: m16x32 | i16, u32 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | From: m8x32 | /// A 512-bit vector mask with 32 `m16` lanes. ); impl_i!([i32; 16]: i32x16, m32x16 | i32, u16 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | From: i8x16, u8x16, i16x16, u16x16 | /// A 512-bit vector with 16 `i32` lanes. ); impl_u!([u32; 16]: u32x16, m32x16 | u32, u16 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | From: u8x16, u16x16 | /// A 512-bit vector with 16 `u32` lanes. ); impl_f!([f32; 16]: f32x16, m32x16 | f32 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | From: i8x16, u8x16, i16x16, u16x16 | /// A 512-bit vector with 16 `f32` lanes. ); impl_m!([m32; 16]: m32x16 | i32, u16 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | From: m8x16, m16x16 | /// A 512-bit vector mask with 16 `m32` lanes. ); impl_i!([i64; 8]: i64x8, m64x8 | i64, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8 | /// A 512-bit vector with 8 `i64` lanes. ); impl_u!([u64; 8]: u64x8, m64x8 | u64, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: u8x8, u16x8, u32x8 | /// A 512-bit vector with 8 `u64` lanes. ); impl_f!([f64; 8]: f64x8, m64x8 | f64 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: i8x8, u8x8, i16x8, u16x8, i32x8, u32x8, f32x8 | /// A 512-bit vector with 8 `f64` lanes. ); impl_m!([m64; 8]: m64x8 | i64, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: m8x8, m16x8, m32x8 | /// A 512-bit vector mask with 8 `m64` lanes. ); impl_i!([i128; 4]: i128x4, m128x4 | i128, u8 | test_v512 | x0, x1, x2, x3 | From: i8x4, u8x4, i16x4, u16x4, i32x4, u32x4, i64x4, u64x4 | /// A 512-bit vector with 4 `i128` lanes. ); impl_u!([u128; 4]: u128x4, m128x4 | u128, u8 | test_v512 | x0, x1, x2, x3 | From: u8x4, u16x4, u32x4, u64x4 | /// A 512-bit vector with 4 `u128` lanes. ); impl_m!([m128; 4]: m128x4 | i128, u8 | test_v512 | x0, x1, x2, x3 | From: m8x4, m16x4, m32x4, m64x4 | /// A 512-bit vector mask with 4 `m128` lanes. ); packed_simd-0.3.3/src/v64.rs010064400007650000024000000045141342636765500140000ustar0000000000000000//! 64-bit wide vector types #![rustfmt::skip] use super::*; impl_i!([i8; 8]: i8x8, m8x8 | i8, u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | /// A 64-bit vector with 8 `i8` lanes. ); impl_u!([u8; 8]: u8x8, m8x8 | u8, u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | /// A 64-bit vector with 8 `u8` lanes. ); impl_m!([m8; 8]: m8x8 | i8, u8 | test_v64 | x0, x1, x2, x3, x4, x5, x6, x7 | From: m16x8, m32x8 | /// A 64-bit vector mask with 8 `m8` lanes. ); impl_i!([i16; 4]: i16x4, m16x4 | i16, u8 | test_v64 | x0, x1, x2, x3 | From: i8x4, u8x4 | /// A 64-bit vector with 4 `i16` lanes. ); impl_u!([u16; 4]: u16x4, m16x4 | u16, u8 | test_v64 | x0, x1, x2, x3 | From: u8x4 | /// A 64-bit vector with 4 `u16` lanes. ); impl_m!([m16; 4]: m16x4 | i16, u8 | test_v64 | x0, x1, x2, x3 | From: m8x4, m32x4, m64x4 | /// A 64-bit vector mask with 4 `m16` lanes. ); impl_i!([i32; 2]: i32x2, m32x2 | i32, u8 | test_v64 | x0, x1 | From: i8x2, u8x2, i16x2, u16x2 | /// A 64-bit vector with 2 `i32` lanes. ); impl_u!([u32; 2]: u32x2, m32x2 | u32, u8 | test_v64 | x0, x1 | From: u8x2, u16x2 | /// A 64-bit vector with 2 `u32` lanes. 
); impl_m!([m32; 2]: m32x2 | i32, u8 | test_v64 | x0, x1 | From: m8x2, m16x2, m64x2, m128x2 | /// A 64-bit vector mask with 2 `m32` lanes. ); impl_f!([f32; 2]: f32x2, m32x2 | f32 | test_v64 | x0, x1 | From: i8x2, u8x2, i16x2, u16x2 | /// A 64-bit vector with 2 `f32` lanes. ); /* impl_i!([i64; 1]: i64x1, m64x1 | i64, u8 | test_v64 | x0 | From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1*/ | // FIXME: primitive to vector conversion /// A 64-bit vector with 1 `i64` lane. ); impl_u!([u64; 1]: u64x1, m64x1 | u64, u8 | test_v64 | x0 | From: /*u8x1, u16x1, u32x1*/ | // FIXME: primitive to vector conversion /// A 64-bit vector with 1 `u64` lane. ); impl_m!([m64; 1]: m64x1 | i64, u8 | test_v64 | x0 | From: /*m8x1, m16x1, m32x1, */ m128x1 | // FIXME: unary small vector types /// A 64-bit vector mask with 1 `m64` lane. ); impl_f!([f64; 1]: f64x1, m64x1 | f64 | test_v64 | x0 | From: /*i8x1, u8x1, i16x1, u16x1, i32x1, u32x1, f32x1*/ | // FIXME: unary small vector types /// A 64-bit vector with 1 `f64` lane. ); */ packed_simd-0.3.3/src/vPtr.rs010064400007650000024000000017071342460246600143030ustar0000000000000000//! Vectors of pointers #![rustfmt::skip] use crate::*; impl_const_p!( [*const T; 2]: cptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: | /// A vector with 2 `*const T` lanes ); impl_mut_p!( [*mut T; 2]: mptrx2, msizex2, usizex2, isizex2 | test_v128 | x0, x1 | From: | /// A vector with 2 `*mut T` lanes ); impl_const_p!( [*const T; 4]: cptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: | /// A vector with 4 `*const T` lanes ); impl_mut_p!( [*mut T; 4]: mptrx4, msizex4, usizex4, isizex4 | test_v256 | x0, x1, x2, x3 | From: | /// A vector with 4 `*mut T` lanes ); impl_const_p!( [*const T; 8]: cptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | /// A vector with 8 `*const T` lanes ); impl_mut_p!( [*mut T; 8]: mptrx8, msizex8, usizex8, isizex8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | /// A vector with 8 `*mut T` lanes ); packed_simd-0.3.3/src/vSize.rs010064400007650000024000000027141342467506300144520ustar0000000000000000//! Vectors with pointer-sized elements use crate::codegen::pointer_sized_int::{isize_, usize_}; use crate::*; impl_i!([isize; 2]: isizex2, msizex2 | isize_, u8 | test_v128 | x0, x1| From: | /// A vector with 2 `isize` lanes. ); impl_u!([usize; 2]: usizex2, msizex2 | usize_, u8 | test_v128 | x0, x1| From: | /// A vector with 2 `usize` lanes. ); impl_m!([msize; 2]: msizex2 | isize_, u8 | test_v128 | x0, x1 | From: | /// A vector mask with 2 `msize` lanes. ); impl_i!([isize; 4]: isizex4, msizex4 | isize_, u8 | test_v256 | x0, x1, x2, x3 | From: | /// A vector with 4 `isize` lanes. ); impl_u!([usize; 4]: usizex4, msizex4 | usize_, u8 | test_v256 | x0, x1, x2, x3| From: | /// A vector with 4 `usize` lanes. ); impl_m!([msize; 4]: msizex4 | isize_, u8 | test_v256 | x0, x1, x2, x3 | From: | /// A vector mask with 4 `msize` lanes. ); impl_i!([isize; 8]: isizex8, msizex8 | isize_, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | /// A vector with 8 `isize` lanes. ); impl_u!([usize; 8]: usizex8, msizex8 | usize_, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | /// A vector with 8 `usize` lanes. ); impl_m!([msize; 8]: msizex8 | isize_, u8 | test_v512 | x0, x1, x2, x3, x4, x5, x6, x7 | From: | /// A vector mask with 8 `msize` lanes.
); packed_simd-0.3.3/tests/endianness.rs010064400007650000024000000161631342461154100160460ustar0000000000000000#[cfg(target_arch = "wasm32")] use wasm_bindgen_test::*; use packed_simd::*; use std::{mem, slice}; #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn endian_indexing() { let v = i32x4::new(0, 1, 2, 3); assert_eq!(v.extract(0), 0); assert_eq!(v.extract(1), 1); assert_eq!(v.extract(2), 2); assert_eq!(v.extract(3), 3); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn endian_bitcasts() { #[cfg_attr(rustfmt, rustfmt_skip)] let x = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); let t: i16x8 = unsafe { mem::transmute(x) }; let e: i16x8 = if cfg!(target_endian = "little") { i16x8::new(256, 770, 1284, 1798, 2312, 2826, 3340, 3854) } else { i16x8::new(1, 515, 1029, 1543, 2057, 2571, 3085, 3599) }; assert_eq!(t, e); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn endian_casts() { #[cfg_attr(rustfmt, rustfmt_skip)] let x = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); let t: i16x16 = x.into(); // simd_cast #[cfg_attr(rustfmt, rustfmt_skip)] let e = i16x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); assert_eq!(t, e); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn endian_load_and_stores() { #[cfg_attr(rustfmt, rustfmt_skip)] let x = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); let mut y: [i16; 8] = [0; 8]; x.write_to_slice_unaligned(unsafe { slice::from_raw_parts_mut(&mut y as *mut _ as *mut i8, 16) }); let e: [i16; 8] = if cfg!(target_endian = "little") { [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] } else { [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] }; assert_eq!(y, e); let z = i8x16::from_slice_unaligned(unsafe { slice::from_raw_parts(&y as *const _ as *const i8, 16) }); assert_eq!(z, x); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn endian_array_union() { union A { data: [f32; 4], vec: f32x4, } let x: [f32; 4] = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data }; assert_eq!(x[0], 0_f32); assert_eq!(x[1], 1_f32); assert_eq!(x[2], 2_f32); assert_eq!(x[3], 3_f32); let y: f32x4 = unsafe { A { data: [3., 2., 1., 0.] 
}.vec }; assert_eq!(y, f32x4::new(3., 2., 1., 0.)); union B { data: [i8; 16], vec: i8x16, } #[cfg_attr(rustfmt, rustfmt_skip)] let x = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); let x: [i8; 16] = unsafe { B { vec: x }.data }; for i in 0..16 { assert_eq!(x[i], i as i8); } #[cfg_attr(rustfmt, rustfmt_skip)] let y = [ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ]; #[cfg_attr(rustfmt, rustfmt_skip)] let e = i8x16::new( 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ); let z = unsafe { B { data: y }.vec }; assert_eq!(z, e); union C { data: [i16; 8], vec: i8x16, } #[cfg_attr(rustfmt, rustfmt_skip)] let x = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); let x: [i16; 8] = unsafe { C { vec: x }.data }; let e: [i16; 8] = if cfg!(target_endian = "little") { [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] } else { [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] }; assert_eq!(x, e); } #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn endian_tuple_access() { type F32x4T = (f32, f32, f32, f32); union A { data: F32x4T, vec: f32x4, } let x: F32x4T = unsafe { A { vec: f32x4::new(0., 1., 2., 3.) }.data }; assert_eq!(x.0, 0_f32); assert_eq!(x.1, 1_f32); assert_eq!(x.2, 2_f32); assert_eq!(x.3, 3_f32); let y: f32x4 = unsafe { A { data: (3., 2., 1., 0.) }.vec }; assert_eq!(y, f32x4::new(3., 2., 1., 0.)); #[cfg_attr(rustfmt, rustfmt_skip)] type I8x16T = (i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8); union B { data: I8x16T, vec: i8x16, } #[cfg_attr(rustfmt, rustfmt_skip)] let x = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); let x: I8x16T = unsafe { B { vec: x }.data }; assert_eq!(x.0, 0); assert_eq!(x.1, 1); assert_eq!(x.2, 2); assert_eq!(x.3, 3); assert_eq!(x.4, 4); assert_eq!(x.5, 5); assert_eq!(x.6, 6); assert_eq!(x.7, 7); assert_eq!(x.8, 8); assert_eq!(x.9, 9); assert_eq!(x.10, 10); assert_eq!(x.11, 11); assert_eq!(x.12, 12); assert_eq!(x.13, 13); assert_eq!(x.14, 14); assert_eq!(x.15, 15); #[cfg_attr(rustfmt, rustfmt_skip)] let y = ( 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ); let z: i8x16 = unsafe { B { data: y }.vec }; #[cfg_attr(rustfmt, rustfmt_skip)] let e = i8x16::new( 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ); assert_eq!(e, z); #[cfg_attr(rustfmt, rustfmt_skip)] type I16x8T = (i16, i16, i16, i16, i16, i16, i16, i16); union C { data: I16x8T, vec: i8x16, } #[cfg_attr(rustfmt, rustfmt_skip)] let x = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); let x: I16x8T = unsafe { C { vec: x }.data }; let e: [i16; 8] = if cfg!(target_endian = "little") { [256, 770, 1284, 1798, 2312, 2826, 3340, 3854] } else { [1, 515, 1029, 1543, 2057, 2571, 3085, 3599] }; assert_eq!(x.0, e[0]); assert_eq!(x.1, e[1]); assert_eq!(x.2, e[2]); assert_eq!(x.3, e[3]); assert_eq!(x.4, e[4]); assert_eq!(x.5, e[5]); assert_eq!(x.6, e[6]); assert_eq!(x.7, e[7]); #[cfg_attr(rustfmt, rustfmt_skip)] #[repr(C)] #[derive(Copy, Clone)] pub struct Tup(pub i8, pub i8, pub i16, pub i8, pub i8, pub i16, pub i8, pub i8, pub i16, pub i8, pub i8, pub i16); union D { data: Tup, vec: i8x16, } #[cfg_attr(rustfmt, rustfmt_skip)] let x = i8x16::new( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, ); let x: Tup = unsafe { D { vec: x }.data }; let e: [i16; 12] = if cfg!(target_endian = "little") { [0, 1, 770, 4, 5, 1798, 8, 9, 2826, 12, 13, 3854] } else { [0, 1, 515, 4, 5, 1543, 8, 9, 2571, 12, 13, 3599] }; assert_eq!(x.0 as i16, e[0]); assert_eq!(x.1 as
i16, e[1]); assert_eq!(x.2 as i16, e[2]); assert_eq!(x.3 as i16, e[3]); assert_eq!(x.4 as i16, e[4]); assert_eq!(x.5 as i16, e[5]); assert_eq!(x.6 as i16, e[6]); assert_eq!(x.7 as i16, e[7]); assert_eq!(x.8 as i16, e[8]); assert_eq!(x.9 as i16, e[9]); assert_eq!(x.10 as i16, e[10]); assert_eq!(x.11 as i16, e[11]); } packed_simd-0.3.3/.cargo_vcs_info.json0000644000000001120000000000000133330ustar00{ "git": { "sha1": "1bf435a0ba93c87b52ee9459a54d782a734365fd" } }