compiler_builtins-0.1.101/.cargo_vcs_info.json0000644000000001360000000000100147170ustar { "git": { "sha1": "8434a9f27f7f6db476c760372789a09402591b70" }, "path_in_vcs": "" }compiler_builtins-0.1.101/Cargo.lock0000644000000011440000000000100126720ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "cc" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7db2f146208d7e0fbee761b09cd65a7f51ccc38705d4e7262dad4d73b12a76b1" [[package]] name = "compiler_builtins" version = "0.1.101" dependencies = [ "cc", "rustc-std-workspace-core", ] [[package]] name = "rustc-std-workspace-core" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1956f5517128a2b6f23ab2dadf1a976f4f5b27962e7724c2bf3d45e539ec098c" compiler_builtins-0.1.101/Cargo.toml0000644000000032100000000000100127110ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "compiler_builtins" version = "0.1.101" authors = ["Jorge Aparicio "] links = "compiler-rt" include = [ "/Cargo.toml", "/build.rs", "/src/*", "/examples/*", "/LICENSE.txt", "/README.md", "/compiler-rt/*", "/libm/src/math/*", ] description = """ Compiler intrinsics used by the Rust compiler. Also available for other targets if necessary! """ homepage = "https://github.com/rust-lang/compiler-builtins" documentation = "https://docs.rs/compiler_builtins" readme = "README.md" license = "MIT/Apache-2.0" repository = "https://github.com/rust-lang/compiler-builtins" [profile.dev] panic = "abort" [profile.release] panic = "abort" [lib] test = false [[example]] name = "intrinsics" required-features = ["compiler-builtins"] [dependencies.core] version = "1.0.0" optional = true package = "rustc-std-workspace-core" [dev-dependencies] [build-dependencies.cc] version = "1.0" optional = true [features] c = ["cc"] compiler-builtins = [] default = ["compiler-builtins"] mangled-names = [] mem = [] no-asm = [] public-test-deps = [] rustc-dep-of-std = [ "compiler-builtins", "core", ] weak-intrinsics = [] compiler_builtins-0.1.101/Cargo.toml.orig000064400000000000000000000050211046102023000163740ustar 00000000000000[package] authors = ["Jorge Aparicio "] name = "compiler_builtins" version = "0.1.101" license = "MIT/Apache-2.0" readme = "README.md" repository = "https://github.com/rust-lang/compiler-builtins" homepage = "https://github.com/rust-lang/compiler-builtins" documentation = "https://docs.rs/compiler_builtins" edition = "2018" description = """ Compiler intrinsics used by the Rust compiler. Also available for other targets if necessary! 
""" include = [ '/Cargo.toml', '/build.rs', '/src/*', '/examples/*', '/LICENSE.txt', '/README.md', '/compiler-rt/*', '/libm/src/math/*', ] links = 'compiler-rt' [lib] test = false [dependencies] # For more information on this dependency see rust-lang/rust's # `src/tools/rustc-std-workspace` folder core = { version = "1.0.0", optional = true, package = 'rustc-std-workspace-core' } [build-dependencies] cc = { optional = true, version = "1.0" } [dev-dependencies] panic-handler = { path = 'crates/panic-handler' } [features] default = ["compiler-builtins"] # Enable compilation of C code in compiler-rt, filling in some more optimized # implementations and also filling in unimplemented intrinsics c = ["cc"] # Workaround for the Cranelift codegen backend. Disables any implementations # which use inline assembly and fall back to pure Rust versions (if avalible). no-asm = [] # Flag this library as the unstable compiler-builtins lib compiler-builtins = [] # Generate memory-related intrinsics like memcpy mem = [] # Mangle all names so this can be linked in with other versions or other # compiler-rt implementations. Also used for testing mangled-names = [] # Only used in the compiler's build system rustc-dep-of-std = ['compiler-builtins', 'core'] # This makes certain traits and function specializations public that # are not normally public but are required by the `testcrate` public-test-deps = [] # Marks all intrinsics functions with weak linkage so that they can be # replaced at link time by another implementation. This is particularly useful # for mixed Rust/C++ binaries that want to use the C++ intrinsics, otherwise # linking against the Rust stdlib will replace those from the compiler-rt # library. # # Unlike the "c" feature, the intrinsics are still provided by the Rust # implementations and each will be used unless a stronger symbol replaces # it during linking. weak-intrinsics = [] [[example]] name = "intrinsics" required-features = ["compiler-builtins"] [workspace] members = ["testcrate"] [profile.release] panic = 'abort' [profile.dev] panic = 'abort' compiler_builtins-0.1.101/LICENSE.txt000064400000000000000000000103131046102023000153300ustar 00000000000000============================================================================== compiler-builtins License ============================================================================== The compiler-builtins crate is dual licensed under both the University of Illinois "BSD-Like" license and the MIT license. As a user of this code you may choose to use it under either license. As a contributor, you agree to allow your code to be used under both. Full text of the relevant licenses is included below. ============================================================================== University of Illinois/NCSA Open Source License Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT All rights reserved. Developed by: LLVM Team University of Illinois at Urbana-Champaign http://llvm.org Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal with the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimers. 
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimers in the documentation and/or other materials provided with the distribution. * Neither the names of the LLVM Team, University of Illinois at Urbana-Champaign, nor the names of its contributors may be used to endorse or promote products derived from this Software without specific prior written permission. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE. ============================================================================== Copyright (c) 2009-2015 by the contributors listed in CREDITS.TXT Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ============================================================================== Copyrights and Licenses for Third Party Software Distributed with LLVM: ============================================================================== The LLVM software contains code written by third parties. Such software will have its own individual LICENSE.TXT file in the directory in which it appears. This file will describe the copyrights, license, and restrictions which apply to that code. The disclaimer of warranty in the University of Illinois Open Source License applies to all code in the LLVM Distribution, and nothing in any of the other licenses gives permission to use the names of the LLVM Team or the University of Illinois to endorse or promote products derived from this Software. compiler_builtins-0.1.101/README.md000064400000000000000000000252631046102023000147760ustar 00000000000000# `compiler-builtins` > Porting `compiler-rt` intrinsics to Rust See [rust-lang/rust#35437][0]. [0]: https://github.com/rust-lang/rust/issues/35437 ## When and how to use this crate? If you are working with a target that doesn't have binary releases of std available via rustup (this probably means you are building the core crate yourself) and need compiler-rt intrinsics (i.e. you are probably getting linker errors when building an executable: `undefined reference to __aeabi_memcpy`), you can use this crate to get those intrinsics and solve the linker errors. 
To do that, add this crate somewhere in the dependency graph of the crate you are building: ``` toml # Cargo.toml [dependencies] compiler_builtins = { git = "https://github.com/rust-lang/compiler-builtins" } ``` ``` rust extern crate compiler_builtins; // ... ``` If you still get an "undefined reference to $INTRINSIC" error after that change, that means that we haven't ported `$INTRINSIC` to Rust yet! Please open [an issue] with the name of the intrinsic and the LLVM triple (e.g. thumbv7m-none-eabi) of the target you are using. That way we can prioritize porting that particular intrinsic. If you've got a C compiler available for your target then while we implement this intrinsic you can temporarily enable a fallback to the actual compiler-rt implementation as well for unimplemented intrinsics: ```toml [dependencies.compiler_builtins] git = "https://github.com/rust-lang/compiler-builtins" features = ["c"] ``` [an issue]: https://github.com/rust-lang/compiler-builtins/issues ## Contributing 1. Pick one or more intrinsics from the [pending list](#progress). 2. Fork this repository. 3. Port the intrinsic(s) and their corresponding [unit tests][1] from their [C implementation][2] to Rust. 4. Implement a [test generator][3] to compare the behavior of the ported intrinsic(s) with their implementation on the testing host. Note that randomized compiler-builtin tests should be run using `cargo test --features gen-tests`. 4. Send a Pull Request (PR). 5. Once the PR passes our extensive [testing infrastructure][4], we'll merge it! 6. Celebrate :tada: [1]: https://github.com/rust-lang/llvm-project/tree/9e3de9490ff580cd484fbfa2908292b4838d56e7/compiler-rt/test/builtins/Unit [2]: https://github.com/rust-lang/llvm-project/tree/9e3de9490ff580cd484fbfa2908292b4838d56e7/compiler-rt/lib/builtins [3]: https://github.com/rust-lang/compiler-builtins/blob/0ba07e49264a54cb5bbd4856fcea083bb3fbec15/build.rs#L180-L265 [4]: https://travis-ci.org/rust-lang/compiler-builtins ### Porting Reminders 1. [Rust][5a] and [C][5b] have slightly different operator precedence. C evaluates comparisons (`== !=`) before bitwise operations (`& | ^`), while Rust evaluates the other way. 2. C assumes wrapping operations everywhere. Rust panics on overflow when in debug mode. Consider using the [Wrapping][6] type or the explicit [wrapping_*][7] functions where applicable. 3. Note [C implicit casts][8], especially integer promotion. Rust is much more explicit about casting, so be sure that any cast which affects the output is ported to the Rust implementation. 4. Rust has [many functions][9] for integer or floating point manipulation in the standard library. Consider using one of these functions rather than porting a new one. 
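As a concrete sketch of reminders 1 and 2 (illustrative only; the function names below are not taken from this crate):

``` rust
use core::num::Wrapping;

// Reminder 1: precedence differs. In C, `a & mask == 0` parses as
// `a & (mask == 0)`; in Rust the same text parses as `(a & mask) == 0`,
// so carry the original grouping over with explicit parentheses.
fn low_bits_clear(a: u32, mask: u32) -> bool {
    (a & mask) == 0
}

// Reminder 2: C arithmetic wraps silently, while Rust panics on overflow in
// debug builds, so spell the wraparound out.
fn add_lo(a: u32, b: u32) -> u32 {
    a.wrapping_add(b)
}

// The `Wrapping` newtype gives ordinary operators the same wrapping semantics.
fn add_lo_wrapping(a: Wrapping<u32>, b: Wrapping<u32>) -> Wrapping<u32> {
    a + b
}
```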
[5a]: https://doc.rust-lang.org/reference/expressions.html#expression-precedence [5b]: http://en.cppreference.com/w/c/language/operator_precedence [6]: https://doc.rust-lang.org/core/num/struct.Wrapping.html [7]: https://doc.rust-lang.org/std/primitive.i32.html#method.wrapping_add [8]: http://en.cppreference.com/w/cpp/language/implicit_conversion [9]: https://doc.rust-lang.org/std/primitive.i32.html ## Progress - [x] adddf3.c - [x] addsf3.c - [x] arm/adddf3vfp.S - [x] arm/addsf3vfp.S - [x] arm/aeabi_dcmp.S - [x] arm/aeabi_fcmp.S - [x] arm/aeabi_idivmod.S - [x] arm/aeabi_ldivmod.S - [x] arm/aeabi_memcpy.S - [x] arm/aeabi_memmove.S - [x] arm/aeabi_memset.S - [x] arm/aeabi_uidivmod.S - [x] arm/aeabi_uldivmod.S - [x] arm/divdf3vfp.S - [ ] arm/divmodsi4.S (generic version is done) - [x] arm/divsf3vfp.S - [ ] arm/divsi3.S (generic version is done) - [x] arm/eqdf2vfp.S - [x] arm/eqsf2vfp.S - [x] arm/extendsfdf2vfp.S - [ ] arm/fixdfsivfp.S - [ ] arm/fixsfsivfp.S - [ ] arm/fixunsdfsivfp.S - [ ] arm/fixunssfsivfp.S - [ ] arm/floatsidfvfp.S - [ ] arm/floatsisfvfp.S - [ ] arm/floatunssidfvfp.S - [ ] arm/floatunssisfvfp.S - [x] arm/gedf2vfp.S - [x] arm/gesf2vfp.S - [x] arm/gtdf2vfp.S - [x] arm/gtsf2vfp.S - [x] arm/ledf2vfp.S - [x] arm/lesf2vfp.S - [x] arm/ltdf2vfp.S - [x] arm/ltsf2vfp.S - [ ] arm/modsi3.S (generic version is done) - [x] arm/muldf3vfp.S - [x] arm/mulsf3vfp.S - [x] arm/nedf2vfp.S - [ ] arm/negdf2vfp.S - [ ] arm/negsf2vfp.S - [x] arm/nesf2vfp.S - [x] arm/softfloat-alias.list - [x] arm/subdf3vfp.S - [x] arm/subsf3vfp.S - [x] arm/truncdfsf2vfp.S - [ ] arm/udivmodsi4.S (generic version is done) - [ ] arm/udivsi3.S (generic version is done) - [ ] arm/umodsi3.S (generic version is done) - [ ] arm/unorddf2vfp.S - [ ] arm/unordsf2vfp.S - [x] ashldi3.c - [x] ashrdi3.c - [x] comparedf2.c - [x] comparesf2.c - [x] divdf3.c - [x] divdi3.c - [x] divmoddi4.c - [x] divmodsi4.c - [x] divsf3.c - [x] divsi3.c - [ ] extendhfsf2.c - [x] extendsfdf2.c - [x] fixdfdi.c - [x] fixdfsi.c - [x] fixsfdi.c - [x] fixsfsi.c - [x] fixunsdfdi.c - [x] fixunsdfsi.c - [x] fixunssfdi.c - [x] fixunssfsi.c - [x] floatdidf.c - [x] floatdisf.c - [x] floatsidf.c - [x] floatsisf.c - [x] floatundidf.c - [x] floatundisf.c - [x] floatunsidf.c - [x] floatunsisf.c - [ ] i386/ashldi3.S - [ ] i386/ashrdi3.S - [x] i386/chkstk.S - [x] i386/chkstk2.S - [ ] i386/divdi3.S - [ ] i386/lshrdi3.S - [ ] i386/moddi3.S - [ ] i386/muldi3.S - [ ] i386/udivdi3.S - [ ] i386/umoddi3.S - [x] lshrdi3.c - [x] moddi3.c - [x] modsi3.c - [x] muldf3.c - [x] muldi3.c - [x] mulodi4.c - [x] mulosi4.c - [x] mulsf3.c - [x] powidf2.c - [x] powisf2.c - [x] subdf3.c - [x] subsf3.c - [ ] truncdfhf2.c - [x] truncdfsf2.c - [ ] truncsfhf2.c - [x] udivdi3.c - [x] udivmoddi4.c - [x] udivmodsi4.c - [x] udivsi3.c - [x] umoddi3.c - [x] umodsi3.c - [x] x86_64/chkstk.S - [x] x86_64/chkstk2.S These builtins are needed to support 128-bit integers, which are in the process of being added to Rust. - [x] ashlti3.c - [x] ashrti3.c - [x] divti3.c - [x] fixdfti.c - [x] fixsfti.c - [x] fixunsdfti.c - [x] fixunssfti.c - [x] floattidf.c - [x] floattisf.c - [x] floatuntidf.c - [x] floatuntisf.c - [x] lshrti3.c - [x] modti3.c - [x] muloti4.c - [x] multi3.c - [x] udivmodti4.c - [x] udivti3.c - [x] umodti3.c ## Unimplemented functions These builtins involve floating-point types ("`f128`", "`f80`" and complex numbers) that are not supported by Rust. 
- ~~addtf3.c~~ - ~~comparetf2.c~~ - ~~divdc3.c~~ - ~~divsc3.c~~ - ~~divtc3.c~~ - ~~divtf3.c~~ - ~~divxc3.c~~ - ~~extenddftf2.c~~ - ~~extendsftf2.c~~ - ~~fixtfdi.c~~ - ~~fixtfsi.c~~ - ~~fixtfti.c~~ - ~~fixunstfdi.c~~ - ~~fixunstfsi.c~~ - ~~fixunstfti.c~~ - ~~fixunsxfdi.c~~ - ~~fixunsxfsi.c~~ - ~~fixunsxfti.c~~ - ~~fixxfdi.c~~ - ~~fixxfti.c~~ - ~~floatditf.c~~ - ~~floatdixf.c~~ - ~~floatsitf.c~~ - ~~floattixf.c~~ - ~~floatunditf.c~~ - ~~floatundixf.c~~ - ~~floatunsitf.c~~ - ~~floatuntixf.c~~ - ~~i386/floatdixf.S~~ - ~~i386/floatundixf.S~~ - ~~muldc3.c~~ - ~~mulsc3.c~~ - ~~multc3.c~~ - ~~multf3.c~~ - ~~mulxc3.c~~ - ~~powitf2.c~~ - ~~powixf2.c~~ - ~~ppc/divtc3.c~~ - ~~ppc/fixtfdi.c~~ - ~~ppc/fixunstfdi.c~~ - ~~ppc/floatditf.c~~ - ~~ppc/floatunditf.c~~ - ~~ppc/gcc_qadd.c~~ - ~~ppc/gcc_qdiv.c~~ - ~~ppc/gcc_qmul.c~~ - ~~ppc/gcc_qsub.c~~ - ~~ppc/multc3.c~~ - ~~subtf3.c~~ - ~~trunctfdf2.c~~ - ~~trunctfsf2.c~~ - ~~x86_64/floatdixf.c~~ - ~~x86_64/floatundixf.S~~ These builtins are never called by LLVM. - ~~absvdi2.c~~ - ~~absvsi2.c~~ - ~~absvti2.c~~ - ~~addvdi3.c~~ - ~~addvsi3.c~~ - ~~addvti3.c~~ - ~~arm/aeabi_cdcmp.S~~ - ~~arm/aeabi_cdcmpeq_check_nan.c~~ - ~~arm/aeabi_cfcmp.S~~ - ~~arm/aeabi_cfcmpeq_check_nan.c~~ - ~~arm/aeabi_div0.c~~ - ~~arm/aeabi_drsub.c~~ - ~~arm/aeabi_frsub.c~~ - ~~arm/aeabi_memcmp.S~~ - ~~arm/bswapdi2.S~~ - ~~arm/bswapsi2.S~~ - ~~arm/clzdi2.S~~ - ~~arm/clzsi2.S~~ - ~~arm/comparesf2.S~~ - ~~arm/restore_vfp_d8_d15_regs.S~~ - ~~arm/save_vfp_d8_d15_regs.S~~ - ~~arm/switch16.S~~ - ~~arm/switch32.S~~ - ~~arm/switch8.S~~ - ~~arm/switchu8.S~~ - ~~clzdi2.c~~ - ~~clzsi2.c~~ - ~~clzti2.c~~ - ~~cmpdi2.c~~ - ~~cmpti2.c~~ - ~~ctzdi2.c~~ - ~~ctzsi2.c~~ - ~~ctzti2.c~~ - ~~ffsdi2.c~~ - this is [called by gcc][jemalloc-fail] though! - ~~ffsti2.c~~ - ~~mulvdi3.c~~ - ~~mulvsi3.c~~ - ~~mulvti3.c~~ - ~~negdf2.c~~ - ~~negdi2.c~~ - ~~negsf2.c~~ - ~~negti2.c~~ - ~~negvdi2.c~~ - ~~negvsi2.c~~ - ~~negvti2.c~~ - ~~paritydi2.c~~ - ~~paritysi2.c~~ - ~~parityti2.c~~ - ~~popcountdi2.c~~ - ~~popcountsi2.c~~ - ~~popcountti2.c~~ - ~~ppc/restFP.S~~ - ~~ppc/saveFP.S~~ - ~~subvdi3.c~~ - ~~subvsi3.c~~ - ~~subvti3.c~~ - ~~ucmpdi2.c~~ - ~~ucmpti2.c~~ - ~~udivmodti4.c~~ [jemalloc-fail]: https://travis-ci.org/rust-lang/rust/jobs/249772758 Rust only exposes atomic types on platforms that support them, and therefore does not need to fall back to software implementations. - ~~arm/sync_fetch_and_add_4.S~~ - ~~arm/sync_fetch_and_add_8.S~~ - ~~arm/sync_fetch_and_and_4.S~~ - ~~arm/sync_fetch_and_and_8.S~~ - ~~arm/sync_fetch_and_max_4.S~~ - ~~arm/sync_fetch_and_max_8.S~~ - ~~arm/sync_fetch_and_min_4.S~~ - ~~arm/sync_fetch_and_min_8.S~~ - ~~arm/sync_fetch_and_nand_4.S~~ - ~~arm/sync_fetch_and_nand_8.S~~ - ~~arm/sync_fetch_and_or_4.S~~ - ~~arm/sync_fetch_and_or_8.S~~ - ~~arm/sync_fetch_and_sub_4.S~~ - ~~arm/sync_fetch_and_sub_8.S~~ - ~~arm/sync_fetch_and_umax_4.S~~ - ~~arm/sync_fetch_and_umax_8.S~~ - ~~arm/sync_fetch_and_umin_4.S~~ - ~~arm/sync_fetch_and_umin_8.S~~ - ~~arm/sync_fetch_and_xor_4.S~~ - ~~arm/sync_fetch_and_xor_8.S~~ - ~~arm/sync_synchronize.S~~ - ~~atomic.c~~ - ~~atomic_flag_clear.c~~ - ~~atomic_flag_clear_explicit.c~~ - ~~atomic_flag_test_and_set.c~~ - ~~atomic_flag_test_and_set_explicit.c~~ - ~~atomic_signal_fence.c~~ - ~~atomic_thread_fence.c~~ Miscellaneous functionality that is not used by Rust. 
- ~~apple_versioning.c~~ - ~~clear_cache.c~~ - ~~emutls.c~~ - ~~enable_execute_stack.c~~ - ~~eprintf.c~~ - ~~gcc_personality_v0.c~~ - ~~trampoline_setup.c~~ Floating-point implementations of builtins that are only called from soft-float code. It would be better to simply use the generic soft-float versions in this case. - ~~i386/floatdidf.S~~ - ~~i386/floatdisf.S~~ - ~~i386/floatundidf.S~~ - ~~i386/floatundisf.S~~ - ~~x86_64/floatundidf.S~~ - ~~x86_64/floatundisf.S~~ - ~~x86_64/floatdidf.c~~ - ~~x86_64/floatdisf.c~~ ## License The compiler-builtins crate is dual licensed under both the University of Illinois "BSD-Like" license and the MIT license. As a user of this code you may choose to use it under either license. As a contributor, you agree to allow your code to be used under both. Full text of the relevant licenses is in LICENSE.TXT. compiler_builtins-0.1.101/build.rs000064400000000000000000000670301046102023000151620ustar 00000000000000use std::{collections::HashMap, env, sync::atomic::Ordering}; fn main() { println!("cargo:rerun-if-changed=build.rs"); let target = env::var("TARGET").unwrap(); let cwd = env::current_dir().unwrap(); println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display()); // Activate libm's unstable features to make full use of Nightly. println!("cargo:rustc-cfg=feature=\"unstable\""); // Emscripten's runtime includes all the builtins if target.contains("emscripten") { return; } // OpenBSD provides compiler_rt by default, use it instead of rebuilding it from source if target.contains("openbsd") { println!("cargo:rustc-link-search=native=/usr/lib"); println!("cargo:rustc-link-lib=compiler_rt"); return; } // Forcibly enable memory intrinsics on wasm & SGX as we don't have a libc to // provide them. if (target.contains("wasm") && !target.contains("wasi")) || (target.contains("sgx") && target.contains("fortanix")) || target.contains("-none") || target.contains("nvptx") || target.contains("uefi") || target.contains("xous") { println!("cargo:rustc-cfg=feature=\"mem\""); } // These targets have hardware unaligned access support. if target.contains("x86_64") || target.contains("i686") || target.contains("aarch64") || target.contains("bpf") { println!("cargo:rustc-cfg=feature=\"mem-unaligned\""); } // NOTE we are going to assume that llvm-target, what determines our codegen option, matches the // target triple. This is usually correct for our built-in targets but can break in presence of // custom targets, which can have arbitrary names. let llvm_target = target.split('-').collect::>(); // Build missing intrinsics from compiler-rt C source code. If we're // mangling names though we assume that we're also in test mode so we don't // build anything and we rely on the upstream implementation of compiler-rt // functions if !cfg!(feature = "mangled-names") && cfg!(feature = "c") { // Don't use a C compiler for these targets: // // * wasm - clang for wasm is somewhat hard to come by and it's // unlikely that the C is really that much better than our own Rust. // * nvptx - everything is bitcode, not compatible with mixed C/Rust // * riscv - the rust-lang/rust distribution container doesn't have a C // compiler. 
if !target.contains("wasm") && !target.contains("nvptx") && (!target.starts_with("riscv") || target.contains("xous")) { #[cfg(feature = "c")] c::compile(&llvm_target, &target); } } // To compile intrinsics.rs for thumb targets, where there is no libc if llvm_target[0].starts_with("thumb") { println!("cargo:rustc-cfg=thumb") } // compiler-rt `cfg`s away some intrinsics for thumbv6m and thumbv8m.base because // these targets do not have full Thumb-2 support but only original Thumb-1. // We have to cfg our code accordingly. if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" { println!("cargo:rustc-cfg=thumb_1") } // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. This // includes the old androideabi. It is deprecated but it is available as a // rustc target (arm-linux-androideabi). if llvm_target[0] == "armv4t" || llvm_target[0] == "armv5te" || target == "arm-linux-androideabi" { println!("cargo:rustc-cfg=kernel_user_helpers") } if llvm_target[0].starts_with("aarch64") { generate_aarch64_outlined_atomics(); } } fn aarch64_symbol(ordering: Ordering) -> &'static str { match ordering { Ordering::Relaxed => "relax", Ordering::Acquire => "acq", Ordering::Release => "rel", Ordering::AcqRel => "acq_rel", _ => panic!("unknown symbol for {:?}", ordering), } } /// The `concat_idents` macro is extremely annoying and doesn't allow us to define new items. /// Define them from the build script instead. /// Note that the majority of the code is still defined in `aarch64.rs` through inline macros. fn generate_aarch64_outlined_atomics() { use std::fmt::Write; // #[macro_export] so that we can use this in tests let gen_macro = |name| format!("#[macro_export] macro_rules! foreach_{name} {{ ($macro:path) => {{\n"); // Generate different macros for add/clr/eor/set so that we can test them separately. let sym_names = ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"]; let mut macros = HashMap::new(); for sym in sym_names { macros.insert(sym, gen_macro(sym)); } // Only CAS supports 16 bytes, and it has a different implementation that uses a different macro. let mut cas16 = gen_macro("cas16"); for ordering in [ Ordering::Relaxed, Ordering::Acquire, Ordering::Release, Ordering::AcqRel, ] { let sym_ordering = aarch64_symbol(ordering); for size in [1, 2, 4, 8] { for (sym, macro_) in &mut macros { let name = format!("__aarch64_{sym}{size}_{sym_ordering}"); writeln!(macro_, "$macro!( {ordering:?}, {size}, {name} );").unwrap(); } } let name = format!("__aarch64_cas16_{sym_ordering}"); writeln!(cas16, "$macro!( {ordering:?}, {name} );").unwrap(); } let mut buf = String::new(); for macro_def in macros.values().chain(std::iter::once(&cas16)) { buf += macro_def; buf += "}; }"; } let dst = std::env::var("OUT_DIR").unwrap() + "/outlined_atomics.rs"; std::fs::write(dst, buf).unwrap(); } #[cfg(feature = "c")] mod c { extern crate cc; use std::collections::{BTreeMap, HashSet}; use std::env; use std::fs::{self, File}; use std::io::Write; use std::path::{Path, PathBuf}; struct Sources { // SYMBOL -> PATH TO SOURCE map: BTreeMap<&'static str, &'static str>, } impl Sources { fn new() -> Sources { Sources { map: BTreeMap::new(), } } fn extend(&mut self, sources: &[(&'static str, &'static str)]) { // NOTE Some intrinsics have both a generic implementation (e.g. // `floatdidf.c`) and an arch optimized implementation // (`x86_64/floatdidf.c`). In those cases, we keep the arch optimized // implementation and discard the generic implementation. 
If we don't // and keep both implementations, the linker will yell at us about // duplicate symbols! for (symbol, src) in sources { if src.contains("/") { // Arch-optimized implementation (preferred) self.map.insert(symbol, src); } else { // Generic implementation if !self.map.contains_key(symbol) { self.map.insert(symbol, src); } } } } fn remove(&mut self, symbols: &[&str]) { for symbol in symbols { self.map.remove(*symbol).unwrap(); } } } /// Compile intrinsics from the compiler-rt C source code pub fn compile(llvm_target: &[&str], target: &String) { let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); let target_env = env::var("CARGO_CFG_TARGET_ENV").unwrap(); let target_os = env::var("CARGO_CFG_TARGET_OS").unwrap(); let target_vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap(); let mut consider_float_intrinsics = true; let cfg = &mut cc::Build::new(); // AArch64 GCCs exit with an error condition when they encounter any kind of floating point // code if the `nofp` and/or `nosimd` compiler flags have been set. // // Therefore, evaluate if those flags are present and set a boolean that causes any // compiler-rt intrinsics that contain floating point source to be excluded for this target. if target_arch == "aarch64" { let cflags_key = String::from("CFLAGS_") + &(target.to_owned().replace("-", "_")); if let Ok(cflags_value) = env::var(cflags_key) { if cflags_value.contains("+nofp") || cflags_value.contains("+nosimd") { consider_float_intrinsics = false; } } } cfg.warnings(false); if target_env == "msvc" { // Don't pull in extra libraries on MSVC cfg.flag("/Zl"); // Emulate C99 and C++11's __func__ for MSVC prior to 2013 CTP cfg.define("__func__", Some("__FUNCTION__")); } else { // Turn off various features of gcc and such, mostly copying // compiler-rt's build system already cfg.flag("-fno-builtin"); cfg.flag("-fvisibility=hidden"); cfg.flag("-ffreestanding"); // Avoid the following warning appearing once **per file**: // clang: warning: optimization flag '-fomit-frame-pointer' is not supported for target 'armv7' [-Wignored-optimization-argument] // // Note that compiler-rt's build system also checks // // `check_cxx_compiler_flag(-fomit-frame-pointer COMPILER_RT_HAS_FOMIT_FRAME_POINTER_FLAG)` // // in https://github.com/rust-lang/compiler-rt/blob/c8fbcb3/cmake/config-ix.cmake#L19. cfg.flag_if_supported("-fomit-frame-pointer"); cfg.define("VISIBILITY_HIDDEN", None); } // int_util.c tries to include stdlib.h if `_WIN32` is defined, // which it is when compiling UEFI targets with clang. This is // at odds with compiling with `-ffreestanding`, as the header // may be incompatible or not present. Create a minimal stub // header to use instead. 
if target_os == "uefi" { let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); let include_dir = out_dir.join("include"); if !include_dir.exists() { fs::create_dir(&include_dir).unwrap(); } fs::write(include_dir.join("stdlib.h"), "#include ").unwrap(); cfg.flag(&format!("-I{}", include_dir.to_str().unwrap())); } let mut sources = Sources::new(); sources.extend(&[ ("__absvdi2", "absvdi2.c"), ("__absvsi2", "absvsi2.c"), ("__addvdi3", "addvdi3.c"), ("__addvsi3", "addvsi3.c"), ("__clzdi2", "clzdi2.c"), ("__clzsi2", "clzsi2.c"), ("__cmpdi2", "cmpdi2.c"), ("__ctzdi2", "ctzdi2.c"), ("__ctzsi2", "ctzsi2.c"), ("__int_util", "int_util.c"), ("__mulvdi3", "mulvdi3.c"), ("__mulvsi3", "mulvsi3.c"), ("__negdi2", "negdi2.c"), ("__negvdi2", "negvdi2.c"), ("__negvsi2", "negvsi2.c"), ("__paritydi2", "paritydi2.c"), ("__paritysi2", "paritysi2.c"), ("__popcountdi2", "popcountdi2.c"), ("__popcountsi2", "popcountsi2.c"), ("__subvdi3", "subvdi3.c"), ("__subvsi3", "subvsi3.c"), ("__ucmpdi2", "ucmpdi2.c"), ]); if consider_float_intrinsics { sources.extend(&[ ("__divdc3", "divdc3.c"), ("__divsc3", "divsc3.c"), ("__divxc3", "divxc3.c"), ("__extendhfsf2", "extendhfsf2.c"), ("__muldc3", "muldc3.c"), ("__mulsc3", "mulsc3.c"), ("__mulxc3", "mulxc3.c"), ("__negdf2", "negdf2.c"), ("__negsf2", "negsf2.c"), ("__powixf2", "powixf2.c"), ("__truncdfhf2", "truncdfhf2.c"), ("__truncsfhf2", "truncsfhf2.c"), ]); } // When compiling in rustbuild (the rust-lang/rust repo) this library // also needs to satisfy intrinsics that jemalloc or C in general may // need, so include a few more that aren't typically needed by // LLVM/Rust. if cfg!(feature = "rustbuild") { sources.extend(&[("__ffsdi2", "ffsdi2.c")]); } // On iOS and 32-bit OSX these are all just empty intrinsics, no need to // include them. if target_os != "ios" && target_os != "watchos" && (target_vendor != "apple" || target_arch != "x86") { sources.extend(&[ ("__absvti2", "absvti2.c"), ("__addvti3", "addvti3.c"), ("__clzti2", "clzti2.c"), ("__cmpti2", "cmpti2.c"), ("__ctzti2", "ctzti2.c"), ("__ffsti2", "ffsti2.c"), ("__mulvti3", "mulvti3.c"), ("__negti2", "negti2.c"), ("__parityti2", "parityti2.c"), ("__popcountti2", "popcountti2.c"), ("__subvti3", "subvti3.c"), ("__ucmpti2", "ucmpti2.c"), ]); if consider_float_intrinsics { sources.extend(&[("__negvti2", "negvti2.c")]); } } if target_vendor == "apple" { sources.extend(&[ ("atomic_flag_clear", "atomic_flag_clear.c"), ("atomic_flag_clear_explicit", "atomic_flag_clear_explicit.c"), ("atomic_flag_test_and_set", "atomic_flag_test_and_set.c"), ( "atomic_flag_test_and_set_explicit", "atomic_flag_test_and_set_explicit.c", ), ("atomic_signal_fence", "atomic_signal_fence.c"), ("atomic_thread_fence", "atomic_thread_fence.c"), ]); } if target_env == "msvc" { if target_arch == "x86_64" { sources.extend(&[("__floatdixf", "x86_64/floatdixf.c")]); } } else { // None of these seem to be used on x86_64 windows, and they've all // got the wrong ABI anyway, so we want to avoid them. 
if target_os != "windows" { if target_arch == "x86_64" { sources.extend(&[ ("__floatdixf", "x86_64/floatdixf.c"), ("__floatundixf", "x86_64/floatundixf.S"), ]); } } if target_arch == "x86" { sources.extend(&[ ("__ashldi3", "i386/ashldi3.S"), ("__ashrdi3", "i386/ashrdi3.S"), ("__divdi3", "i386/divdi3.S"), ("__floatdixf", "i386/floatdixf.S"), ("__floatundixf", "i386/floatundixf.S"), ("__lshrdi3", "i386/lshrdi3.S"), ("__moddi3", "i386/moddi3.S"), ("__muldi3", "i386/muldi3.S"), ("__udivdi3", "i386/udivdi3.S"), ("__umoddi3", "i386/umoddi3.S"), ]); } } if target_arch == "arm" && target_os != "ios" && target_os != "watchos" && target_env != "msvc" { sources.extend(&[ ("__aeabi_div0", "arm/aeabi_div0.c"), ("__aeabi_drsub", "arm/aeabi_drsub.c"), ("__aeabi_frsub", "arm/aeabi_frsub.c"), ("__bswapdi2", "arm/bswapdi2.S"), ("__bswapsi2", "arm/bswapsi2.S"), ("__clzdi2", "arm/clzdi2.S"), ("__clzsi2", "arm/clzsi2.S"), ("__divmodsi4", "arm/divmodsi4.S"), ("__divsi3", "arm/divsi3.S"), ("__modsi3", "arm/modsi3.S"), ("__switch16", "arm/switch16.S"), ("__switch32", "arm/switch32.S"), ("__switch8", "arm/switch8.S"), ("__switchu8", "arm/switchu8.S"), ("__sync_synchronize", "arm/sync_synchronize.S"), ("__udivmodsi4", "arm/udivmodsi4.S"), ("__udivsi3", "arm/udivsi3.S"), ("__umodsi3", "arm/umodsi3.S"), ]); if target_os == "freebsd" { sources.extend(&[("__clear_cache", "clear_cache.c")]); } // First of all aeabi_cdcmp and aeabi_cfcmp are never called by LLVM. // Second are little-endian only, so build fail on big-endian targets. // Temporally workaround: exclude these files for big-endian targets. if !llvm_target[0].starts_with("thumbeb") && !llvm_target[0].starts_with("armeb") { sources.extend(&[ ("__aeabi_cdcmp", "arm/aeabi_cdcmp.S"), ("__aeabi_cdcmpeq_check_nan", "arm/aeabi_cdcmpeq_check_nan.c"), ("__aeabi_cfcmp", "arm/aeabi_cfcmp.S"), ("__aeabi_cfcmpeq_check_nan", "arm/aeabi_cfcmpeq_check_nan.c"), ]); } } if llvm_target[0] == "armv7" { sources.extend(&[ ("__sync_fetch_and_add_4", "arm/sync_fetch_and_add_4.S"), ("__sync_fetch_and_add_8", "arm/sync_fetch_and_add_8.S"), ("__sync_fetch_and_and_4", "arm/sync_fetch_and_and_4.S"), ("__sync_fetch_and_and_8", "arm/sync_fetch_and_and_8.S"), ("__sync_fetch_and_max_4", "arm/sync_fetch_and_max_4.S"), ("__sync_fetch_and_max_8", "arm/sync_fetch_and_max_8.S"), ("__sync_fetch_and_min_4", "arm/sync_fetch_and_min_4.S"), ("__sync_fetch_and_min_8", "arm/sync_fetch_and_min_8.S"), ("__sync_fetch_and_nand_4", "arm/sync_fetch_and_nand_4.S"), ("__sync_fetch_and_nand_8", "arm/sync_fetch_and_nand_8.S"), ("__sync_fetch_and_or_4", "arm/sync_fetch_and_or_4.S"), ("__sync_fetch_and_or_8", "arm/sync_fetch_and_or_8.S"), ("__sync_fetch_and_sub_4", "arm/sync_fetch_and_sub_4.S"), ("__sync_fetch_and_sub_8", "arm/sync_fetch_and_sub_8.S"), ("__sync_fetch_and_umax_4", "arm/sync_fetch_and_umax_4.S"), ("__sync_fetch_and_umax_8", "arm/sync_fetch_and_umax_8.S"), ("__sync_fetch_and_umin_4", "arm/sync_fetch_and_umin_4.S"), ("__sync_fetch_and_umin_8", "arm/sync_fetch_and_umin_8.S"), ("__sync_fetch_and_xor_4", "arm/sync_fetch_and_xor_4.S"), ("__sync_fetch_and_xor_8", "arm/sync_fetch_and_xor_8.S"), ]); } if llvm_target.last().unwrap().ends_with("eabihf") { if !llvm_target[0].starts_with("thumbv7em") && !llvm_target[0].starts_with("thumbv8m.main") { // The FPU option chosen for these architectures in cc-rs, ie: // -mfpu=fpv4-sp-d16 for thumbv7em // -mfpu=fpv5-sp-d16 for thumbv8m.main // do not support double precision floating points conversions so the files // that include such instructions are not included for 
these targets. sources.extend(&[ ("__fixdfsivfp", "arm/fixdfsivfp.S"), ("__fixunsdfsivfp", "arm/fixunsdfsivfp.S"), ("__floatsidfvfp", "arm/floatsidfvfp.S"), ("__floatunssidfvfp", "arm/floatunssidfvfp.S"), ]); } sources.extend(&[ ("__fixsfsivfp", "arm/fixsfsivfp.S"), ("__fixunssfsivfp", "arm/fixunssfsivfp.S"), ("__floatsisfvfp", "arm/floatsisfvfp.S"), ("__floatunssisfvfp", "arm/floatunssisfvfp.S"), ("__floatunssisfvfp", "arm/floatunssisfvfp.S"), ("__restore_vfp_d8_d15_regs", "arm/restore_vfp_d8_d15_regs.S"), ("__save_vfp_d8_d15_regs", "arm/save_vfp_d8_d15_regs.S"), ("__negdf2vfp", "arm/negdf2vfp.S"), ("__negsf2vfp", "arm/negsf2vfp.S"), ]); } if target_arch == "aarch64" && consider_float_intrinsics { sources.extend(&[ ("__comparetf2", "comparetf2.c"), ("__extenddftf2", "extenddftf2.c"), ("__extendsftf2", "extendsftf2.c"), ("__fixtfdi", "fixtfdi.c"), ("__fixtfsi", "fixtfsi.c"), ("__fixtfti", "fixtfti.c"), ("__fixunstfdi", "fixunstfdi.c"), ("__fixunstfsi", "fixunstfsi.c"), ("__fixunstfti", "fixunstfti.c"), ("__floatditf", "floatditf.c"), ("__floatsitf", "floatsitf.c"), ("__floatunditf", "floatunditf.c"), ("__floatunsitf", "floatunsitf.c"), ("__trunctfdf2", "trunctfdf2.c"), ("__trunctfsf2", "trunctfsf2.c"), ("__addtf3", "addtf3.c"), ("__multf3", "multf3.c"), ("__subtf3", "subtf3.c"), ("__divtf3", "divtf3.c"), ("__powitf2", "powitf2.c"), ("__fe_getround", "fp_mode.c"), ("__fe_raise_inexact", "fp_mode.c"), ]); if target_os != "windows" { sources.extend(&[("__multc3", "multc3.c")]); } } if target_arch == "mips" { sources.extend(&[("__bswapsi2", "bswapsi2.c")]); } if target_arch == "mips64" { sources.extend(&[ ("__extenddftf2", "extenddftf2.c"), ("__netf2", "comparetf2.c"), ("__addtf3", "addtf3.c"), ("__multf3", "multf3.c"), ("__subtf3", "subtf3.c"), ("__fixtfsi", "fixtfsi.c"), ("__floatsitf", "floatsitf.c"), ("__fixunstfsi", "fixunstfsi.c"), ("__floatunsitf", "floatunsitf.c"), ("__fe_getround", "fp_mode.c"), ("__divtf3", "divtf3.c"), ("__trunctfdf2", "trunctfdf2.c"), ("__trunctfsf2", "trunctfsf2.c"), ]); } // Remove the assembly implementations that won't compile for the target if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" || target_os == "uefi" { let mut to_remove = Vec::new(); for (k, v) in sources.map.iter() { if v.ends_with(".S") { to_remove.push(*k); } } sources.remove(&to_remove); // But use some generic implementations where possible sources.extend(&[("__clzdi2", "clzdi2.c"), ("__clzsi2", "clzsi2.c")]) } if llvm_target[0] == "thumbv7m" || llvm_target[0] == "thumbv7em" { sources.remove(&["__aeabi_cdcmp", "__aeabi_cfcmp"]); } // Android uses emulated TLS so we need a runtime support function. if target_os == "android" { sources.extend(&[("__emutls_get_address", "emutls.c")]); // Work around a bug in the NDK headers (fixed in // https://r.android.com/2038949 which will be released in a future // NDK version) by providing a definition of LONG_BIT. cfg.define("LONG_BIT", "(8 * sizeof(long))"); } // OpenHarmony also uses emulated TLS. if target_env == "ohos" { sources.extend(&[("__emutls_get_address", "emutls.c")]); } // When compiling the C code we require the user to tell us where the // source code is, and this is largely done so when we're compiling as // part of rust-lang/rust we can use the same llvm-project repository as // rust-lang/rust. 
let root = match env::var_os("RUST_COMPILER_RT_ROOT") { Some(s) => PathBuf::from(s), None => panic!("RUST_COMPILER_RT_ROOT is not set"), }; if !root.exists() { panic!("RUST_COMPILER_RT_ROOT={} does not exist", root.display()); } // Support deterministic builds by remapping the __FILE__ prefix if the // compiler supports it. This fixes the nondeterminism caused by the // use of that macro in lib/builtins/int_util.h in compiler-rt. cfg.flag_if_supported(&format!("-ffile-prefix-map={}=.", root.display())); // Include out-of-line atomics for aarch64, which are all generated by supplying different // sets of flags to the same source file. // Note: Out-of-line aarch64 atomics are not supported by the msvc toolchain (#430). let src_dir = root.join("lib/builtins"); if target_arch == "aarch64" && target_env != "msvc" { // See below for why we're building these as separate libraries. build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg); // Some run-time CPU feature detection is necessary, as well. sources.extend(&[("__aarch64_have_lse_atomics", "cpu_model.c")]); } let mut added_sources = HashSet::new(); for (sym, src) in sources.map.iter() { let src = src_dir.join(src); if added_sources.insert(src.clone()) { cfg.file(&src); println!("cargo:rerun-if-changed={}", src.display()); } println!("cargo:rustc-cfg={}=\"optimized-c\"", sym); } cfg.compile("libcompiler-rt.a"); } fn build_aarch64_out_of_line_atomics_libraries(builtins_dir: &Path, cfg: &mut cc::Build) { let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); let outlined_atomics_file = builtins_dir.join("aarch64/lse.S"); println!("cargo:rerun-if-changed={}", outlined_atomics_file.display()); cfg.include(&builtins_dir); for instruction_type in &["cas", "swp", "ldadd", "ldclr", "ldeor", "ldset"] { for size in &[1, 2, 4, 8, 16] { if *size == 16 && *instruction_type != "cas" { continue; } for (model_number, model_name) in &[(1, "relax"), (2, "acq"), (3, "rel"), (4, "acq_rel")] { // The original compiler-rt build system compiles the same // source file multiple times with different compiler // options. Here we do something slightly different: we // create multiple .S files with the proper #defines and // then include the original file. // // This is needed because the cc crate doesn't allow us to // override the name of object files and libtool requires // all objects in an archive to have unique names. let path = out_dir.join(format!("lse_{}{}_{}.S", instruction_type, size, model_name)); let mut file = File::create(&path).unwrap(); writeln!(file, "#define L_{}", instruction_type).unwrap(); writeln!(file, "#define SIZE {}", size).unwrap(); writeln!(file, "#define MODEL {}", model_number).unwrap(); writeln!( file, "#include \"{}\"", outlined_atomics_file.canonicalize().unwrap().display() ) .unwrap(); drop(file); cfg.file(path); let sym = format!("__aarch64_{}{}_{}", instruction_type, size, model_name); println!("cargo:rustc-cfg={}=\"optimized-c\"", sym); } } } } } compiler_builtins-0.1.101/examples/intrinsics.rs000064400000000000000000000201741046102023000200640ustar 00000000000000// By compiling this file we check that all the intrinsics we care about continue to be provided by // the `compiler_builtins` crate regardless of the changes we make to it. If we, by mistake, stop // compiling a C implementation and forget to implement that intrinsic in Rust, this file will fail // to link due to the missing intrinsic (symbol). 
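//
// Usage note: this link check is typically exercised by building the example
// (e.g. `cargo build --example intrinsics`); Cargo.toml marks the example with
// `required-features = ["compiler-builtins"]`, which is part of the crate's
// default feature set, so a default-feature build satisfies it.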
#![allow(unused_features)] #![allow(stable_features)] // bench_black_box feature is stable, leaving for backcompat #![allow(internal_features)] #![cfg_attr(thumb, no_main)] #![deny(dead_code)] #![feature(bench_black_box)] #![feature(lang_items)] #![feature(start)] #![feature(allocator_api)] #![no_std] extern crate panic_handler; #[cfg(all(not(thumb), not(windows), not(target_arch = "wasm32")))] #[link(name = "c")] extern "C" {} // Every function in this module maps will be lowered to an intrinsic by LLVM, if the platform // doesn't have native support for the operation used in the function. ARM has a naming convention // convention for its intrinsics that's different from other architectures; that's why some function // have an additional comment: the function name is the ARM name for the intrinsic and the comment // in the non-ARM name for the intrinsic. mod intrinsics { // truncdfsf2 pub fn aeabi_d2f(x: f64) -> f32 { x as f32 } // fixdfsi pub fn aeabi_d2i(x: f64) -> i32 { x as i32 } // fixdfdi pub fn aeabi_d2l(x: f64) -> i64 { x as i64 } // fixunsdfsi pub fn aeabi_d2uiz(x: f64) -> u32 { x as u32 } // fixunsdfdi pub fn aeabi_d2ulz(x: f64) -> u64 { x as u64 } // adddf3 pub fn aeabi_dadd(a: f64, b: f64) -> f64 { a + b } // eqdf2 pub fn aeabi_dcmpeq(a: f64, b: f64) -> bool { a == b } // gtdf2 pub fn aeabi_dcmpgt(a: f64, b: f64) -> bool { a > b } // ltdf2 pub fn aeabi_dcmplt(a: f64, b: f64) -> bool { a < b } // divdf3 pub fn aeabi_ddiv(a: f64, b: f64) -> f64 { a / b } // muldf3 pub fn aeabi_dmul(a: f64, b: f64) -> f64 { a * b } // subdf3 pub fn aeabi_dsub(a: f64, b: f64) -> f64 { a - b } // extendsfdf2 pub fn aeabi_f2d(x: f32) -> f64 { x as f64 } // fixsfsi pub fn aeabi_f2iz(x: f32) -> i32 { x as i32 } // fixsfdi pub fn aeabi_f2lz(x: f32) -> i64 { x as i64 } // fixunssfsi pub fn aeabi_f2uiz(x: f32) -> u32 { x as u32 } // fixunssfdi pub fn aeabi_f2ulz(x: f32) -> u64 { x as u64 } // addsf3 pub fn aeabi_fadd(a: f32, b: f32) -> f32 { a + b } // eqsf2 pub fn aeabi_fcmpeq(a: f32, b: f32) -> bool { a == b } // gtsf2 pub fn aeabi_fcmpgt(a: f32, b: f32) -> bool { a > b } // ltsf2 pub fn aeabi_fcmplt(a: f32, b: f32) -> bool { a < b } // divsf3 pub fn aeabi_fdiv(a: f32, b: f32) -> f32 { a / b } // mulsf3 pub fn aeabi_fmul(a: f32, b: f32) -> f32 { a * b } // subsf3 pub fn aeabi_fsub(a: f32, b: f32) -> f32 { a - b } // floatsidf pub fn aeabi_i2d(x: i32) -> f64 { x as f64 } // floatsisf pub fn aeabi_i2f(x: i32) -> f32 { x as f32 } pub fn aeabi_idiv(a: i32, b: i32) -> i32 { a.wrapping_div(b) } pub fn aeabi_idivmod(a: i32, b: i32) -> i32 { a % b } // floatdidf pub fn aeabi_l2d(x: i64) -> f64 { x as f64 } // floatdisf pub fn aeabi_l2f(x: i64) -> f32 { x as f32 } // divdi3 pub fn aeabi_ldivmod(a: i64, b: i64) -> i64 { a / b } // muldi3 pub fn aeabi_lmul(a: i64, b: i64) -> i64 { a.wrapping_mul(b) } // floatunsidf pub fn aeabi_ui2d(x: u32) -> f64 { x as f64 } // floatunsisf pub fn aeabi_ui2f(x: u32) -> f32 { x as f32 } pub fn aeabi_uidiv(a: u32, b: u32) -> u32 { a / b } pub fn aeabi_uidivmod(a: u32, b: u32) -> u32 { a % b } // floatundidf pub fn aeabi_ul2d(x: u64) -> f64 { x as f64 } // floatundisf pub fn aeabi_ul2f(x: u64) -> f32 { x as f32 } // udivdi3 pub fn aeabi_uldivmod(a: u64, b: u64) -> u64 { a * b } pub fn moddi3(a: i64, b: i64) -> i64 { a % b } pub fn mulodi4(a: i64, b: i64) -> i64 { a * b } pub fn umoddi3(a: u64, b: u64) -> u64 { a % b } pub fn muloti4(a: u128, b: u128) -> Option { a.checked_mul(b) } pub fn multi3(a: u128, b: u128) -> u128 { a.wrapping_mul(b) } pub fn ashlti3(a: u128, b: usize) -> 
u128 { a >> b } pub fn ashrti3(a: u128, b: usize) -> u128 { a << b } pub fn lshrti3(a: i128, b: usize) -> i128 { a >> b } pub fn udivti3(a: u128, b: u128) -> u128 { a / b } pub fn umodti3(a: u128, b: u128) -> u128 { a % b } pub fn divti3(a: i128, b: i128) -> i128 { a / b } pub fn modti3(a: i128, b: i128) -> i128 { a % b } pub fn udivsi3(a: u32, b: u32) -> u32 { a / b } } fn run() { use core::hint::black_box as bb; use intrinsics::*; bb(aeabi_d2f(bb(2.))); bb(aeabi_d2i(bb(2.))); bb(aeabi_d2l(bb(2.))); bb(aeabi_d2uiz(bb(2.))); bb(aeabi_d2ulz(bb(2.))); bb(aeabi_dadd(bb(2.), bb(3.))); bb(aeabi_dcmpeq(bb(2.), bb(3.))); bb(aeabi_dcmpgt(bb(2.), bb(3.))); bb(aeabi_dcmplt(bb(2.), bb(3.))); bb(aeabi_ddiv(bb(2.), bb(3.))); bb(aeabi_dmul(bb(2.), bb(3.))); bb(aeabi_dsub(bb(2.), bb(3.))); bb(aeabi_f2d(bb(2.))); bb(aeabi_f2iz(bb(2.))); bb(aeabi_f2lz(bb(2.))); bb(aeabi_f2uiz(bb(2.))); bb(aeabi_f2ulz(bb(2.))); bb(aeabi_fadd(bb(2.), bb(3.))); bb(aeabi_fcmpeq(bb(2.), bb(3.))); bb(aeabi_fcmpgt(bb(2.), bb(3.))); bb(aeabi_fcmplt(bb(2.), bb(3.))); bb(aeabi_fdiv(bb(2.), bb(3.))); bb(aeabi_fmul(bb(2.), bb(3.))); bb(aeabi_fsub(bb(2.), bb(3.))); bb(aeabi_i2d(bb(2))); bb(aeabi_i2f(bb(2))); bb(aeabi_idiv(bb(2), bb(3))); bb(aeabi_idivmod(bb(2), bb(3))); bb(aeabi_l2d(bb(2))); bb(aeabi_l2f(bb(2))); bb(aeabi_ldivmod(bb(2), bb(3))); bb(aeabi_lmul(bb(2), bb(3))); bb(aeabi_ui2d(bb(2))); bb(aeabi_ui2f(bb(2))); bb(aeabi_uidiv(bb(2), bb(3))); bb(aeabi_uidivmod(bb(2), bb(3))); bb(aeabi_ul2d(bb(2))); bb(aeabi_ul2f(bb(2))); bb(aeabi_uldivmod(bb(2), bb(3))); bb(moddi3(bb(2), bb(3))); bb(mulodi4(bb(2), bb(3))); bb(umoddi3(bb(2), bb(3))); bb(muloti4(bb(2), bb(2))); bb(multi3(bb(2), bb(2))); bb(ashlti3(bb(2), bb(2))); bb(ashrti3(bb(2), bb(2))); bb(lshrti3(bb(2), bb(2))); bb(udivti3(bb(2), bb(2))); bb(umodti3(bb(2), bb(2))); bb(divti3(bb(2), bb(2))); bb(modti3(bb(2), bb(2))); bb(udivsi3(bb(2), bb(2))); something_with_a_dtor(&|| assert_eq!(bb(1), 1)); extern "C" { fn rust_begin_unwind(x: usize); } // if bb(false) { unsafe { rust_begin_unwind(0); } // } } fn something_with_a_dtor(f: &dyn Fn()) { struct A<'a>(&'a (dyn Fn() + 'a)); impl<'a> Drop for A<'a> { fn drop(&mut self) { (self.0)(); } } let _a = A(f); f(); } #[cfg(not(thumb))] #[start] fn main(_: isize, _: *const *const u8) -> isize { run(); 0 } #[cfg(thumb)] #[no_mangle] pub fn _start() -> ! { run(); loop {} } #[cfg(windows)] #[link(name = "kernel32")] #[link(name = "msvcrt")] extern "C" {} // ARM targets need these symbols #[no_mangle] pub fn __aeabi_unwind_cpp_pr0() {} #[no_mangle] pub fn __aeabi_unwind_cpp_pr1() {} #[cfg(not(windows))] #[allow(non_snake_case)] #[no_mangle] pub fn _Unwind_Resume() {} #[cfg(not(windows))] #[lang = "eh_personality"] #[no_mangle] pub extern "C" fn eh_personality() {} #[cfg(all(windows, target_env = "gnu"))] mod mingw_unwinding { #[no_mangle] pub fn rust_eh_personality() {} #[no_mangle] pub fn rust_eh_unwind_resume() {} #[no_mangle] pub fn rust_eh_register_frames() {} #[no_mangle] pub fn rust_eh_unregister_frames() {} } compiler_builtins-0.1.101/libm/src/math/acos.rs000064400000000000000000000073421046102023000174530ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_acos.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ /* acos(x) * Method : * acos(x) = pi/2 - asin(x) * acos(-x) = pi/2 + asin(x) * For |x|<=0.5 * acos(x) = pi/2 - (x + x*x^2*R(x^2)) (see asin.c) * For x>0.5 * acos(x) = pi/2 - (pi/2 - 2asin(sqrt((1-x)/2))) * = 2asin(sqrt((1-x)/2)) * = 2s + 2s*z*R(z) ...z=(1-x)/2, s=sqrt(z) * = 2f + (2c + 2s*z*R(z)) * where f=hi part of s, and c = (z-f*f)/(s+f) is the correction term * for f so that f+c ~ sqrt(z). * For x<-0.5 * acos(x) = pi - 2asin(sqrt((1-|x|)/2)) * = pi - 0.5*(s+s*z*R(z)), where z=(1-|x|)/2,s=sqrt(z) * * Special cases: * if x is NaN, return x itself; * if |x|>1, return NaN with invalid signal. * * Function needed: sqrt */ use super::sqrt; const PIO2_HI: f64 = 1.57079632679489655800e+00; /* 0x3FF921FB, 0x54442D18 */ const PIO2_LO: f64 = 6.12323399573676603587e-17; /* 0x3C91A626, 0x33145C07 */ const PS0: f64 = 1.66666666666666657415e-01; /* 0x3FC55555, 0x55555555 */ const PS1: f64 = -3.25565818622400915405e-01; /* 0xBFD4D612, 0x03EB6F7D */ const PS2: f64 = 2.01212532134862925881e-01; /* 0x3FC9C155, 0x0E884455 */ const PS3: f64 = -4.00555345006794114027e-02; /* 0xBFA48228, 0xB5688F3B */ const PS4: f64 = 7.91534994289814532176e-04; /* 0x3F49EFE0, 0x7501B288 */ const PS5: f64 = 3.47933107596021167570e-05; /* 0x3F023DE1, 0x0DFDF709 */ const QS1: f64 = -2.40339491173441421878e+00; /* 0xC0033A27, 0x1C8A2D4B */ const QS2: f64 = 2.02094576023350569471e+00; /* 0x40002AE5, 0x9C598AC8 */ const QS3: f64 = -6.88283971605453293030e-01; /* 0xBFE6066C, 0x1B8D0159 */ const QS4: f64 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ fn r(z: f64) -> f64 { let p: f64 = z * (PS0 + z * (PS1 + z * (PS2 + z * (PS3 + z * (PS4 + z * PS5))))); let q: f64 = 1.0 + z * (QS1 + z * (QS2 + z * (QS3 + z * QS4))); p / q } /// Arccosine (f64) /// /// Computes the inverse cosine (arc cosine) of the input value. /// Arguments must be in the range -1 to 1. /// Returns values in radians, in the range of 0 to pi. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn acos(x: f64) -> f64 { let x1p_120f = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ -120 let z: f64; let w: f64; let s: f64; let c: f64; let df: f64; let hx: u32; let ix: u32; hx = (x.to_bits() >> 32) as u32; ix = hx & 0x7fffffff; /* |x| >= 1 or nan */ if ix >= 0x3ff00000 { let lx: u32 = x.to_bits() as u32; if ((ix - 0x3ff00000) | lx) == 0 { /* acos(1)=0, acos(-1)=pi */ if (hx >> 31) != 0 { return 2. * PIO2_HI + x1p_120f; } return 0.; } return 0. / (x - x); } /* |x| < 0.5 */ if ix < 0x3fe00000 { if ix <= 0x3c600000 { /* |x| < 2**-57 */ return PIO2_HI + x1p_120f; } return PIO2_HI - (x - (PIO2_LO - x * r(x * x))); } /* x < -0.5 */ if (hx >> 31) != 0 { z = (1.0 + x) * 0.5; s = sqrt(z); w = r(z) * s - PIO2_LO; return 2. * (PIO2_HI - (s + w)); } /* x > 0.5 */ z = (1.0 - x) * 0.5; s = sqrt(z); // Set the low 4 bytes to zero df = f64::from_bits(s.to_bits() & 0xff_ff_ff_ff_00_00_00_00); c = (z - df * df) / (s + df); w = r(z) * s + c; 2. * (df + w) } compiler_builtins-0.1.101/libm/src/math/acosf.rs000064400000000000000000000042521046102023000176160ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_acosf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. 
* Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use super::sqrtf::sqrtf; const PIO2_HI: f32 = 1.5707962513e+00; /* 0x3fc90fda */ const PIO2_LO: f32 = 7.5497894159e-08; /* 0x33a22168 */ const P_S0: f32 = 1.6666586697e-01; const P_S1: f32 = -4.2743422091e-02; const P_S2: f32 = -8.6563630030e-03; const Q_S1: f32 = -7.0662963390e-01; fn r(z: f32) -> f32 { let p = z * (P_S0 + z * (P_S1 + z * P_S2)); let q = 1. + z * Q_S1; p / q } /// Arccosine (f32) /// /// Computes the inverse cosine (arc cosine) of the input value. /// Arguments must be in the range -1 to 1. /// Returns values in radians, in the range of 0 to pi. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn acosf(x: f32) -> f32 { let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) let z: f32; let w: f32; let s: f32; let mut hx = x.to_bits(); let ix = hx & 0x7fffffff; /* |x| >= 1 or nan */ if ix >= 0x3f800000 { if ix == 0x3f800000 { if (hx >> 31) != 0 { return 2. * PIO2_HI + x1p_120; } return 0.; } return 0. / (x - x); } /* |x| < 0.5 */ if ix < 0x3f000000 { if ix <= 0x32800000 { /* |x| < 2**-26 */ return PIO2_HI + x1p_120; } return PIO2_HI - (x - (PIO2_LO - x * r(x * x))); } /* x < -0.5 */ if (hx >> 31) != 0 { z = (1. + x) * 0.5; s = sqrtf(z); w = r(z) * s - PIO2_LO; return 2. * (PIO2_HI - (s + w)); } /* x > 0.5 */ z = (1. - x) * 0.5; s = sqrtf(z); hx = s.to_bits(); let df = f32::from_bits(hx & 0xfffff000); let c = (z - df * df) / (s + df); w = r(z) * s + c; 2. * (df + w) } compiler_builtins-0.1.101/libm/src/math/acosh.rs000064400000000000000000000015431046102023000176200ustar 00000000000000use super::{log, log1p, sqrt}; const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ /// Inverse hyperbolic cosine (f64) /// /// Calculates the inverse hyperbolic cosine of `x`. /// Is defined as `log(x + sqrt(x*x-1))`. /// `x` must be a number greater than or equal to 1. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn acosh(x: f64) -> f64 { let u = x.to_bits(); let e = ((u >> 52) as usize) & 0x7ff; /* x < 1 domain error is handled in the called functions */ if e < 0x3ff + 1 { /* |x| < 2, up to 2ulp error in [1,1.125] */ return log1p(x - 1.0 + sqrt((x - 1.0) * (x - 1.0) + 2.0 * (x - 1.0))); } if e < 0x3ff + 26 { /* |x| < 0x1p26 */ return log(2.0 * x - 1.0 / (x + sqrt(x * x - 1.0))); } /* |x| >= 0x1p26 or nan */ return log(x) + LN2; } compiler_builtins-0.1.101/libm/src/math/acoshf.rs000064400000000000000000000014671046102023000177730ustar 00000000000000use super::{log1pf, logf, sqrtf}; const LN2: f32 = 0.693147180559945309417232121458176568; /// Inverse hyperbolic cosine (f32) /// /// Calculates the inverse hyperbolic cosine of `x`. /// Is defined as `log(x + sqrt(x*x-1))`. /// `x` must be a number greater than or equal to 1. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn acoshf(x: f32) -> f32 { let u = x.to_bits(); let a = u & 0x7fffffff; if a < 0x3f800000 + (1 << 23) { /* |x| < 2, invalid if x < 1 or nan */ /* up to 2ulp error in [1,1.125] */ return log1pf(x - 1.0 + sqrtf((x - 1.0) * (x - 1.0) + 2.0 * (x - 1.0))); } if a < 0x3f800000 + (12 << 23) { /* |x| < 0x1p12 */ return logf(2.0 * x - 1.0 / (x + sqrtf(x * x - 1.0))); } /* x >= 0x1p12 */ return logf(x) + LN2; } compiler_builtins-0.1.101/libm/src/math/asin.rs000064400000000000000000000103001046102023000174440ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_asin.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* asin(x) * Method : * Since asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ... * we approximate asin(x) on [0,0.5] by * asin(x) = x + x*x^2*R(x^2) * where * R(x^2) is a rational approximation of (asin(x)-x)/x^3 * and its remez error is bounded by * |(asin(x)-x)/x^3 - R(x^2)| < 2^(-58.75) * * For x in [0.5,1] * asin(x) = pi/2-2*asin(sqrt((1-x)/2)) * Let y = (1-x), z = y/2, s := sqrt(z), and pio2_hi+pio2_lo=pi/2; * then for x>0.98 * asin(x) = pi/2 - 2*(s+s*z*R(z)) * = pio2_hi - (2*(s+s*z*R(z)) - pio2_lo) * For x<=0.98, let pio4_hi = pio2_hi/2, then * f = hi part of s; * c = sqrt(z) - f = (z-f*f)/(s+f) ...f+c=sqrt(z) * and * asin(x) = pi/2 - 2*(s+s*z*R(z)) * = pio4_hi+(pio4-2s)-(2s*z*R(z)-pio2_lo) * = pio4_hi+(pio4-2f)-(2s*z*R(z)-(pio2_lo+2c)) * * Special cases: * if x is NaN, return x itself; * if |x|>1, return NaN with invalid signal. * */ use super::{fabs, get_high_word, get_low_word, sqrt, with_set_low_word}; const PIO2_HI: f64 = 1.57079632679489655800e+00; /* 0x3FF921FB, 0x54442D18 */ const PIO2_LO: f64 = 6.12323399573676603587e-17; /* 0x3C91A626, 0x33145C07 */ /* coefficients for R(x^2) */ const P_S0: f64 = 1.66666666666666657415e-01; /* 0x3FC55555, 0x55555555 */ const P_S1: f64 = -3.25565818622400915405e-01; /* 0xBFD4D612, 0x03EB6F7D */ const P_S2: f64 = 2.01212532134862925881e-01; /* 0x3FC9C155, 0x0E884455 */ const P_S3: f64 = -4.00555345006794114027e-02; /* 0xBFA48228, 0xB5688F3B */ const P_S4: f64 = 7.91534994289814532176e-04; /* 0x3F49EFE0, 0x7501B288 */ const P_S5: f64 = 3.47933107596021167570e-05; /* 0x3F023DE1, 0x0DFDF709 */ const Q_S1: f64 = -2.40339491173441421878e+00; /* 0xC0033A27, 0x1C8A2D4B */ const Q_S2: f64 = 2.02094576023350569471e+00; /* 0x40002AE5, 0x9C598AC8 */ const Q_S3: f64 = -6.88283971605453293030e-01; /* 0xBFE6066C, 0x1B8D0159 */ const Q_S4: f64 = 7.70381505559019352791e-02; /* 0x3FB3B8C5, 0xB12E9282 */ fn comp_r(z: f64) -> f64 { let p = z * (P_S0 + z * (P_S1 + z * (P_S2 + z * (P_S3 + z * (P_S4 + z * P_S5))))); let q = 1.0 + z * (Q_S1 + z * (Q_S2 + z * (Q_S3 + z * Q_S4))); p / q } /// Arcsine (f64) /// /// Computes the inverse sine (arc sine) of the argument `x`. /// Arguments to asin must be in the range -1 to 1. /// Returns values in radians, in the range of -pi/2 to pi/2. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn asin(mut x: f64) -> f64 { let z: f64; let r: f64; let s: f64; let hx: u32; let ix: u32; hx = get_high_word(x); ix = hx & 0x7fffffff; /* |x| >= 1 or nan */ if ix >= 0x3ff00000 { let lx: u32; lx = get_low_word(x); if ((ix - 0x3ff00000) | lx) == 0 { /* asin(1) = +-pi/2 with inexact */ return x * PIO2_HI + f64::from_bits(0x3870000000000000); } else { return 0.0 / (x - x); } } /* |x| < 0.5 */ if ix < 0x3fe00000 { /* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */ if ix < 0x3e500000 && ix >= 0x00100000 { return x; } else { return x + x * comp_r(x * x); } } /* 1 > |x| >= 0.5 */ z = (1.0 - fabs(x)) * 0.5; s = sqrt(z); r = comp_r(z); if ix >= 0x3fef3333 { /* if |x| > 0.975 */ x = PIO2_HI - (2. * (s + s * r) - PIO2_LO); } else { let f: f64; let c: f64; /* f+c = sqrt(z) */ f = with_set_low_word(s, 0); c = (z - f * f) / (s + f); x = 0.5 * PIO2_HI - (2.0 * s * r - (PIO2_LO - 2.0 * c) - (0.5 * PIO2_HI - 2.0 * f)); } if hx >> 31 != 0 { -x } else { x } } compiler_builtins-0.1.101/libm/src/math/asinf.rs000064400000000000000000000040061046102023000176200ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_asinf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use super::fabsf::fabsf; use super::sqrt::sqrt; const PIO2: f64 = 1.570796326794896558e+00; /* coefficients for R(x^2) */ const P_S0: f32 = 1.6666586697e-01; const P_S1: f32 = -4.2743422091e-02; const P_S2: f32 = -8.6563630030e-03; const Q_S1: f32 = -7.0662963390e-01; fn r(z: f32) -> f32 { let p = z * (P_S0 + z * (P_S1 + z * P_S2)); let q = 1. + z * Q_S1; p / q } /// Arcsine (f32) /// /// Computes the inverse sine (arc sine) of the argument `x`. /// Arguments to asin must be in the range -1 to 1. /// Returns values in radians, in the range of -pi/2 to pi/2. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn asinf(mut x: f32) -> f32 { let x1p_120 = f64::from_bits(0x3870000000000000); // 0x1p-120 === 2 ^ (-120) let hx = x.to_bits(); let ix = hx & 0x7fffffff; if ix >= 0x3f800000 { /* |x| >= 1 */ if ix == 0x3f800000 { /* |x| == 1 */ return ((x as f64) * PIO2 + x1p_120) as f32; /* asin(+-1) = +-pi/2 with inexact */ } return 0. / (x - x); /* asin(|x|>1) is NaN */ } if ix < 0x3f000000 { /* |x| < 0.5 */ /* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */ if (ix < 0x39800000) && (ix >= 0x00800000) { return x; } return x + x * r(x * x); } /* 1 > |x| >= 0.5 */ let z = (1. - fabsf(x)) * 0.5; let s = sqrt(z as f64); x = (PIO2 - 2. * (s + s * (r(z) as f64))) as f32; if (hx >> 31) != 0 { -x } else { x } } compiler_builtins-0.1.101/libm/src/math/asinh.rs000064400000000000000000000022121046102023000176170ustar 00000000000000use super::{log, log1p, sqrt}; const LN2: f64 = 0.693147180559945309417232121458176568; /* 0x3fe62e42, 0xfefa39ef*/ /* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ /// Inverse hyperbolic sine (f64) /// /// Calculates the inverse hyperbolic sine of `x`. /// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`. 
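// A minimal usage sketch (illustrative addition, not from the original source).
// `asinh` is odd and defined for every finite input:
//
//     assert_eq!(asinh(0.0), 0.0);
//     assert!((asinh(1.0) - 0.881_373_587_019_543).abs() < 1e-15); // ln(1 + sqrt(2))
//     assert_eq!(asinh(-1.0), -asinh(1.0));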
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn asinh(mut x: f64) -> f64 { let mut u = x.to_bits(); let e = ((u >> 52) as usize) & 0x7ff; let sign = (u >> 63) != 0; /* |x| */ u &= (!0) >> 1; x = f64::from_bits(u); if e >= 0x3ff + 26 { /* |x| >= 0x1p26 or inf or nan */ x = log(x) + LN2; } else if e >= 0x3ff + 1 { /* |x| >= 2 */ x = log(2.0 * x + 1.0 / (sqrt(x * x + 1.0) + x)); } else if e >= 0x3ff - 26 { /* |x| >= 0x1p-26, up to 1.6ulp error in [0.125,0.5] */ x = log1p(x + x * x / (sqrt(x * x + 1.0) + 1.0)); } else { /* |x| < 0x1p-26, raise inexact if x != 0 */ let x1p120 = f64::from_bits(0x4770000000000000); force_eval!(x + x1p120); } if sign { -x } else { x } } compiler_builtins-0.1.101/libm/src/math/asinhf.rs000064400000000000000000000021551046102023000177730ustar 00000000000000use super::{log1pf, logf, sqrtf}; const LN2: f32 = 0.693147180559945309417232121458176568; /* asinh(x) = sign(x)*log(|x|+sqrt(x*x+1)) ~= x - x^3/6 + o(x^5) */ /// Inverse hyperbolic sine (f32) /// /// Calculates the inverse hyperbolic sine of `x`. /// Is defined as `sgn(x)*log(|x|+sqrt(x*x+1))`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn asinhf(mut x: f32) -> f32 { let u = x.to_bits(); let i = u & 0x7fffffff; let sign = (u >> 31) != 0; /* |x| */ x = f32::from_bits(i); if i >= 0x3f800000 + (12 << 23) { /* |x| >= 0x1p12 or inf or nan */ x = logf(x) + LN2; } else if i >= 0x3f800000 + (1 << 23) { /* |x| >= 2 */ x = logf(2.0 * x + 1.0 / (sqrtf(x * x + 1.0) + x)); } else if i >= 0x3f800000 - (12 << 23) { /* |x| >= 0x1p-12, up to 1.6ulp error in [0.125,0.5] */ x = log1pf(x + x * x / (sqrtf(x * x + 1.0) + 1.0)); } else { /* |x| < 0x1p-12, raise inexact if x!=0 */ let x1p120 = f32::from_bits(0x7b800000); force_eval!(x + x1p120); } if sign { -x } else { x } } compiler_builtins-0.1.101/libm/src/math/atan.rs000064400000000000000000000132161046102023000174460ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_atan.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* atan(x) * Method * 1. Reduce x to positive by atan(x) = -atan(-x). * 2. According to the integer k=4t+0.25 chopped, t=x, the argument * is further reduced to one of the following intervals and the * arctangent of t is evaluated by the corresponding formula: * * [0,7/16] atan(x) = t-t^3*(a1+t^2*(a2+...(a10+t^2*a11)...) * [7/16,11/16] atan(x) = atan(1/2) + atan( (t-0.5)/(1+t/2) ) * [11/16.19/16] atan(x) = atan( 1 ) + atan( (t-1)/(1+t) ) * [19/16,39/16] atan(x) = atan(3/2) + atan( (t-1.5)/(1+1.5t) ) * [39/16,INF] atan(x) = atan(INF) + atan( -1/t ) * * Constants: * The hexadecimal values are the intended ones for the following * constants. The decimal values may be used, provided that the * compiler will convert from decimal to binary accurately enough * to produce the hexadecimal values shown. 
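 *
 * Worked example (illustrative addition, not part of the original comment):
 * for x = 1 the argument falls in [11/16, 19/16], so the table above gives
 * atan(1) = atan(1) + atan((1-1)/(1+1)) = pi/4 + atan(0) = pi/4; the reduced
 * argument (t-1)/(1+t) is exactly 0 and only the stored atan(1.0) hi/lo
 * constants contribute to the result.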
*/ use super::fabs; use core::f64; const ATANHI: [f64; 4] = [ 4.63647609000806093515e-01, /* atan(0.5)hi 0x3FDDAC67, 0x0561BB4F */ 7.85398163397448278999e-01, /* atan(1.0)hi 0x3FE921FB, 0x54442D18 */ 9.82793723247329054082e-01, /* atan(1.5)hi 0x3FEF730B, 0xD281F69B */ 1.57079632679489655800e+00, /* atan(inf)hi 0x3FF921FB, 0x54442D18 */ ]; const ATANLO: [f64; 4] = [ 2.26987774529616870924e-17, /* atan(0.5)lo 0x3C7A2B7F, 0x222F65E2 */ 3.06161699786838301793e-17, /* atan(1.0)lo 0x3C81A626, 0x33145C07 */ 1.39033110312309984516e-17, /* atan(1.5)lo 0x3C700788, 0x7AF0CBBD */ 6.12323399573676603587e-17, /* atan(inf)lo 0x3C91A626, 0x33145C07 */ ]; const AT: [f64; 11] = [ 3.33333333333329318027e-01, /* 0x3FD55555, 0x5555550D */ -1.99999999998764832476e-01, /* 0xBFC99999, 0x9998EBC4 */ 1.42857142725034663711e-01, /* 0x3FC24924, 0x920083FF */ -1.11111104054623557880e-01, /* 0xBFBC71C6, 0xFE231671 */ 9.09088713343650656196e-02, /* 0x3FB745CD, 0xC54C206E */ -7.69187620504482999495e-02, /* 0xBFB3B0F2, 0xAF749A6D */ 6.66107313738753120669e-02, /* 0x3FB10D66, 0xA0D03D51 */ -5.83357013379057348645e-02, /* 0xBFADDE2D, 0x52DEFD9A */ 4.97687799461593236017e-02, /* 0x3FA97B4B, 0x24760DEB */ -3.65315727442169155270e-02, /* 0xBFA2B444, 0x2C6A6C2F */ 1.62858201153657823623e-02, /* 0x3F90AD3A, 0xE322DA11 */ ]; /// Arctangent (f64) /// /// Computes the inverse tangent (arc tangent) of the input value. /// Returns a value in radians, in the range of -pi/2 to pi/2. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atan(x: f64) -> f64 { let mut x = x; let mut ix = (x.to_bits() >> 32) as u32; let sign = ix >> 31; ix &= 0x7fff_ffff; if ix >= 0x4410_0000 { if x.is_nan() { return x; } let z = ATANHI[3] + f64::from_bits(0x0380_0000); // 0x1p-120f return if sign != 0 { -z } else { z }; } let id = if ix < 0x3fdc_0000 { /* |x| < 0.4375 */ if ix < 0x3e40_0000 { /* |x| < 2^-27 */ if ix < 0x0010_0000 { /* raise underflow for subnormal x */ force_eval!(x as f32); } return x; } -1 } else { x = fabs(x); if ix < 0x3ff30000 { /* |x| < 1.1875 */ if ix < 0x3fe60000 { /* 7/16 <= |x| < 11/16 */ x = (2. * x - 1.) / (2. + x); 0 } else { /* 11/16 <= |x| < 19/16 */ x = (x - 1.) / (x + 1.); 1 } } else if ix < 0x40038000 { /* |x| < 2.4375 */ x = (x - 1.5) / (1. + 1.5 * x); 2 } else { /* 2.4375 <= |x| < 2^66 */ x = -1. 
/ x; 3 } }; let z = x * x; let w = z * z; /* break sum from i=0 to 10 AT[i]z**(i+1) into odd and even poly */ let s1 = z * (AT[0] + w * (AT[2] + w * (AT[4] + w * (AT[6] + w * (AT[8] + w * AT[10]))))); let s2 = w * (AT[1] + w * (AT[3] + w * (AT[5] + w * (AT[7] + w * AT[9])))); if id < 0 { return x - x * (s1 + s2); } let z = i!(ATANHI, id as usize) - (x * (s1 + s2) - i!(ATANLO, id as usize) - x); if sign != 0 { -z } else { z } } #[cfg(test)] mod tests { use super::atan; use core::f64; #[test] fn sanity_check() { for (input, answer) in [ (3.0_f64.sqrt() / 3.0, f64::consts::FRAC_PI_6), (1.0, f64::consts::FRAC_PI_4), (3.0_f64.sqrt(), f64::consts::FRAC_PI_3), (-3.0_f64.sqrt() / 3.0, -f64::consts::FRAC_PI_6), (-1.0, -f64::consts::FRAC_PI_4), (-3.0_f64.sqrt(), -f64::consts::FRAC_PI_3), ] .iter() { assert!( (atan(*input) - answer) / answer < 1e-5, "\natan({:.4}/16) = {:.4}, actual: {}", input * 16.0, answer, atan(*input) ); } } #[test] fn zero() { assert_eq!(atan(0.0), 0.0); } #[test] fn infinity() { assert_eq!(atan(f64::INFINITY), f64::consts::FRAC_PI_2); } #[test] fn minus_infinity() { assert_eq!(atan(f64::NEG_INFINITY), -f64::consts::FRAC_PI_2); } #[test] fn nan() { assert!(atan(f64::NAN).is_nan()); } } compiler_builtins-0.1.101/libm/src/math/atan2.rs000064400000000000000000000104031046102023000175230ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== * */ /* atan2(y,x) * Method : * 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x). * 2. Reduce x to positive by (if x and y are unexceptional): * ARG (x+iy) = arctan(y/x) ... if x > 0, * ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0, * * Special cases: * * ATAN2((anything), NaN ) is NaN; * ATAN2(NAN , (anything) ) is NaN; * ATAN2(+-0, +(anything but NaN)) is +-0 ; * ATAN2(+-0, -(anything but NaN)) is +-pi ; * ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2; * ATAN2(+-(anything but INF and NaN), +INF) is +-0 ; * ATAN2(+-(anything but INF and NaN), -INF) is +-pi; * ATAN2(+-INF,+INF ) is +-pi/4 ; * ATAN2(+-INF,-INF ) is +-3pi/4; * ATAN2(+-INF, (anything but,0,NaN, and INF)) is +-pi/2; * * Constants: * The hexadecimal values are the intended ones for the following * constants. The decimal values may be used, provided that the * compiler will convert from decimal to binary accurately enough * to produce the hexadecimal values shown. */ use super::atan; use super::fabs; const PI: f64 = 3.1415926535897931160E+00; /* 0x400921FB, 0x54442D18 */ const PI_LO: f64 = 1.2246467991473531772E-16; /* 0x3CA1A626, 0x33145C07 */ /// Arctangent of y/x (f64) /// /// Computes the inverse tangent (arc tangent) of `y/x`. /// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0). /// Returns a value in radians, in the range of -pi to pi. 
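// A minimal usage sketch (illustrative addition, not from the original source),
// showing the quadrant handling described above:
//
//     assert!((atan2(1.0, 1.0) - core::f64::consts::FRAC_PI_4).abs() < 1e-15);
//     assert!((atan2(1.0, -1.0) - 3.0 * core::f64::consts::FRAC_PI_4).abs() < 1e-15);
//     assert_eq!(atan2(0.0, -1.0), core::f64::consts::PI); // atan2(+0, -x) = pi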
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atan2(y: f64, x: f64) -> f64 { if x.is_nan() || y.is_nan() { return x + y; } let mut ix = (x.to_bits() >> 32) as u32; let lx = x.to_bits() as u32; let mut iy = (y.to_bits() >> 32) as u32; let ly = y.to_bits() as u32; if ((ix.wrapping_sub(0x3ff00000)) | lx) == 0 { /* x = 1.0 */ return atan(y); } let m = ((iy >> 31) & 1) | ((ix >> 30) & 2); /* 2*sign(x)+sign(y) */ ix &= 0x7fffffff; iy &= 0x7fffffff; /* when y = 0 */ if (iy | ly) == 0 { return match m { 0 | 1 => y, /* atan(+-0,+anything)=+-0 */ 2 => PI, /* atan(+0,-anything) = PI */ _ => -PI, /* atan(-0,-anything) =-PI */ }; } /* when x = 0 */ if (ix | lx) == 0 { return if m & 1 != 0 { -PI / 2.0 } else { PI / 2.0 }; } /* when x is INF */ if ix == 0x7ff00000 { if iy == 0x7ff00000 { return match m { 0 => PI / 4.0, /* atan(+INF,+INF) */ 1 => -PI / 4.0, /* atan(-INF,+INF) */ 2 => 3.0 * PI / 4.0, /* atan(+INF,-INF) */ _ => -3.0 * PI / 4.0, /* atan(-INF,-INF) */ }; } else { return match m { 0 => 0.0, /* atan(+...,+INF) */ 1 => -0.0, /* atan(-...,+INF) */ 2 => PI, /* atan(+...,-INF) */ _ => -PI, /* atan(-...,-INF) */ }; } } /* |y/x| > 0x1p64 */ if ix.wrapping_add(64 << 20) < iy || iy == 0x7ff00000 { return if m & 1 != 0 { -PI / 2.0 } else { PI / 2.0 }; } /* z = atan(|y/x|) without spurious underflow */ let z = if (m & 2 != 0) && iy.wrapping_add(64 << 20) < ix { /* |y/x| < 0x1p-64, x<0 */ 0.0 } else { atan(fabs(y / x)) }; match m { 0 => z, /* atan(+,+) */ 1 => -z, /* atan(-,+) */ 2 => PI - (z - PI_LO), /* atan(+,-) */ _ => (z - PI_LO) - PI, /* atan(-,-) */ } } #[test] fn sanity_check() { assert_eq!(atan2(0.0, 1.0), 0.0); assert_eq!(atan2(0.0, -1.0), PI); assert_eq!(atan2(-0.0, -1.0), -PI); assert_eq!(atan2(3.0, 2.0), atan(3.0 / 2.0)); assert_eq!(atan2(2.0, -1.0), atan(2.0 / -1.0) + PI); assert_eq!(atan2(-2.0, -1.0), atan(-2.0 / -1.0) - PI); } compiler_builtins-0.1.101/libm/src/math/atan2f.rs000064400000000000000000000055161046102023000177020ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_atan2f.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use super::atanf; use super::fabsf; const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ const PI_LO: f32 = -8.7422776573e-08; /* 0xb3bbbd2e */ /// Arctangent of y/x (f32) /// /// Computes the inverse tangent (arc tangent) of `y/x`. /// Produces the correct result even for angles near pi/2 or -pi/2 (that is, when `x` is near 0). /// Returns a value in radians, in the range of -pi to pi. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atan2f(y: f32, x: f32) -> f32 { if x.is_nan() || y.is_nan() { return x + y; } let mut ix = x.to_bits(); let mut iy = y.to_bits(); if ix == 0x3f800000 { /* x=1.0 */ return atanf(y); } let m = ((iy >> 31) & 1) | ((ix >> 30) & 2); /* 2*sign(x)+sign(y) */ ix &= 0x7fffffff; iy &= 0x7fffffff; /* when y = 0 */ if iy == 0 { return match m { 0 | 1 => y, /* atan(+-0,+anything)=+-0 */ 2 => PI, /* atan(+0,-anything) = pi */ 3 | _ => -PI, /* atan(-0,-anything) =-pi */ }; } /* when x = 0 */ if ix == 0 { return if m & 1 != 0 { -PI / 2. } else { PI / 2. 
}; } /* when x is INF */ if ix == 0x7f800000 { return if iy == 0x7f800000 { match m { 0 => PI / 4., /* atan(+INF,+INF) */ 1 => -PI / 4., /* atan(-INF,+INF) */ 2 => 3. * PI / 4., /* atan(+INF,-INF)*/ 3 | _ => -3. * PI / 4., /* atan(-INF,-INF)*/ } } else { match m { 0 => 0., /* atan(+...,+INF) */ 1 => -0., /* atan(-...,+INF) */ 2 => PI, /* atan(+...,-INF) */ 3 | _ => -PI, /* atan(-...,-INF) */ } }; } /* |y/x| > 0x1p26 */ if (ix + (26 << 23) < iy) || (iy == 0x7f800000) { return if m & 1 != 0 { -PI / 2. } else { PI / 2. }; } /* z = atan(|y/x|) with correct underflow */ let z = if (m & 2 != 0) && (iy + (26 << 23) < ix) { /*|y/x| < 0x1p-26, x < 0 */ 0. } else { atanf(fabsf(y / x)) }; match m { 0 => z, /* atan(+,+) */ 1 => -z, /* atan(-,+) */ 2 => PI - (z - PI_LO), /* atan(+,-) */ _ => (z - PI_LO) - PI, /* case 3 */ /* atan(-,-) */ } } compiler_builtins-0.1.101/libm/src/math/atanf.rs000064400000000000000000000061151046102023000176140ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_atanf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use super::fabsf; const ATAN_HI: [f32; 4] = [ 4.6364760399e-01, /* atan(0.5)hi 0x3eed6338 */ 7.8539812565e-01, /* atan(1.0)hi 0x3f490fda */ 9.8279368877e-01, /* atan(1.5)hi 0x3f7b985e */ 1.5707962513e+00, /* atan(inf)hi 0x3fc90fda */ ]; const ATAN_LO: [f32; 4] = [ 5.0121582440e-09, /* atan(0.5)lo 0x31ac3769 */ 3.7748947079e-08, /* atan(1.0)lo 0x33222168 */ 3.4473217170e-08, /* atan(1.5)lo 0x33140fb4 */ 7.5497894159e-08, /* atan(inf)lo 0x33a22168 */ ]; const A_T: [f32; 5] = [ 3.3333328366e-01, -1.9999158382e-01, 1.4253635705e-01, -1.0648017377e-01, 6.1687607318e-02, ]; /// Arctangent (f32) /// /// Computes the inverse tangent (arc tangent) of the input value. /// Returns a value in radians, in the range of -pi/2 to pi/2. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atanf(mut x: f32) -> f32 { let x1p_120 = f32::from_bits(0x03800000); // 0x1p-120 === 2 ^ (-120) let z: f32; let mut ix = x.to_bits(); let sign = (ix >> 31) != 0; ix &= 0x7fffffff; if ix >= 0x4c800000 { /* if |x| >= 2**26 */ if x.is_nan() { return x; } z = i!(ATAN_HI, 3) + x1p_120; return if sign { -z } else { z }; } let id = if ix < 0x3ee00000 { /* |x| < 0.4375 */ if ix < 0x39800000 { /* |x| < 2**-12 */ if ix < 0x00800000 { /* raise underflow for subnormal x */ force_eval!(x * x); } return x; } -1 } else { x = fabsf(x); if ix < 0x3f980000 { /* |x| < 1.1875 */ if ix < 0x3f300000 { /* 7/16 <= |x| < 11/16 */ x = (2. * x - 1.) / (2. + x); 0 } else { /* 11/16 <= |x| < 19/16 */ x = (x - 1.) / (x + 1.); 1 } } else if ix < 0x401c0000 { /* |x| < 2.4375 */ x = (x - 1.5) / (1. + 1.5 * x); 2 } else { /* 2.4375 <= |x| < 2**26 */ x = -1. 
/ x; 3 } }; /* end of argument reduction */ z = x * x; let w = z * z; /* break sum from i=0 to 10 aT[i]z**(i+1) into odd and even poly */ let s1 = z * (i!(A_T, 0) + w * (i!(A_T, 2) + w * i!(A_T, 4))); let s2 = w * (i!(A_T, 1) + w * i!(A_T, 3)); if id < 0 { return x - x * (s1 + s2); } let id = id as usize; let z = i!(ATAN_HI, id) - ((x * (s1 + s2) - i!(ATAN_LO, id)) - x); if sign { -z } else { z } } compiler_builtins-0.1.101/libm/src/math/atanh.rs000064400000000000000000000017121046102023000176140ustar 00000000000000use super::log1p; /* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ /// Inverse hyperbolic tangent (f64) /// /// Calculates the inverse hyperbolic tangent of `x`. /// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atanh(x: f64) -> f64 { let u = x.to_bits(); let e = ((u >> 52) as usize) & 0x7ff; let sign = (u >> 63) != 0; /* |x| */ let mut y = f64::from_bits(u & 0x7fff_ffff_ffff_ffff); if e < 0x3ff - 1 { if e < 0x3ff - 32 { /* handle underflow */ if e == 0 { force_eval!(y as f32); } } else { /* |x| < 0.5, up to 1.7ulp error */ y = 0.5 * log1p(2.0 * y + 2.0 * y * y / (1.0 - y)); } } else { /* avoid overflow */ y = 0.5 * log1p(2.0 * (y / (1.0 - y))); } if sign { -y } else { y } } compiler_builtins-0.1.101/libm/src/math/atanhf.rs000064400000000000000000000017101046102023000177600ustar 00000000000000use super::log1pf; /* atanh(x) = log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2 ~= x + x^3/3 + o(x^5) */ /// Inverse hyperbolic tangent (f32) /// /// Calculates the inverse hyperbolic tangent of `x`. /// Is defined as `log((1+x)/(1-x))/2 = log1p(2x/(1-x))/2`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn atanhf(mut x: f32) -> f32 { let mut u = x.to_bits(); let sign = (u >> 31) != 0; /* |x| */ u &= 0x7fffffff; x = f32::from_bits(u); if u < 0x3f800000 - (1 << 23) { if u < 0x3f800000 - (32 << 23) { /* handle underflow */ if u < (1 << 23) { force_eval!((x * x) as f32); } } else { /* |x| < 0.5, up to 1.7ulp error */ x = 0.5 * log1pf(2.0 * x + 2.0 * x * x / (1.0 - x)); } } else { /* avoid overflow */ x = 0.5 * log1pf(2.0 * (x / (1.0 - x))); } if sign { -x } else { x } } compiler_builtins-0.1.101/libm/src/math/cbrt.rs000064400000000000000000000103671046102023000174610ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== * * Optimized by Bruce D. Evans. */ /* cbrt(x) * Return cube root of x */ use core::f64; const B1: u32 = 715094163; /* B1 = (1023-1023/3-0.03306235651)*2**20 */ const B2: u32 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */ /* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */ const P0: f64 = 1.87595182427177009643; /* 0x3ffe03e6, 0x0f61e692 */ const P1: f64 = -1.88497979543377169875; /* 0xbffe28e0, 0x92f02420 */ const P2: f64 = 1.621429720105354466140; /* 0x3ff9f160, 0x4a49d6c2 */ const P3: f64 = -0.758397934778766047437; /* 0xbfe844cb, 0xbee751d9 */ const P4: f64 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ // Cube root (f64) /// /// Computes the cube root of the argument. 
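// A minimal usage sketch (illustrative addition, not from the original source).
// `cbrt` accepts any finite input, including negative values:
//
//     assert!((cbrt(27.0) - 3.0).abs() < 1e-15);
//     assert!((cbrt(-8.0) + 2.0).abs() < 1e-15);
//     assert_eq!(cbrt(0.0), 0.0); // cbrt(+-0) is itself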
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cbrt(x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 let mut ui: u64 = x.to_bits(); let mut r: f64; let s: f64; let mut t: f64; let w: f64; let mut hx: u32 = (ui >> 32) as u32 & 0x7fffffff; if hx >= 0x7ff00000 { /* cbrt(NaN,INF) is itself */ return x + x; } /* * Rough cbrt to 5 bits: * cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3) * where e is integral and >= 0, m is real and in [0, 1), and "/" and * "%" are integer division and modulus with rounding towards minus * infinity. The RHS is always >= the LHS and has a maximum relative * error of about 1 in 16. Adding a bias of -0.03306235651 to the * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE * floating point representation, for finite positive normal values, * ordinary integer divison of the value in bits magically gives * almost exactly the RHS of the above provided we first subtract the * exponent bias (1023 for doubles) and later add it back. We do the * subtraction virtually to keep e >= 0 so that ordinary integer * division rounds towards minus infinity; this is also efficient. */ if hx < 0x00100000 { /* zero or subnormal? */ ui = (x * x1p54).to_bits(); hx = (ui >> 32) as u32 & 0x7fffffff; if hx == 0 { return x; /* cbrt(0) is itself */ } hx = hx / 3 + B2; } else { hx = hx / 3 + B1; } ui &= 1 << 63; ui |= (hx as u64) << 32; t = f64::from_bits(ui); /* * New cbrt to 23 bits: * cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x) * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r) * to within 2**-23.5 when |r - 1| < 1/10. The rough approximation * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this * gives us bounds for r = t**3/x. * * Try to optimize for parallel evaluation as in __tanf.c. */ r = (t * t) * (t / x); t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4)); /* * Round t away from zero to 23 bits (sloppily except for ensuring that * the result is larger in magnitude than cbrt(x) but not much more than * 2 23-bit ulps larger). With rounding towards zero, the error bound * would be ~5/6 instead of ~4/6. With a maximum error of 2 23-bit ulps * in the rounded t, the infinite-precision error in the Newton * approximation barely affects third digit in the final error * 0.667; the error in the rounded t can be up to about 3 23-bit ulps * before the final error is larger than 0.667 ulps. */ ui = t.to_bits(); ui = (ui + 0x80000000) & 0xffffffffc0000000; t = f64::from_bits(ui); /* one step Newton iteration to 53 bits with error < 0.667 ulps */ s = t * t; /* t*t is exact */ r = x / s; /* error <= 0.5 ulps; |r| < |t| */ w = t + t; /* t+t is exact */ r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */ t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */ t } compiler_builtins-0.1.101/libm/src/math/cbrtf.rs000064400000000000000000000041451046102023000176240ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrtf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. * Debugged and optimized by Bruce D. Evans. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ /* cbrtf(x) * Return cube root of x */ use core::f32; const B1: u32 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */ const B2: u32 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */ /// Cube root (f32) /// /// Computes the cube root of the argument. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cbrtf(x: f32) -> f32 { let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 let mut r: f64; let mut t: f64; let mut ui: u32 = x.to_bits(); let mut hx: u32 = ui & 0x7fffffff; if hx >= 0x7f800000 { /* cbrt(NaN,INF) is itself */ return x + x; } /* rough cbrt to 5 bits */ if hx < 0x00800000 { /* zero or subnormal? */ if hx == 0 { return x; /* cbrt(+-0) is itself */ } ui = (x * x1p24).to_bits(); hx = ui & 0x7fffffff; hx = hx / 3 + B2; } else { hx = hx / 3 + B1; } ui &= 0x80000000; ui |= hx; /* * First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In * double precision so that its terms can be arranged for efficiency * without causing overflow or underflow. */ t = f32::from_bits(ui) as f64; r = t * t * t; t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r); /* * Second step Newton iteration to 47 bits. In double precision for * efficiency and accuracy. */ r = t * t * t; t = t * (x as f64 + x as f64 + r) / (x as f64 + r + r); /* rounding to 24 bits is perfect in round-to-nearest mode */ t as f32 } compiler_builtins-0.1.101/libm/src/math/ceil.rs000064400000000000000000000044351046102023000174420ustar 00000000000000#![allow(unreachable_code)] use core::f64; const TOINT: f64 = 1. / f64::EPSILON; /// Ceil (f64) /// /// Finds the nearest integer greater than or equal to `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ceil(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f64.ceil` native instruction, so we can leverage this for both code size // and speed. llvm_intrinsically_optimized! { #[cfg(target_arch = "wasm32")] { return unsafe { ::core::intrinsics::ceilf64(x) } } } #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] { //use an alternative implementation on x86, because the //main implementation fails with the x87 FPU used by //debian i386, probablly due to excess precision issues. //basic implementation taken from https://github.com/rust-lang/libm/issues/219 use super::fabs; if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { let truncated = x as i64 as f64; if truncated < x { return truncated + 1.0; } else { return truncated; } } else { return x; } } let u: u64 = x.to_bits(); let e: i64 = (u >> 52 & 0x7ff) as i64; let y: f64; if e >= 0x3ff + 52 || x == 0. { return x; } // y = int(x) - x, where int(x) is an integer neighbor of x y = if (u >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x }; // special case because of non-nearest rounding modes if e < 0x3ff { force_eval!(y); return if (u >> 63) != 0 { -0. } else { 1. }; } if y < 0. { x + y + 1. } else { x + y } } #[cfg(test)] mod tests { use super::*; use core::f64::*; #[test] fn sanity_check() { assert_eq!(ceil(1.1), 2.0); assert_eq!(ceil(2.9), 3.0); } /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil #[test] fn spec_tests() { // Not Asserted: that the current rounding mode has no effect. 
assert!(ceil(NAN).is_nan()); for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { assert_eq!(ceil(f), f); } } } compiler_builtins-0.1.101/libm/src/math/ceilf.rs000064400000000000000000000033261046102023000176060ustar 00000000000000use core::f32; /// Ceil (f32) /// /// Finds the nearest integer greater than or equal to `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ceilf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f32.ceil` native instruction, so we can leverage this for both code size // and speed. llvm_intrinsically_optimized! { #[cfg(target_arch = "wasm32")] { return unsafe { ::core::intrinsics::ceilf32(x) } } } let mut ui = x.to_bits(); let e = (((ui >> 23) & 0xff).wrapping_sub(0x7f)) as i32; if e >= 23 { return x; } if e >= 0 { let m = 0x007fffff >> e; if (ui & m) == 0 { return x; } force_eval!(x + f32::from_bits(0x7b800000)); if ui >> 31 == 0 { ui += m; } ui &= !m; } else { force_eval!(x + f32::from_bits(0x7b800000)); if ui >> 31 != 0 { return -0.0; } else if ui << 1 != 0 { return 1.0; } } f32::from_bits(ui) } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::*; use core::f32::*; #[test] fn sanity_check() { assert_eq!(ceilf(1.1), 2.0); assert_eq!(ceilf(2.9), 3.0); } /// The spec: https://en.cppreference.com/w/cpp/numeric/math/ceil #[test] fn spec_tests() { // Not Asserted: that the current rounding mode has no effect. assert!(ceilf(NAN).is_nan()); for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { assert_eq!(ceilf(f), f); } } } compiler_builtins-0.1.101/libm/src/math/copysign.rs000064400000000000000000000006231046102023000203540ustar 00000000000000/// Sign of Y, magnitude of X (f64) /// /// Constructs a number with the magnitude (absolute value) of its /// first argument, `x`, and the sign of its second argument, `y`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn copysign(x: f64, y: f64) -> f64 { let mut ux = x.to_bits(); let uy = y.to_bits(); ux &= (!0) >> 1; ux |= uy & (1 << 63); f64::from_bits(ux) } compiler_builtins-0.1.101/libm/src/math/copysignf.rs000064400000000000000000000006261046102023000205250ustar 00000000000000/// Sign of Y, magnitude of X (f32) /// /// Constructs a number with the magnitude (absolute value) of its /// first argument, `x`, and the sign of its second argument, `y`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn copysignf(x: f32, y: f32) -> f32 { let mut ux = x.to_bits(); let uy = y.to_bits(); ux &= 0x7fffffff; ux |= uy & 0x80000000; f32::from_bits(ux) } compiler_builtins-0.1.101/libm/src/math/cos.rs000064400000000000000000000043171046102023000173110ustar 00000000000000// origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */ // // ==================================================== // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. // // Developed at SunPro, a Sun Microsystems, Inc. business. // Permission to use, copy, modify, and distribute this // software is freely granted, provided that this notice // is preserved. // ==================================================== use super::{k_cos, k_sin, rem_pio2}; // cos(x) // Return cosine function of x. // // kernel function: // k_sin ... sine function on [-pi/4,pi/4] // k_cos ... cosine function on [-pi/4,pi/4] // rem_pio2 ... argument reduction routine // // Method. // Let S,C and T denote the sin, cos and tan respectively on // [-PI/4, +PI/4]. 
Reduce the argument x to y1+y2 = x-k*pi/2 // in [-pi/4 , +pi/4], and let n = k mod 4. // We have // // n sin(x) cos(x) tan(x) // ---------------------------------------------------------- // 0 S C T // 1 C -S -1/T // 2 -S -C T // 3 -C S -1/T // ---------------------------------------------------------- // // Special cases: // Let trig be any of sin, cos, or tan. // trig(+-INF) is NaN, with signals; // trig(NaN) is that NaN; // // Accuracy: // TRIG(x) returns trig(x) nearly rounded // #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cos(x: f64) -> f64 { let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; /* |x| ~< pi/4 */ if ix <= 0x3fe921fb { if ix < 0x3e46a09e { /* if x < 2**-27 * sqrt(2) */ /* raise inexact if x != 0 */ if x as i32 == 0 { return 1.0; } } return k_cos(x, 0.0); } /* cos(Inf or NaN) is NaN */ if ix >= 0x7ff00000 { return x - x; } /* argument reduction needed */ let (n, y0, y1) = rem_pio2(x); match n & 3 { 0 => k_cos(y0, y1), 1 => -k_sin(y0, y1, 1), 2 => -k_cos(y0, y1), _ => k_sin(y0, y1, 1), } } compiler_builtins-0.1.101/libm/src/math/cosf.rs000064400000000000000000000046061046102023000174600ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_cosf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. * Optimized by Bruce D. Evans. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use super::{k_cosf, k_sinf, rem_pio2f}; use core::f64::consts::FRAC_PI_2; /* Small multiples of pi/2 rounded to double precision. */ const C1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ const C2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ const C3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ const C4_PIO2: f64 = 4. * FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cosf(x: f32) -> f32 { let x64 = x as f64; let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 let mut ix = x.to_bits(); let sign = (ix >> 31) != 0; ix &= 0x7fffffff; if ix <= 0x3f490fda { /* |x| ~<= pi/4 */ if ix < 0x39800000 { /* |x| < 2**-12 */ /* raise inexact if x != 0 */ force_eval!(x + x1p120); return 1.; } return k_cosf(x64); } if ix <= 0x407b53d1 { /* |x| ~<= 5*pi/4 */ if ix > 0x4016cbe3 { /* |x| ~> 3*pi/4 */ return -k_cosf(if sign { x64 + C2_PIO2 } else { x64 - C2_PIO2 }); } else if sign { return k_sinf(x64 + C1_PIO2); } else { return k_sinf(C1_PIO2 - x64); } } if ix <= 0x40e231d5 { /* |x| ~<= 9*pi/4 */ if ix > 0x40afeddf { /* |x| ~> 7*pi/4 */ return k_cosf(if sign { x64 + C4_PIO2 } else { x64 - C4_PIO2 }); } else if sign { return k_sinf(-x64 - C3_PIO2); } else { return k_sinf(x64 - C3_PIO2); } } /* cos(Inf or NaN) is NaN */ if ix >= 0x7f800000 { return x - x; } /* general argument reduction needed */ let (n, y) = rem_pio2f(x); match n & 3 { 0 => k_cosf(y), 1 => k_sinf(-y), 2 => -k_cosf(y), _ => k_sinf(y), } } compiler_builtins-0.1.101/libm/src/math/cosh.rs000064400000000000000000000017411046102023000174570ustar 00000000000000use super::exp; use super::expm1; use super::k_expo2; /// Hyperbolic cosine (f64) /// /// Computes the hyperbolic cosine of the argument x. /// Is defined as `(exp(x) + exp(-x))/2` /// Angles are specified in radians. 
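// A minimal usage sketch (illustrative addition, not from the original source).
// `cosh` is even, grows like exp(|x|)/2, and satisfies cosh(0) = 1:
//
//     assert_eq!(cosh(0.0), 1.0);
//     assert!((cosh(1.0) - 1.543_080_634_815_243_7).abs() < 1e-15); // (e + 1/e) / 2
//     assert_eq!(cosh(-2.5), cosh(2.5));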
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn cosh(mut x: f64) -> f64 { /* |x| */ let mut ix = x.to_bits(); ix &= 0x7fffffffffffffff; x = f64::from_bits(ix); let w = ix >> 32; /* |x| < log(2) */ if w < 0x3fe62e42 { if w < 0x3ff00000 - (26 << 20) { let x1p120 = f64::from_bits(0x4770000000000000); force_eval!(x + x1p120); return 1.; } let t = expm1(x); // exponential minus 1 return 1. + t * t / (2. * (1. + t)); } /* |x| < log(DBL_MAX) */ if w < 0x40862e42 { let t = exp(x); /* note: if x>log(0x1p26) then the 1/t is not needed */ return 0.5 * (t + 1. / t); } /* |x| > log(DBL_MAX) or nan */ k_expo2(x) } compiler_builtins-0.1.101/libm/src/math/coshf.rs000064400000000000000000000016161046102023000176260ustar 00000000000000use super::expf; use super::expm1f; use super::k_expo2f; /// Hyperbolic cosine (f64) /// /// Computes the hyperbolic cosine of the argument x. /// Is defined as `(exp(x) + exp(-x))/2` /// Angles are specified in radians. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn coshf(mut x: f32) -> f32 { let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 /* |x| */ let mut ix = x.to_bits(); ix &= 0x7fffffff; x = f32::from_bits(ix); let w = ix; /* |x| < log(2) */ if w < 0x3f317217 { if w < (0x3f800000 - (12 << 23)) { force_eval!(x + x1p120); return 1.; } let t = expm1f(x); return 1. + t * t / (2. * (1. + t)); } /* |x| < log(FLT_MAX) */ if w < 0x42b17217 { let t = expf(x); return 0.5 * (t + 1. / t); } /* |x| > log(FLT_MAX) or nan */ k_expo2f(x) } compiler_builtins-0.1.101/libm/src/math/erf.rs000064400000000000000000000305471046102023000173050ustar 00000000000000use super::{exp, fabs, get_high_word, with_set_low_word}; /* origin: FreeBSD /usr/src/lib/msun/src/s_erf.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* double erf(double x) * double erfc(double x) * x * 2 |\ * erf(x) = --------- | exp(-t*t)dt * sqrt(pi) \| * 0 * * erfc(x) = 1-erf(x) * Note that * erf(-x) = -erf(x) * erfc(-x) = 2 - erfc(x) * * Method: * 1. For |x| in [0, 0.84375] * erf(x) = x + x*R(x^2) * erfc(x) = 1 - erf(x) if x in [-.84375,0.25] * = 0.5 + ((0.5-x)-x*R) if x in [0.25,0.84375] * where R = P/Q where P is an odd poly of degree 8 and * Q is an odd poly of degree 10. * -57.90 * | R - (erf(x)-x)/x | <= 2 * * * Remark. The formula is derived by noting * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....) * and that * 2/sqrt(pi) = 1.128379167095512573896158903121545171688 * is close to one. The interval is chosen because the fix * point of erf(x) is near 0.6174 (i.e., erf(x)=x when x is * near 0.6174), and by some experiment, 0.84375 is chosen to * guarantee the error is less than one ulp for erf. * * 2. For |x| in [0.84375,1.25], let s = |x| - 1, and * c = 0.84506291151 rounded to single (24 bits) * erf(x) = sign(x) * (c + P1(s)/Q1(s)) * erfc(x) = (1-c) - P1(s)/Q1(s) if x > 0 * 1+(c+P1(s)/Q1(s)) if x < 0 * |P1/Q1 - (erf(|x|)-c)| <= 2**-59.06 * Remark: here we use the taylor series expansion at x=1. * erf(1+s) = erf(1) + s*Poly(s) * = 0.845.. 
+ P1(s)/Q1(s) * That is, we use rational approximation to approximate * erf(1+s) - (c = (single)0.84506291151) * Note that |P1/Q1|< 0.078 for x in [0.84375,1.25] * where * P1(s) = degree 6 poly in s * Q1(s) = degree 6 poly in s * * 3. For x in [1.25,1/0.35(~2.857143)], * erfc(x) = (1/x)*exp(-x*x-0.5625+R1/S1) * erf(x) = 1 - erfc(x) * where * R1(z) = degree 7 poly in z, (z=1/x^2) * S1(z) = degree 8 poly in z * * 4. For x in [1/0.35,28] * erfc(x) = (1/x)*exp(-x*x-0.5625+R2/S2) if x > 0 * = 2.0 - (1/x)*exp(-x*x-0.5625+R2/S2) if -6 x >= 28 * erf(x) = sign(x) *(1 - tiny) (raise inexact) * erfc(x) = tiny*tiny (raise underflow) if x > 0 * = 2 - tiny if x<0 * * 7. Special case: * erf(0) = 0, erf(inf) = 1, erf(-inf) = -1, * erfc(0) = 1, erfc(inf) = 0, erfc(-inf) = 2, * erfc/erf(NaN) is NaN */ const ERX: f64 = 8.45062911510467529297e-01; /* 0x3FEB0AC1, 0x60000000 */ /* * Coefficients for approximation to erf on [0,0.84375] */ const EFX8: f64 = 1.02703333676410069053e+00; /* 0x3FF06EBA, 0x8214DB69 */ const PP0: f64 = 1.28379167095512558561e-01; /* 0x3FC06EBA, 0x8214DB68 */ const PP1: f64 = -3.25042107247001499370e-01; /* 0xBFD4CD7D, 0x691CB913 */ const PP2: f64 = -2.84817495755985104766e-02; /* 0xBF9D2A51, 0xDBD7194F */ const PP3: f64 = -5.77027029648944159157e-03; /* 0xBF77A291, 0x236668E4 */ const PP4: f64 = -2.37630166566501626084e-05; /* 0xBEF8EAD6, 0x120016AC */ const QQ1: f64 = 3.97917223959155352819e-01; /* 0x3FD97779, 0xCDDADC09 */ const QQ2: f64 = 6.50222499887672944485e-02; /* 0x3FB0A54C, 0x5536CEBA */ const QQ3: f64 = 5.08130628187576562776e-03; /* 0x3F74D022, 0xC4D36B0F */ const QQ4: f64 = 1.32494738004321644526e-04; /* 0x3F215DC9, 0x221C1A10 */ const QQ5: f64 = -3.96022827877536812320e-06; /* 0xBED09C43, 0x42A26120 */ /* * Coefficients for approximation to erf in [0.84375,1.25] */ const PA0: f64 = -2.36211856075265944077e-03; /* 0xBF6359B8, 0xBEF77538 */ const PA1: f64 = 4.14856118683748331666e-01; /* 0x3FDA8D00, 0xAD92B34D */ const PA2: f64 = -3.72207876035701323847e-01; /* 0xBFD7D240, 0xFBB8C3F1 */ const PA3: f64 = 3.18346619901161753674e-01; /* 0x3FD45FCA, 0x805120E4 */ const PA4: f64 = -1.10894694282396677476e-01; /* 0xBFBC6398, 0x3D3E28EC */ const PA5: f64 = 3.54783043256182359371e-02; /* 0x3FA22A36, 0x599795EB */ const PA6: f64 = -2.16637559486879084300e-03; /* 0xBF61BF38, 0x0A96073F */ const QA1: f64 = 1.06420880400844228286e-01; /* 0x3FBB3E66, 0x18EEE323 */ const QA2: f64 = 5.40397917702171048937e-01; /* 0x3FE14AF0, 0x92EB6F33 */ const QA3: f64 = 7.18286544141962662868e-02; /* 0x3FB2635C, 0xD99FE9A7 */ const QA4: f64 = 1.26171219808761642112e-01; /* 0x3FC02660, 0xE763351F */ const QA5: f64 = 1.36370839120290507362e-02; /* 0x3F8BEDC2, 0x6B51DD1C */ const QA6: f64 = 1.19844998467991074170e-02; /* 0x3F888B54, 0x5735151D */ /* * Coefficients for approximation to erfc in [1.25,1/0.35] */ const RA0: f64 = -9.86494403484714822705e-03; /* 0xBF843412, 0x600D6435 */ const RA1: f64 = -6.93858572707181764372e-01; /* 0xBFE63416, 0xE4BA7360 */ const RA2: f64 = -1.05586262253232909814e+01; /* 0xC0251E04, 0x41B0E726 */ const RA3: f64 = -6.23753324503260060396e+01; /* 0xC04F300A, 0xE4CBA38D */ const RA4: f64 = -1.62396669462573470355e+02; /* 0xC0644CB1, 0x84282266 */ const RA5: f64 = -1.84605092906711035994e+02; /* 0xC067135C, 0xEBCCABB2 */ const RA6: f64 = -8.12874355063065934246e+01; /* 0xC0545265, 0x57E4D2F2 */ const RA7: f64 = -9.81432934416914548592e+00; /* 0xC023A0EF, 0xC69AC25C */ const SA1: f64 = 1.96512716674392571292e+01; /* 0x4033A6B9, 0xBD707687 */ const SA2: f64 = 
1.37657754143519042600e+02; /* 0x4061350C, 0x526AE721 */ const SA3: f64 = 4.34565877475229228821e+02; /* 0x407B290D, 0xD58A1A71 */ const SA4: f64 = 6.45387271733267880336e+02; /* 0x40842B19, 0x21EC2868 */ const SA5: f64 = 4.29008140027567833386e+02; /* 0x407AD021, 0x57700314 */ const SA6: f64 = 1.08635005541779435134e+02; /* 0x405B28A3, 0xEE48AE2C */ const SA7: f64 = 6.57024977031928170135e+00; /* 0x401A47EF, 0x8E484A93 */ const SA8: f64 = -6.04244152148580987438e-02; /* 0xBFAEEFF2, 0xEE749A62 */ /* * Coefficients for approximation to erfc in [1/.35,28] */ const RB0: f64 = -9.86494292470009928597e-03; /* 0xBF843412, 0x39E86F4A */ const RB1: f64 = -7.99283237680523006574e-01; /* 0xBFE993BA, 0x70C285DE */ const RB2: f64 = -1.77579549177547519889e+01; /* 0xC031C209, 0x555F995A */ const RB3: f64 = -1.60636384855821916062e+02; /* 0xC064145D, 0x43C5ED98 */ const RB4: f64 = -6.37566443368389627722e+02; /* 0xC083EC88, 0x1375F228 */ const RB5: f64 = -1.02509513161107724954e+03; /* 0xC0900461, 0x6A2E5992 */ const RB6: f64 = -4.83519191608651397019e+02; /* 0xC07E384E, 0x9BDC383F */ const SB1: f64 = 3.03380607434824582924e+01; /* 0x403E568B, 0x261D5190 */ const SB2: f64 = 3.25792512996573918826e+02; /* 0x40745CAE, 0x221B9F0A */ const SB3: f64 = 1.53672958608443695994e+03; /* 0x409802EB, 0x189D5118 */ const SB4: f64 = 3.19985821950859553908e+03; /* 0x40A8FFB7, 0x688C246A */ const SB5: f64 = 2.55305040643316442583e+03; /* 0x40A3F219, 0xCEDF3BE6 */ const SB6: f64 = 4.74528541206955367215e+02; /* 0x407DA874, 0xE79FE763 */ const SB7: f64 = -2.24409524465858183362e+01; /* 0xC03670E2, 0x42712D62 */ fn erfc1(x: f64) -> f64 { let s: f64; let p: f64; let q: f64; s = fabs(x) - 1.0; p = PA0 + s * (PA1 + s * (PA2 + s * (PA3 + s * (PA4 + s * (PA5 + s * PA6))))); q = 1.0 + s * (QA1 + s * (QA2 + s * (QA3 + s * (QA4 + s * (QA5 + s * QA6))))); 1.0 - ERX - p / q } fn erfc2(ix: u32, mut x: f64) -> f64 { let s: f64; let r: f64; let big_s: f64; let z: f64; if ix < 0x3ff40000 { /* |x| < 1.25 */ return erfc1(x); } x = fabs(x); s = 1.0 / (x * x); if ix < 0x4006db6d { /* |x| < 1/.35 ~ 2.85714 */ r = RA0 + s * (RA1 + s * (RA2 + s * (RA3 + s * (RA4 + s * (RA5 + s * (RA6 + s * RA7)))))); big_s = 1.0 + s * (SA1 + s * (SA2 + s * (SA3 + s * (SA4 + s * (SA5 + s * (SA6 + s * (SA7 + s * SA8))))))); } else { /* |x| > 1/.35 */ r = RB0 + s * (RB1 + s * (RB2 + s * (RB3 + s * (RB4 + s * (RB5 + s * RB6))))); big_s = 1.0 + s * (SB1 + s * (SB2 + s * (SB3 + s * (SB4 + s * (SB5 + s * (SB6 + s * SB7)))))); } z = with_set_low_word(x, 0); exp(-z * z - 0.5625) * exp((z - x) * (z + x) + r / big_s) / x } /// Error function (f64) /// /// Calculates an approximation to the “error function”, which estimates /// the probability that an observation will fall within x standard /// deviations of the mean (assuming a normal distribution). 
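// A minimal usage sketch (illustrative addition, not from the original source).
// `erf` is odd, tends to +-1 for large |x|, and erf(0) = 0:
//
//     assert_eq!(erf(0.0), 0.0);
//     assert!((erf(1.0) - 0.842_700_792_949_714_9).abs() < 1e-15);
//     assert_eq!(erf(f64::INFINITY), 1.0);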
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn erf(x: f64) -> f64 { let r: f64; let s: f64; let z: f64; let y: f64; let mut ix: u32; let sign: usize; ix = get_high_word(x); sign = (ix >> 31) as usize; ix &= 0x7fffffff; if ix >= 0x7ff00000 { /* erf(nan)=nan, erf(+-inf)=+-1 */ return 1.0 - 2.0 * (sign as f64) + 1.0 / x; } if ix < 0x3feb0000 { /* |x| < 0.84375 */ if ix < 0x3e300000 { /* |x| < 2**-28 */ /* avoid underflow */ return 0.125 * (8.0 * x + EFX8 * x); } z = x * x; r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); y = r / s; return x + x * y; } if ix < 0x40180000 { /* 0.84375 <= |x| < 6 */ y = 1.0 - erfc2(ix, x); } else { let x1p_1022 = f64::from_bits(0x0010000000000000); y = 1.0 - x1p_1022; } if sign != 0 { -y } else { y } } /// Complementary error function (f64) /// /// Calculates the complementary probability. /// Is `1 - erf(x)`. Is computed directly, so that you can use it to avoid /// the loss of precision that would result from subtracting /// large probabilities (on large `x`) from 1. pub fn erfc(x: f64) -> f64 { let r: f64; let s: f64; let z: f64; let y: f64; let mut ix: u32; let sign: usize; ix = get_high_word(x); sign = (ix >> 31) as usize; ix &= 0x7fffffff; if ix >= 0x7ff00000 { /* erfc(nan)=nan, erfc(+-inf)=0,2 */ return 2.0 * (sign as f64) + 1.0 / x; } if ix < 0x3feb0000 { /* |x| < 0.84375 */ if ix < 0x3c700000 { /* |x| < 2**-56 */ return 1.0 - x; } z = x * x; r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); y = r / s; if sign != 0 || ix < 0x3fd00000 { /* x < 1/4 */ return 1.0 - (x + x * y); } return 0.5 - (x - 0.5 + x * y); } if ix < 0x403c0000 { /* 0.84375 <= |x| < 28 */ if sign != 0 { return 2.0 - erfc2(ix, x); } else { return erfc2(ix, x); } } let x1p_1022 = f64::from_bits(0x0010000000000000); if sign != 0 { 2.0 - x1p_1022 } else { x1p_1022 * x1p_1022 } } compiler_builtins-0.1.101/libm/src/math/erff.rs000064400000000000000000000166601046102023000174530ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_erff.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ use super::{expf, fabsf}; const ERX: f32 = 8.4506291151e-01; /* 0x3f58560b */ /* * Coefficients for approximation to erf on [0,0.84375] */ const EFX8: f32 = 1.0270333290e+00; /* 0x3f8375d4 */ const PP0: f32 = 1.2837916613e-01; /* 0x3e0375d4 */ const PP1: f32 = -3.2504209876e-01; /* 0xbea66beb */ const PP2: f32 = -2.8481749818e-02; /* 0xbce9528f */ const PP3: f32 = -5.7702702470e-03; /* 0xbbbd1489 */ const PP4: f32 = -2.3763017452e-05; /* 0xb7c756b1 */ const QQ1: f32 = 3.9791721106e-01; /* 0x3ecbbbce */ const QQ2: f32 = 6.5022252500e-02; /* 0x3d852a63 */ const QQ3: f32 = 5.0813062117e-03; /* 0x3ba68116 */ const QQ4: f32 = 1.3249473704e-04; /* 0x390aee49 */ const QQ5: f32 = -3.9602282413e-06; /* 0xb684e21a */ /* * Coefficients for approximation to erf in [0.84375,1.25] */ const PA0: f32 = -2.3621185683e-03; /* 0xbb1acdc6 */ const PA1: f32 = 4.1485610604e-01; /* 0x3ed46805 */ const PA2: f32 = -3.7220788002e-01; /* 0xbebe9208 */ const PA3: f32 = 3.1834661961e-01; /* 0x3ea2fe54 */ const PA4: f32 = -1.1089469492e-01; /* 0xbde31cc2 */ const PA5: f32 = 3.5478305072e-02; /* 0x3d1151b3 */ const PA6: f32 = -2.1663755178e-03; /* 0xbb0df9c0 */ const QA1: f32 = 1.0642088205e-01; /* 0x3dd9f331 */ const QA2: f32 = 5.4039794207e-01; /* 0x3f0a5785 */ const QA3: f32 = 7.1828655899e-02; /* 0x3d931ae7 */ const QA4: f32 = 1.2617121637e-01; /* 0x3e013307 */ const QA5: f32 = 1.3637083583e-02; /* 0x3c5f6e13 */ const QA6: f32 = 1.1984500103e-02; /* 0x3c445aa3 */ /* * Coefficients for approximation to erfc in [1.25,1/0.35] */ const RA0: f32 = -9.8649440333e-03; /* 0xbc21a093 */ const RA1: f32 = -6.9385856390e-01; /* 0xbf31a0b7 */ const RA2: f32 = -1.0558626175e+01; /* 0xc128f022 */ const RA3: f32 = -6.2375331879e+01; /* 0xc2798057 */ const RA4: f32 = -1.6239666748e+02; /* 0xc322658c */ const RA5: f32 = -1.8460508728e+02; /* 0xc3389ae7 */ const RA6: f32 = -8.1287437439e+01; /* 0xc2a2932b */ const RA7: f32 = -9.8143291473e+00; /* 0xc11d077e */ const SA1: f32 = 1.9651271820e+01; /* 0x419d35ce */ const SA2: f32 = 1.3765776062e+02; /* 0x4309a863 */ const SA3: f32 = 4.3456588745e+02; /* 0x43d9486f */ const SA4: f32 = 6.4538726807e+02; /* 0x442158c9 */ const SA5: f32 = 4.2900814819e+02; /* 0x43d6810b */ const SA6: f32 = 1.0863500214e+02; /* 0x42d9451f */ const SA7: f32 = 6.5702495575e+00; /* 0x40d23f7c */ const SA8: f32 = -6.0424413532e-02; /* 0xbd777f97 */ /* * Coefficients for approximation to erfc in [1/.35,28] */ const RB0: f32 = -9.8649431020e-03; /* 0xbc21a092 */ const RB1: f32 = -7.9928326607e-01; /* 0xbf4c9dd4 */ const RB2: f32 = -1.7757955551e+01; /* 0xc18e104b */ const RB3: f32 = -1.6063638306e+02; /* 0xc320a2ea */ const RB4: f32 = -6.3756646729e+02; /* 0xc41f6441 */ const RB5: f32 = -1.0250950928e+03; /* 0xc480230b */ const RB6: f32 = -4.8351919556e+02; /* 0xc3f1c275 */ const SB1: f32 = 3.0338060379e+01; /* 0x41f2b459 */ const SB2: f32 = 3.2579251099e+02; /* 0x43a2e571 */ const SB3: f32 = 1.5367296143e+03; /* 0x44c01759 */ const SB4: f32 = 3.1998581543e+03; /* 0x4547fdbb */ const SB5: f32 = 2.5530502930e+03; /* 0x451f90ce */ const SB6: f32 = 4.7452853394e+02; /* 0x43ed43a7 */ const SB7: f32 = -2.2440952301e+01; /* 0xc1b38712 */ fn erfc1(x: f32) -> f32 { let s: f32; let p: f32; let q: f32; s = fabsf(x) - 1.0; p = PA0 + s * (PA1 + s * (PA2 + s * (PA3 + s * (PA4 + s * (PA5 + s * PA6))))); q = 1.0 + s * (QA1 + s * (QA2 + s * (QA3 + s * (QA4 + s * (QA5 + s * QA6))))); return 1.0 - ERX - p / q; } fn erfc2(mut ix: u32, mut x: f32) -> f32 { let s: f32; let r: 
f32; let big_s: f32; let z: f32; if ix < 0x3fa00000 { /* |x| < 1.25 */ return erfc1(x); } x = fabsf(x); s = 1.0 / (x * x); if ix < 0x4036db6d { /* |x| < 1/0.35 */ r = RA0 + s * (RA1 + s * (RA2 + s * (RA3 + s * (RA4 + s * (RA5 + s * (RA6 + s * RA7)))))); big_s = 1.0 + s * (SA1 + s * (SA2 + s * (SA3 + s * (SA4 + s * (SA5 + s * (SA6 + s * (SA7 + s * SA8))))))); } else { /* |x| >= 1/0.35 */ r = RB0 + s * (RB1 + s * (RB2 + s * (RB3 + s * (RB4 + s * (RB5 + s * RB6))))); big_s = 1.0 + s * (SB1 + s * (SB2 + s * (SB3 + s * (SB4 + s * (SB5 + s * (SB6 + s * SB7)))))); } ix = x.to_bits(); z = f32::from_bits(ix & 0xffffe000); expf(-z * z - 0.5625) * expf((z - x) * (z + x) + r / big_s) / x } /// Error function (f32) /// /// Calculates an approximation to the “error function”, which estimates /// the probability that an observation will fall within x standard /// deviations of the mean (assuming a normal distribution). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn erff(x: f32) -> f32 { let r: f32; let s: f32; let z: f32; let y: f32; let mut ix: u32; let sign: usize; ix = x.to_bits(); sign = (ix >> 31) as usize; ix &= 0x7fffffff; if ix >= 0x7f800000 { /* erf(nan)=nan, erf(+-inf)=+-1 */ return 1.0 - 2.0 * (sign as f32) + 1.0 / x; } if ix < 0x3f580000 { /* |x| < 0.84375 */ if ix < 0x31800000 { /* |x| < 2**-28 */ /*avoid underflow */ return 0.125 * (8.0 * x + EFX8 * x); } z = x * x; r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); y = r / s; return x + x * y; } if ix < 0x40c00000 { /* |x| < 6 */ y = 1.0 - erfc2(ix, x); } else { let x1p_120 = f32::from_bits(0x03800000); y = 1.0 - x1p_120; } if sign != 0 { -y } else { y } } /// Complementary error function (f32) /// /// Calculates the complementary probability. /// Is `1 - erf(x)`. Is computed directly, so that you can use it to avoid /// the loss of precision that would result from subtracting /// large probabilities (on large `x`) from 1. pub fn erfcf(x: f32) -> f32 { let r: f32; let s: f32; let z: f32; let y: f32; let mut ix: u32; let sign: usize; ix = x.to_bits(); sign = (ix >> 31) as usize; ix &= 0x7fffffff; if ix >= 0x7f800000 { /* erfc(nan)=nan, erfc(+-inf)=0,2 */ return 2.0 * (sign as f32) + 1.0 / x; } if ix < 0x3f580000 { /* |x| < 0.84375 */ if ix < 0x23800000 { /* |x| < 2**-56 */ return 1.0 - x; } z = x * x; r = PP0 + z * (PP1 + z * (PP2 + z * (PP3 + z * PP4))); s = 1.0 + z * (QQ1 + z * (QQ2 + z * (QQ3 + z * (QQ4 + z * QQ5)))); y = r / s; if sign != 0 || ix < 0x3e800000 { /* x < 1/4 */ return 1.0 - (x + x * y); } return 0.5 - (x - 0.5 + x * y); } if ix < 0x41e00000 { /* |x| < 28 */ if sign != 0 { return 2.0 - erfc2(ix, x); } else { return erfc2(ix, x); } } let x1p_120 = f32::from_bits(0x03800000); if sign != 0 { 2.0 - x1p_120 } else { x1p_120 * x1p_120 } } compiler_builtins-0.1.101/libm/src/math/exp.rs000064400000000000000000000117531046102023000173230ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_exp.c */ /* * ==================================================== * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. * * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* exp(x) * Returns the exponential of x. * * Method * 1. Argument reduction: * Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. * Given x, find r and integer k such that * * x = k*ln2 + r, |r| <= 0.5*ln2. 
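 *
 *           Worked example (illustrative addition, not part of the original
 *           comment): for x = 1, k = 1 and r = 1 - ln2 ~ 0.30685, so the
 *           result is reconstructed as exp(1) = 2^1 * exp(0.30685)
 *           ~ 2 * 1.35914 ~ 2.71828.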
* * Here r will be represented as r = hi-lo for better * accuracy. * * 2. Approximation of exp(r) by a special rational function on * the interval [0,0.34658]: * Write * R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... * We use a special Remez algorithm on [0,0.34658] to generate * a polynomial of degree 5 to approximate R. The maximum error * of this polynomial approximation is bounded by 2**-59. In * other words, * R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 * (where z=r*r, and the values of P1 to P5 are listed below) * and * | 5 | -59 * | 2.0+P1*z+...+P5*z - R(z) | <= 2 * | | * The computation of exp(r) thus becomes * 2*r * exp(r) = 1 + ---------- * R(r) - r * r*c(r) * = 1 + r + ----------- (for better accuracy) * 2 - c(r) * where * 2 4 10 * c(r) = r - (P1*r + P2*r + ... + P5*r ). * * 3. Scale back to obtain exp(x): * From step 1, we have * exp(x) = 2^k * exp(r) * * Special cases: * exp(INF) is INF, exp(NaN) is NaN; * exp(-INF) is 0, and * for finite argument, only exp(0)=1 is exact. * * Accuracy: * according to an error analysis, the error is always less than * 1 ulp (unit in the last place). * * Misc. info. * For IEEE double * if x > 709.782712893383973096 then exp(x) overflows * if x < -745.133219101941108420 then exp(x) underflows */ use super::scalbn; const HALF: [f64; 2] = [0.5, -0.5]; const LN2HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */ const LN2LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */ const INVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547, 0x652b82fe */ const P1: f64 = 1.66666666666666019037e-01; /* 0x3FC55555, 0x5555553E */ const P2: f64 = -2.77777777770155933842e-03; /* 0xBF66C16C, 0x16BEBD93 */ const P3: f64 = 6.61375632143793436117e-05; /* 0x3F11566A, 0xAF25DE2C */ const P4: f64 = -1.65339022054652515390e-06; /* 0xBEBBBD41, 0xC5D26BF1 */ const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */ /// Exponential, base *e* (f64) /// /// Calculate the exponential of `x`, that is, *e* raised to the power `x` /// (where *e* is the base of the natural system of logarithms, approximately 2.71828). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp(mut x: f64) -> f64 { let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 let x1p_149 = f64::from_bits(0x36a0000000000000); // 0x1p-149 === 2 ^ -149 let hi: f64; let lo: f64; let c: f64; let xx: f64; let y: f64; let k: i32; let sign: i32; let mut hx: u32; hx = (x.to_bits() >> 32) as u32; sign = (hx >> 31) as i32; hx &= 0x7fffffff; /* high word of |x| */ /* special cases */ if hx >= 0x4086232b { /* if |x| >= 708.39... */ if x.is_nan() { return x; } if x > 709.782712893383973096 { /* overflow if x!=inf */ x *= x1p1023; return x; } if x < -708.39641853226410622 { /* underflow if x!=-inf */ force_eval!((-x1p_149 / x) as f32); if x < -745.13321910194110842 { return 0.; } } } /* argument reduction */ if hx > 0x3fd62e42 { /* if |x| > 0.5 ln2 */ if hx >= 0x3ff0a2b2 { /* if |x| >= 1.5 ln2 */ k = (INVLN2 * x + i!(HALF, sign as usize)) as i32; } else { k = 1 - sign - sign; } hi = x - k as f64 * LN2HI; /* k*ln2hi is exact here */ lo = k as f64 * LN2LO; x = hi - lo; } else if hx > 0x3e300000 { /* if |x| > 2**-28 */ k = 0; hi = x; lo = 0.; } else { /* inexact if x!=0 */ force_eval!(x1p1023 + x); return 1. + x; } /* x is now in primary range */ xx = x * x; c = x - xx * (P1 + xx * (P2 + xx * (P3 + xx * (P4 + xx * P5)))); y = 1. + (x * c / (2. 
- c) - lo + hi); if k == 0 { y } else { scalbn(y, k) } } compiler_builtins-0.1.101/libm/src/math/exp10.rs000064400000000000000000000013621046102023000174570ustar 00000000000000use super::{exp2, modf, pow}; const LN10: f64 = 3.32192809488736234787031942948939; const P10: &[f64] = &[ 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, ]; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp10(x: f64) -> f64 { let (mut y, n) = modf(x); let u: u64 = n.to_bits(); /* fabs(n) < 16 without raising invalid on nan */ if (u >> 52 & 0x7ff) < 0x3ff + 4 { if y == 0.0 { return i!(P10, ((n as isize) + 15) as usize); } y = exp2(LN10 * y); return y * i!(P10, ((n as isize) + 15) as usize); } return pow(10.0, x); } compiler_builtins-0.1.101/libm/src/math/exp10f.rs000064400000000000000000000013371046102023000176270ustar 00000000000000use super::{exp2, exp2f, modff}; const LN10_F32: f32 = 3.32192809488736234787031942948939; const LN10_F64: f64 = 3.32192809488736234787031942948939; const P10: &[f32] = &[ 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, ]; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp10f(x: f32) -> f32 { let (mut y, n) = modff(x); let u = n.to_bits(); /* fabsf(n) < 8 without raising invalid on nan */ if (u >> 23 & 0xff) < 0x7f + 3 { if y == 0.0 { return i!(P10, ((n as isize) + 7) as usize); } y = exp2f(LN10_F32 * y); return y * i!(P10, ((n as isize) + 7) as usize); } return exp2(LN10_F64 * (x as f64)) as f32; } compiler_builtins-0.1.101/libm/src/math/exp2.rs000064400000000000000000000400641046102023000174020ustar 00000000000000// origin: FreeBSD /usr/src/lib/msun/src/s_exp2.c */ //- // Copyright (c) 2005 David Schultz // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS // OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF // SUCH DAMAGE. 
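// Note on the data below (an explanatory aside, not in the upstream source):
// the static `TBL` interleaves the two "virtual tables" described in the
// method comment that follows it -- even indices hold the bits of
// exp2t[i] = exp2(i/TBLSIZE + eps[i]) and odd indices hold the matching
// eps[i] correction, so entry i is read as the pair TBL[2*i] / TBL[2*i + 1].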
use super::scalbn; const TBLSIZE: usize = 256; #[cfg_attr(rustfmt, rustfmt_skip)] static TBL: [u64; TBLSIZE * 2] = [ // exp2(z + eps) eps 0x3fe6a09e667f3d5d, 0x3d39880000000000, 0x3fe6b052fa751744, 0x3cd8000000000000, 0x3fe6c012750bd9fe, 0xbd28780000000000, 0x3fe6cfdcddd476bf, 0x3d1ec00000000000, 0x3fe6dfb23c651a29, 0xbcd8000000000000, 0x3fe6ef9298593ae3, 0xbcbc000000000000, 0x3fe6ff7df9519386, 0xbd2fd80000000000, 0x3fe70f7466f42da3, 0xbd2c880000000000, 0x3fe71f75e8ec5fc3, 0x3d13c00000000000, 0x3fe72f8286eacf05, 0xbd38300000000000, 0x3fe73f9a48a58152, 0xbd00c00000000000, 0x3fe74fbd35d7ccfc, 0x3d2f880000000000, 0x3fe75feb564267f1, 0x3d03e00000000000, 0x3fe77024b1ab6d48, 0xbd27d00000000000, 0x3fe780694fde5d38, 0xbcdd000000000000, 0x3fe790b938ac1d00, 0x3ce3000000000000, 0x3fe7a11473eb0178, 0xbced000000000000, 0x3fe7b17b0976d060, 0x3d20400000000000, 0x3fe7c1ed0130c133, 0x3ca0000000000000, 0x3fe7d26a62ff8636, 0xbd26900000000000, 0x3fe7e2f336cf4e3b, 0xbd02e00000000000, 0x3fe7f3878491c3e8, 0xbd24580000000000, 0x3fe80427543e1b4e, 0x3d33000000000000, 0x3fe814d2add1071a, 0x3d0f000000000000, 0x3fe82589994ccd7e, 0xbd21c00000000000, 0x3fe8364c1eb942d0, 0x3d29d00000000000, 0x3fe8471a4623cab5, 0x3d47100000000000, 0x3fe857f4179f5bbc, 0x3d22600000000000, 0x3fe868d99b4491af, 0xbd32c40000000000, 0x3fe879cad931a395, 0xbd23000000000000, 0x3fe88ac7d98a65b8, 0xbd2a800000000000, 0x3fe89bd0a4785800, 0xbced000000000000, 0x3fe8ace5422aa223, 0x3d33280000000000, 0x3fe8be05bad619fa, 0x3d42b40000000000, 0x3fe8cf3216b54383, 0xbd2ed00000000000, 0x3fe8e06a5e08664c, 0xbd20500000000000, 0x3fe8f1ae99157807, 0x3d28280000000000, 0x3fe902fed0282c0e, 0xbd1cb00000000000, 0x3fe9145b0b91ff96, 0xbd05e00000000000, 0x3fe925c353aa2ff9, 0x3cf5400000000000, 0x3fe93737b0cdc64a, 0x3d17200000000000, 0x3fe948b82b5f98ae, 0xbd09000000000000, 0x3fe95a44cbc852cb, 0x3d25680000000000, 0x3fe96bdd9a766f21, 0xbd36d00000000000, 0x3fe97d829fde4e2a, 0xbd01000000000000, 0x3fe98f33e47a23a3, 0x3d2d000000000000, 0x3fe9a0f170ca0604, 0xbd38a40000000000, 0x3fe9b2bb4d53ff89, 0x3d355c0000000000, 0x3fe9c49182a3f15b, 0x3d26b80000000000, 0x3fe9d674194bb8c5, 0xbcec000000000000, 0x3fe9e86319e3238e, 0x3d17d00000000000, 0x3fe9fa5e8d07f302, 0x3d16400000000000, 0x3fea0c667b5de54d, 0xbcf5000000000000, 0x3fea1e7aed8eb8f6, 0x3d09e00000000000, 0x3fea309bec4a2e27, 0x3d2ad80000000000, 0x3fea42c980460a5d, 0xbd1af00000000000, 0x3fea5503b23e259b, 0x3d0b600000000000, 0x3fea674a8af46213, 0x3d38880000000000, 0x3fea799e1330b3a7, 0x3d11200000000000, 0x3fea8bfe53c12e8d, 0x3d06c00000000000, 0x3fea9e6b5579fcd2, 0xbd29b80000000000, 0x3feab0e521356fb8, 0x3d2b700000000000, 0x3feac36bbfd3f381, 0x3cd9000000000000, 0x3fead5ff3a3c2780, 0x3ce4000000000000, 0x3feae89f995ad2a3, 0xbd2c900000000000, 0x3feafb4ce622f367, 0x3d16500000000000, 0x3feb0e07298db790, 0x3d2fd40000000000, 0x3feb20ce6c9a89a9, 0x3d12700000000000, 0x3feb33a2b84f1a4b, 0x3d4d470000000000, 0x3feb468415b747e7, 0xbd38380000000000, 0x3feb59728de5593a, 0x3c98000000000000, 0x3feb6c6e29f1c56a, 0x3d0ad00000000000, 0x3feb7f76f2fb5e50, 0x3cde800000000000, 0x3feb928cf22749b2, 0xbd04c00000000000, 0x3feba5b030a10603, 0xbd0d700000000000, 0x3febb8e0b79a6f66, 0x3d0d900000000000, 0x3febcc1e904bc1ff, 0x3d02a00000000000, 0x3febdf69c3f3a16f, 0xbd1f780000000000, 0x3febf2c25bd71db8, 0xbd10a00000000000, 0x3fec06286141b2e9, 0xbd11400000000000, 0x3fec199bdd8552e0, 0x3d0be00000000000, 0x3fec2d1cd9fa64ee, 0xbd09400000000000, 0x3fec40ab5fffd02f, 0xbd0ed00000000000, 0x3fec544778fafd15, 0x3d39660000000000, 0x3fec67f12e57d0cb, 0xbd1a100000000000, 
0x3fec7ba88988c1b6, 0xbd58458000000000, 0x3fec8f6d9406e733, 0xbd1a480000000000, 0x3feca3405751c4df, 0x3ccb000000000000, 0x3fecb720dcef9094, 0x3d01400000000000, 0x3feccb0f2e6d1689, 0x3cf0200000000000, 0x3fecdf0b555dc412, 0x3cf3600000000000, 0x3fecf3155b5bab3b, 0xbd06900000000000, 0x3fed072d4a0789bc, 0x3d09a00000000000, 0x3fed1b532b08c8fa, 0xbd15e00000000000, 0x3fed2f87080d8a85, 0x3d1d280000000000, 0x3fed43c8eacaa203, 0x3d01a00000000000, 0x3fed5818dcfba491, 0x3cdf000000000000, 0x3fed6c76e862e6a1, 0xbd03a00000000000, 0x3fed80e316c9834e, 0xbd0cd80000000000, 0x3fed955d71ff6090, 0x3cf4c00000000000, 0x3feda9e603db32ae, 0x3cff900000000000, 0x3fedbe7cd63a8325, 0x3ce9800000000000, 0x3fedd321f301b445, 0xbcf5200000000000, 0x3fede7d5641c05bf, 0xbd1d700000000000, 0x3fedfc97337b9aec, 0xbd16140000000000, 0x3fee11676b197d5e, 0x3d0b480000000000, 0x3fee264614f5a3e7, 0x3d40ce0000000000, 0x3fee3b333b16ee5c, 0x3d0c680000000000, 0x3fee502ee78b3fb4, 0xbd09300000000000, 0x3fee653924676d68, 0xbce5000000000000, 0x3fee7a51fbc74c44, 0xbd07f80000000000, 0x3fee8f7977cdb726, 0xbcf3700000000000, 0x3feea4afa2a490e8, 0x3ce5d00000000000, 0x3feeb9f4867ccae4, 0x3d161a0000000000, 0x3feecf482d8e680d, 0x3cf5500000000000, 0x3feee4aaa2188514, 0x3cc6400000000000, 0x3feefa1bee615a13, 0xbcee800000000000, 0x3fef0f9c1cb64106, 0xbcfa880000000000, 0x3fef252b376bb963, 0xbd2c900000000000, 0x3fef3ac948dd7275, 0x3caa000000000000, 0x3fef50765b6e4524, 0xbcf4f00000000000, 0x3fef6632798844fd, 0x3cca800000000000, 0x3fef7bfdad9cbe38, 0x3cfabc0000000000, 0x3fef91d802243c82, 0xbcd4600000000000, 0x3fefa7c1819e908e, 0xbd0b0c0000000000, 0x3fefbdba3692d511, 0xbcc0e00000000000, 0x3fefd3c22b8f7194, 0xbd10de8000000000, 0x3fefe9d96b2a23ee, 0x3cee430000000000, 0x3ff0000000000000, 0x0, 0x3ff00b1afa5abcbe, 0xbcb3400000000000, 0x3ff0163da9fb3303, 0xbd12170000000000, 0x3ff02168143b0282, 0x3cba400000000000, 0x3ff02c9a3e77806c, 0x3cef980000000000, 0x3ff037d42e11bbca, 0xbcc7400000000000, 0x3ff04315e86e7f89, 0x3cd8300000000000, 0x3ff04e5f72f65467, 0xbd1a3f0000000000, 0x3ff059b0d315855a, 0xbd02840000000000, 0x3ff0650a0e3c1f95, 0x3cf1600000000000, 0x3ff0706b29ddf71a, 0x3d15240000000000, 0x3ff07bd42b72a82d, 0xbce9a00000000000, 0x3ff0874518759bd0, 0x3ce6400000000000, 0x3ff092bdf66607c8, 0xbd00780000000000, 0x3ff09e3ecac6f383, 0xbc98000000000000, 0x3ff0a9c79b1f3930, 0x3cffa00000000000, 0x3ff0b5586cf988fc, 0xbcfac80000000000, 0x3ff0c0f145e46c8a, 0x3cd9c00000000000, 0x3ff0cc922b724816, 0x3d05200000000000, 0x3ff0d83b23395dd8, 0xbcfad00000000000, 0x3ff0e3ec32d3d1f3, 0x3d1bac0000000000, 0x3ff0efa55fdfa9a6, 0xbd04e80000000000, 0x3ff0fb66affed2f0, 0xbd0d300000000000, 0x3ff1073028d7234b, 0x3cf1500000000000, 0x3ff11301d0125b5b, 0x3cec000000000000, 0x3ff11edbab5e2af9, 0x3d16bc0000000000, 0x3ff12abdc06c31d5, 0x3ce8400000000000, 0x3ff136a814f2047d, 0xbd0ed00000000000, 0x3ff1429aaea92de9, 0x3ce8e00000000000, 0x3ff14e95934f3138, 0x3ceb400000000000, 0x3ff15a98c8a58e71, 0x3d05300000000000, 0x3ff166a45471c3df, 0x3d03380000000000, 0x3ff172b83c7d5211, 0x3d28d40000000000, 0x3ff17ed48695bb9f, 0xbd05d00000000000, 0x3ff18af9388c8d93, 0xbd1c880000000000, 0x3ff1972658375d66, 0x3d11f00000000000, 0x3ff1a35beb6fcba7, 0x3d10480000000000, 0x3ff1af99f81387e3, 0xbd47390000000000, 0x3ff1bbe084045d54, 0x3d24e40000000000, 0x3ff1c82f95281c43, 0xbd0a200000000000, 0x3ff1d4873168b9b2, 0x3ce3800000000000, 0x3ff1e0e75eb44031, 0x3ceac00000000000, 0x3ff1ed5022fcd938, 0x3d01900000000000, 0x3ff1f9c18438cdf7, 0xbd1b780000000000, 0x3ff2063b88628d8f, 0x3d2d940000000000, 0x3ff212be3578a81e, 0x3cd8000000000000, 
0x3ff21f49917ddd41, 0x3d2b340000000000, 0x3ff22bdda2791323, 0x3d19f80000000000, 0x3ff2387a6e7561e7, 0xbd19c80000000000, 0x3ff2451ffb821427, 0x3d02300000000000, 0x3ff251ce4fb2a602, 0xbd13480000000000, 0x3ff25e85711eceb0, 0x3d12700000000000, 0x3ff26b4565e27d16, 0x3d11d00000000000, 0x3ff2780e341de00f, 0x3d31ee0000000000, 0x3ff284dfe1f5633e, 0xbd14c00000000000, 0x3ff291ba7591bb30, 0xbd13d80000000000, 0x3ff29e9df51fdf09, 0x3d08b00000000000, 0x3ff2ab8a66d10e9b, 0xbd227c0000000000, 0x3ff2b87fd0dada3a, 0x3d2a340000000000, 0x3ff2c57e39771af9, 0xbd10800000000000, 0x3ff2d285a6e402d9, 0xbd0ed00000000000, 0x3ff2df961f641579, 0xbcf4200000000000, 0x3ff2ecafa93e2ecf, 0xbd24980000000000, 0x3ff2f9d24abd8822, 0xbd16300000000000, 0x3ff306fe0a31b625, 0xbd32360000000000, 0x3ff31432edeea50b, 0xbd70df8000000000, 0x3ff32170fc4cd7b8, 0xbd22480000000000, 0x3ff32eb83ba8e9a2, 0xbd25980000000000, 0x3ff33c08b2641766, 0x3d1ed00000000000, 0x3ff3496266e3fa27, 0xbcdc000000000000, 0x3ff356c55f929f0f, 0xbd30d80000000000, 0x3ff36431a2de88b9, 0x3d22c80000000000, 0x3ff371a7373aaa39, 0x3d20600000000000, 0x3ff37f26231e74fe, 0xbd16600000000000, 0x3ff38cae6d05d838, 0xbd0ae00000000000, 0x3ff39a401b713ec3, 0xbd44720000000000, 0x3ff3a7db34e5a020, 0x3d08200000000000, 0x3ff3b57fbfec6e95, 0x3d3e800000000000, 0x3ff3c32dc313a8f2, 0x3cef800000000000, 0x3ff3d0e544ede122, 0xbd17a00000000000, 0x3ff3dea64c1234bb, 0x3d26300000000000, 0x3ff3ec70df1c4ecc, 0xbd48a60000000000, 0x3ff3fa4504ac7e8c, 0xbd3cdc0000000000, 0x3ff40822c367a0bb, 0x3d25b80000000000, 0x3ff4160a21f72e95, 0x3d1ec00000000000, 0x3ff423fb27094646, 0xbd13600000000000, 0x3ff431f5d950a920, 0x3d23980000000000, 0x3ff43ffa3f84b9eb, 0x3cfa000000000000, 0x3ff44e0860618919, 0xbcf6c00000000000, 0x3ff45c2042a7d201, 0xbd0bc00000000000, 0x3ff46a41ed1d0016, 0xbd12800000000000, 0x3ff4786d668b3326, 0x3d30e00000000000, 0x3ff486a2b5c13c00, 0xbd2d400000000000, 0x3ff494e1e192af04, 0x3d0c200000000000, 0x3ff4a32af0d7d372, 0xbd1e500000000000, 0x3ff4b17dea6db801, 0x3d07800000000000, 0x3ff4bfdad53629e1, 0xbd13800000000000, 0x3ff4ce41b817c132, 0x3d00800000000000, 0x3ff4dcb299fddddb, 0x3d2c700000000000, 0x3ff4eb2d81d8ab96, 0xbd1ce00000000000, 0x3ff4f9b2769d2d02, 0x3d19200000000000, 0x3ff508417f4531c1, 0xbd08c00000000000, 0x3ff516daa2cf662a, 0xbcfa000000000000, 0x3ff5257de83f51ea, 0x3d4a080000000000, 0x3ff5342b569d4eda, 0xbd26d80000000000, 0x3ff542e2f4f6ac1a, 0xbd32440000000000, 0x3ff551a4ca5d94db, 0x3d483c0000000000, 0x3ff56070dde9116b, 0x3d24b00000000000, 0x3ff56f4736b529de, 0x3d415a0000000000, 0x3ff57e27dbe2c40e, 0xbd29e00000000000, 0x3ff58d12d497c76f, 0xbd23080000000000, 0x3ff59c0827ff0b4c, 0x3d4dec0000000000, 0x3ff5ab07dd485427, 0xbcc4000000000000, 0x3ff5ba11fba87af4, 0x3d30080000000000, 0x3ff5c9268a59460b, 0xbd26c80000000000, 0x3ff5d84590998e3f, 0x3d469a0000000000, 0x3ff5e76f15ad20e1, 0xbd1b400000000000, 0x3ff5f6a320dcebca, 0x3d17700000000000, 0x3ff605e1b976dcb8, 0x3d26f80000000000, 0x3ff6152ae6cdf715, 0x3d01000000000000, 0x3ff6247eb03a5531, 0xbd15d00000000000, 0x3ff633dd1d1929b5, 0xbd12d00000000000, 0x3ff6434634ccc313, 0xbcea800000000000, 0x3ff652b9febc8efa, 0xbd28600000000000, 0x3ff6623882553397, 0x3d71fe0000000000, 0x3ff671c1c708328e, 0xbd37200000000000, 0x3ff68155d44ca97e, 0x3ce6800000000000, 0x3ff690f4b19e9471, 0xbd29780000000000, ]; // exp2(x): compute the base 2 exponential of x // // Accuracy: Peak error < 0.503 ulp for normalized results. // // Method: (accurate tables) // // Reduce x: // x = k + y, for integer k and |y| <= 1/2. // Thus we have exp2(x) = 2**k * exp2(y). 
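//      (A quick illustrative instance, not from the original notes: for
//      x = 4.3 this first step gives k = 4 and y = 0.3, i.e.
//      exp2(4.3) = 16 * exp2(0.3); the second reduction below then picks
//      i near y * TBLSIZE = 76.8, leaving only a tiny z for the polynomial.)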
// // Reduce y: // y = i/TBLSIZE + z - eps[i] for integer i near y * TBLSIZE. // Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z - eps[i]), // with |z - eps[i]| <= 2**-9 + 2**-39 for the table used. // // We compute exp2(i/TBLSIZE) via table lookup and exp2(z - eps[i]) via // a degree-5 minimax polynomial with maximum error under 1.3 * 2**-61. // The values in exp2t[] and eps[] are chosen such that // exp2t[i] = exp2(i/TBLSIZE + eps[i]), and eps[i] is a small offset such // that exp2t[i] is accurate to 2**-64. // // Note that the range of i is +-TBLSIZE/2, so we actually index the tables // by i0 = i + TBLSIZE/2. For cache efficiency, exp2t[] and eps[] are // virtual tables, interleaved in the real table tbl[]. // // This method is due to Gal, with many details due to Gal and Bachelis: // // Gal, S. and Bachelis, B. An Accurate Elementary Mathematical Library // for the IEEE Floating Point Standard. TOMS 17(1), 26-46 (1991). /// Exponential, base 2 (f64) /// /// Calculate `2^x`, that is, 2 raised to the power `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp2(mut x: f64) -> f64 { let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64; let p1 = f64::from_bits(0x3fe62e42fefa39ef); let p2 = f64::from_bits(0x3fcebfbdff82c575); let p3 = f64::from_bits(0x3fac6b08d704a0a6); let p4 = f64::from_bits(0x3f83b2ab88f70400); let p5 = f64::from_bits(0x3f55d88003875c74); // double_t r, t, z; // uint32_t ix, i0; // union {double f; uint64_t i;} u = {x}; // union {uint32_t u; int32_t i;} k; let x1p1023 = f64::from_bits(0x7fe0000000000000); let x1p52 = f64::from_bits(0x4330000000000000); let _0x1p_149 = f64::from_bits(0xb6a0000000000000); /* Filter out exceptional cases. */ let ui = f64::to_bits(x); let ix = ui >> 32 & 0x7fffffff; if ix >= 0x408ff000 { /* |x| >= 1022 or nan */ if ix >= 0x40900000 && ui >> 63 == 0 { /* x >= 1024 or nan */ /* overflow */ x *= x1p1023; return x; } if ix >= 0x7ff00000 { /* -inf or -nan */ return -1.0 / x; } if ui >> 63 != 0 { /* x <= -1022 */ /* underflow */ if x <= -1075.0 || x - x1p52 + x1p52 != x { force_eval!((_0x1p_149 / x) as f32); } if x <= -1075.0 { return 0.0; } } } else if ix < 0x3c900000 { /* |x| < 0x1p-54 */ return 1.0 + x; } /* Reduce x, computing z, i0, and k. */ let ui = f64::to_bits(x + redux); let mut i0 = ui as u32; i0 = i0.wrapping_add(TBLSIZE as u32 / 2); let ku = i0 / TBLSIZE as u32 * TBLSIZE as u32; let ki = div!(ku as i32, TBLSIZE as i32); i0 %= TBLSIZE as u32; let uf = f64::from_bits(ui) - redux; let mut z = x - uf; /* Compute r = exp2(y) = exp2t[i0] * p(z - eps[i]). */ let t = f64::from_bits(i!(TBL, 2 * i0 as usize)); /* exp2t[i0] */ z -= f64::from_bits(i!(TBL, 2 * i0 as usize + 1)); /* eps[i0] */ let r = t + t * z * (p1 + z * (p2 + z * (p3 + z * (p4 + z * p5)))); scalbn(r, ki) } #[test] fn i0_wrap_test() { let x = -3.0 / 256.0; assert_eq!(exp2(x), f64::from_bits(0x3fefbdba3692d514)); } compiler_builtins-0.1.101/libm/src/math/exp2f.rs000064400000000000000000000111651046102023000175500ustar 00000000000000// origin: FreeBSD /usr/src/lib/msun/src/s_exp2f.c //- // Copyright (c) 2005 David Schultz // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS // OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF // SUCH DAMAGE. const TBLSIZE: usize = 16; static EXP2FT: [u64; TBLSIZE] = [ 0x3fe6a09e667f3bcd, 0x3fe7a11473eb0187, 0x3fe8ace5422aa0db, 0x3fe9c49182a3f090, 0x3feae89f995ad3ad, 0x3fec199bdd85529c, 0x3fed5818dcfba487, 0x3feea4afa2a490da, 0x3ff0000000000000, 0x3ff0b5586cf9890f, 0x3ff172b83c7d517b, 0x3ff2387a6e756238, 0x3ff306fe0a31b715, 0x3ff3dea64c123422, 0x3ff4bfdad5362a27, 0x3ff5ab07dd485429, ]; // exp2f(x): compute the base 2 exponential of x // // Accuracy: Peak error < 0.501 ulp; location of peak: -0.030110927. // // Method: (equally-spaced tables) // // Reduce x: // x = k + y, for integer k and |y| <= 1/2. // Thus we have exp2f(x) = 2**k * exp2(y). // // Reduce y: // y = i/TBLSIZE + z for integer i near y * TBLSIZE. // Thus we have exp2(y) = exp2(i/TBLSIZE) * exp2(z), // with |z| <= 2**-(TBLSIZE+1). // // We compute exp2(i/TBLSIZE) via table lookup and exp2(z) via a // degree-4 minimax polynomial with maximum error under 1.4 * 2**-33. // Using double precision for everything except the reduction makes // roundoff error insignificant and simplifies the scaling step. // // This method is due to Tang, but I do not use his suggested parameters: // // Tang, P. Table-driven Implementation of the Exponential Function // in IEEE Floating-Point Arithmetic. TOMS 15(2), 144-157 (1989). /// Exponential, base 2 (f32) /// /// Calculate `2^x`, that is, 2 raised to the power `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn exp2f(mut x: f32) -> f32 { let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32; let p1 = f32::from_bits(0x3f317218); let p2 = f32::from_bits(0x3e75fdf0); let p3 = f32::from_bits(0x3d6359a4); let p4 = f32::from_bits(0x3c1d964e); // double_t t, r, z; // uint32_t ix, i0, k; let x1p127 = f32::from_bits(0x7f000000); /* Filter out exceptional cases. */ let ui = f32::to_bits(x); let ix = ui & 0x7fffffff; if ix > 0x42fc0000 { /* |x| > 126 */ if ix > 0x7f800000 { /* NaN */ return x; } if ui >= 0x43000000 && ui < 0x80000000 { /* x >= 128 */ x *= x1p127; return x; } if ui >= 0x80000000 { /* x < -126 */ if ui >= 0xc3160000 || (ui & 0x0000ffff != 0) { force_eval!(f32::from_bits(0x80000001) / x); } if ui >= 0xc3160000 { /* x <= -150 */ return 0.0; } } } else if ix <= 0x33000000 { /* |x| <= 0x1p-25 */ return 1.0 + x; } /* Reduce x, computing z, i0, and k. 
*/ let ui = f32::to_bits(x + redux); let mut i0 = ui; i0 += TBLSIZE as u32 / 2; let k = i0 / TBLSIZE as u32; let ukf = f64::from_bits(((0x3ff + k) as u64) << 52); i0 &= TBLSIZE as u32 - 1; let mut uf = f32::from_bits(ui); uf -= redux; let z: f64 = (x - uf) as f64; /* Compute r = exp2(y) = exp2ft[i0] * p(z). */ let r: f64 = f64::from_bits(i!(EXP2FT, i0 as usize)); let t: f64 = r as f64 * z; let r: f64 = r + t * (p1 as f64 + z * p2 as f64) + t * (z * z) * (p3 as f64 + z * p4 as f64); /* Scale by 2**k */ (r * ukf) as f32 } compiler_builtins-0.1.101/libm/src/math/expf.rs000064400000000000000000000057301046102023000174670ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_expf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use super::scalbnf; const HALF: [f32; 2] = [0.5, -0.5]; const LN2_HI: f32 = 6.9314575195e-01; /* 0x3f317200 */ const LN2_LO: f32 = 1.4286067653e-06; /* 0x35bfbe8e */ const INV_LN2: f32 = 1.4426950216e+00; /* 0x3fb8aa3b */ /* * Domain [-0.34568, 0.34568], range ~[-4.278e-9, 4.447e-9]: * |x*(exp(x)+1)/(exp(x)-1) - p(x)| < 2**-27.74 */ const P1: f32 = 1.6666625440e-1; /* 0xaaaa8f.0p-26 */ const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */ /// Exponential, base *e* (f32) /// /// Calculate the exponential of `x`, that is, *e* raised to the power `x` /// (where *e* is the base of the natural system of logarithms, approximately 2.71828). #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn expf(mut x: f32) -> f32 { let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? */ let mut hx = x.to_bits(); let sign = (hx >> 31) as i32; /* sign bit of x */ let signb: bool = sign != 0; hx &= 0x7fffffff; /* high word of |x| */ /* special cases */ if hx >= 0x42aeac50 { /* if |x| >= -87.33655f or NaN */ if hx > 0x7f800000 { /* NaN */ return x; } if (hx >= 0x42b17218) && (!signb) { /* x >= 88.722839f */ /* overflow */ x *= x1p127; return x; } if signb { /* underflow */ force_eval!(-x1p_126 / x); if hx >= 0x42cff1b5 { /* x <= -103.972084f */ return 0.; } } } /* argument reduction */ let k: i32; let hi: f32; let lo: f32; if hx > 0x3eb17218 { /* if |x| > 0.5 ln2 */ if hx > 0x3f851592 { /* if |x| > 1.5 ln2 */ k = (INV_LN2 * x + i!(HALF, sign as usize)) as i32; } else { k = 1 - sign - sign; } let kf = k as f32; hi = x - kf * LN2_HI; /* k*ln2hi is exact here */ lo = kf * LN2_LO; x = hi - lo; } else if hx > 0x39000000 { /* |x| > 2**-14 */ k = 0; hi = x; lo = 0.; } else { /* raise inexact */ force_eval!(x1p127 + x); return 1. + x; } /* x is now in primary range */ let xx = x * x; let c = x - xx * (P1 + xx * P2); let y = 1. + (x * c / (2. - c) - lo + hi); if k == 0 { y } else { scalbnf(y, k) } } compiler_builtins-0.1.101/libm/src/math/expm1.rs000064400000000000000000000103411046102023000175510ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. 
* Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use core::f64; const O_THRESHOLD: f64 = 7.09782712893383973096e+02; /* 0x40862E42, 0xFEFA39EF */ const LN2_HI: f64 = 6.93147180369123816490e-01; /* 0x3fe62e42, 0xfee00000 */ const LN2_LO: f64 = 1.90821492927058770002e-10; /* 0x3dea39ef, 0x35793c76 */ const INVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547, 0x652b82fe */ /* Scaled Q's: Qn_here = 2**n * Qn_above, for R(2*z) where z = hxs = x*x/2: */ const Q1: f64 = -3.33333333333331316428e-02; /* BFA11111 111110F4 */ const Q2: f64 = 1.58730158725481460165e-03; /* 3F5A01A0 19FE5585 */ const Q3: f64 = -7.93650757867487942473e-05; /* BF14CE19 9EAADBB7 */ const Q4: f64 = 4.00821782732936239552e-06; /* 3ED0CFCA 86E65239 */ const Q5: f64 = -2.01099218183624371326e-07; /* BE8AFDB7 6E09C32D */ /// Exponential, base *e*, of x-1 (f64) /// /// Calculates the exponential of `x` and subtract 1, that is, *e* raised /// to the power `x` minus 1 (where *e* is the base of the natural /// system of logarithms, approximately 2.71828). /// The result is accurate even for small values of `x`, /// where using `exp(x)-1` would lose many significant digits. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn expm1(mut x: f64) -> f64 { let hi: f64; let lo: f64; let k: i32; let c: f64; let mut t: f64; let mut y: f64; let mut ui = x.to_bits(); let hx = ((ui >> 32) & 0x7fffffff) as u32; let sign = (ui >> 63) as i32; /* filter out huge and non-finite argument */ if hx >= 0x4043687A { /* if |x|>=56*ln2 */ if x.is_nan() { return x; } if sign != 0 { return -1.0; } if x > O_THRESHOLD { x *= f64::from_bits(0x7fe0000000000000); return x; } } /* argument reduction */ if hx > 0x3fd62e42 { /* if |x| > 0.5 ln2 */ if hx < 0x3FF0A2B2 { /* and |x| < 1.5 ln2 */ if sign == 0 { hi = x - LN2_HI; lo = LN2_LO; k = 1; } else { hi = x + LN2_HI; lo = -LN2_LO; k = -1; } } else { k = (INVLN2 * x + if sign != 0 { -0.5 } else { 0.5 }) as i32; t = k as f64; hi = x - t * LN2_HI; /* t*ln2_hi is exact here */ lo = t * LN2_LO; } x = hi - lo; c = (hi - x) - lo; } else if hx < 0x3c900000 { /* |x| < 2**-54, return x */ if hx < 0x00100000 { force_eval!(x); } return x; } else { c = 0.0; k = 0; } /* x is now in primary range */ let hfx = 0.5 * x; let hxs = x * hfx; let r1 = 1.0 + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5)))); t = 3.0 - r1 * hfx; let mut e = hxs * ((r1 - t) / (6.0 - x * t)); if k == 0 { /* c is 0 */ return x - (x * e - hxs); } e = x * (e - c) - c; e -= hxs; /* exp(x) ~ 2^k (x_reduced - e + 1) */ if k == -1 { return 0.5 * (x - e) - 0.5; } if k == 1 { if x < -0.25 { return -2.0 * (e - (x + 0.5)); } return 1.0 + 2.0 * (x - e); } ui = ((0x3ff + k) as u64) << 52; /* 2^k */ let twopk = f64::from_bits(ui); if k < 0 || k > 56 { /* suffice to return exp(x)-1 */ y = x - e + 1.0; if k == 1024 { y = y * 2.0 * f64::from_bits(0x7fe0000000000000); } else { y = y * twopk; } return y - 1.0; } ui = ((0x3ff - k) as u64) << 52; /* 2^-k */ let uf = f64::from_bits(ui); if k < 20 { y = (x - e + (1.0 - uf)) * twopk; } else { y = (x - (e + uf) + 1.0) * twopk; } y } #[cfg(test)] mod tests { #[test] fn sanity_check() { assert_eq!(super::expm1(1.1), 2.0041660239464334); } } compiler_builtins-0.1.101/libm/src/math/expm1f.rs000064400000000000000000000075571046102023000177360ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_expm1f.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus 
Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ const O_THRESHOLD: f32 = 8.8721679688e+01; /* 0x42b17180 */ const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ const INV_LN2: f32 = 1.4426950216e+00; /* 0x3fb8aa3b */ /* * Domain [-0.34568, 0.34568], range ~[-6.694e-10, 6.696e-10]: * |6 / x * (1 + 2 * (1 / (exp(x) - 1) - 1 / x)) - q(x)| < 2**-30.04 * Scaled coefficients: Qn_here = 2**n * Qn_for_q (see s_expm1.c): */ const Q1: f32 = -3.3333212137e-2; /* -0x888868.0p-28 */ const Q2: f32 = 1.5807170421e-3; /* 0xcf3010.0p-33 */ /// Exponential, base *e*, of x-1 (f32) /// /// Calculates the exponential of `x` and subtract 1, that is, *e* raised /// to the power `x` minus 1 (where *e* is the base of the natural /// system of logarithms, approximately 2.71828). /// The result is accurate even for small values of `x`, /// where using `exp(x)-1` would lose many significant digits. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn expm1f(mut x: f32) -> f32 { let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 let mut hx = x.to_bits(); let sign = (hx >> 31) != 0; hx &= 0x7fffffff; /* filter out huge and non-finite argument */ if hx >= 0x4195b844 { /* if |x|>=27*ln2 */ if hx > 0x7f800000 { /* NaN */ return x; } if sign { return -1.; } if x > O_THRESHOLD { x *= x1p127; return x; } } let k: i32; let hi: f32; let lo: f32; let mut c = 0f32; /* argument reduction */ if hx > 0x3eb17218 { /* if |x| > 0.5 ln2 */ if hx < 0x3F851592 { /* and |x| < 1.5 ln2 */ if !sign { hi = x - LN2_HI; lo = LN2_LO; k = 1; } else { hi = x + LN2_HI; lo = -LN2_LO; k = -1; } } else { k = (INV_LN2 * x + (if sign { -0.5 } else { 0.5 })) as i32; let t = k as f32; hi = x - t * LN2_HI; /* t*ln2_hi is exact here */ lo = t * LN2_LO; } x = hi - lo; c = (hi - x) - lo; } else if hx < 0x33000000 { /* when |x|<2**-25, return x */ if hx < 0x00800000 { force_eval!(x * x); } return x; } else { k = 0; } /* x is now in primary range */ let hfx = 0.5 * x; let hxs = x * hfx; let r1 = 1. + hxs * (Q1 + hxs * Q2); let t = 3. - r1 * hfx; let mut e = hxs * ((r1 - t) / (6. - x * t)); if k == 0 { /* c is 0 */ return x - (x * e - hxs); } e = x * (e - c) - c; e -= hxs; /* exp(x) ~ 2^k (x_reduced - e + 1) */ if k == -1 { return 0.5 * (x - e) - 0.5; } if k == 1 { if x < -0.25 { return -2. * (e - (x + 0.5)); } return 1. + 2. * (x - e); } let twopk = f32::from_bits(((0x7f + k) << 23) as u32); /* 2^k */ if (k < 0) || (k > 56) { /* suffice to return exp(x)-1 */ let mut y = x - e + 1.; if k == 128 { y = y * 2. * x1p127; } else { y = y * twopk; } return y - 1.; } let uf = f32::from_bits(((0x7f - k) << 23) as u32); /* 2^-k */ if k < 23 { (x - e + (1. - uf)) * twopk } else { (x - (e + uf) + 1.) 
* twopk } } compiler_builtins-0.1.101/libm/src/math/expo2.rs000064400000000000000000000010701046102023000175530ustar 00000000000000use super::{combine_words, exp}; /* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn expo2(x: f64) -> f64 { /* k is such that k*ln2 has minimal relative error and x - kln2 > log(DBL_MIN) */ const K: i32 = 2043; let kln2 = f64::from_bits(0x40962066151add8b); /* note that k is odd and scale*scale overflows */ let scale = combine_words(((0x3ff + K / 2) as u32) << 20, 0); /* exp(x - k ln2) * 2**(k-1) */ exp(x - kln2) * scale * scale } compiler_builtins-0.1.101/libm/src/math/fabs.rs000064400000000000000000000022451046102023000174360ustar 00000000000000use core::u64; /// Absolute value (magnitude) (f64) /// Calculates the absolute value (magnitude) of the argument `x`, /// by direct manipulation of the bit representation of `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabs(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f64.abs` native instruction, so we can leverage this for both code size // and speed. llvm_intrinsically_optimized! { #[cfg(target_arch = "wasm32")] { return unsafe { ::core::intrinsics::fabsf64(x) } } } f64::from_bits(x.to_bits() & (u64::MAX / 2)) } #[cfg(test)] mod tests { use super::*; use core::f64::*; #[test] fn sanity_check() { assert_eq!(fabs(-1.0), 1.0); assert_eq!(fabs(2.8), 2.8); } /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs #[test] fn spec_tests() { assert!(fabs(NAN).is_nan()); for f in [0.0, -0.0].iter().copied() { assert_eq!(fabs(f), 0.0); } for f in [INFINITY, NEG_INFINITY].iter().copied() { assert_eq!(fabs(f), INFINITY); } } } compiler_builtins-0.1.101/libm/src/math/fabsf.rs000064400000000000000000000024261046102023000176050ustar 00000000000000/// Absolute value (magnitude) (f32) /// Calculates the absolute value (magnitude) of the argument `x`, /// by direct manipulation of the bit representation of `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fabsf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f32.abs` native instruction, so we can leverage this for both code size // and speed. llvm_intrinsically_optimized! { #[cfg(target_arch = "wasm32")] { return unsafe { ::core::intrinsics::fabsf32(x) } } } f32::from_bits(x.to_bits() & 0x7fffffff) } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::*; use core::f32::*; #[test] fn sanity_check() { assert_eq!(fabsf(-1.0), 1.0); assert_eq!(fabsf(2.8), 2.8); } /// The spec: https://en.cppreference.com/w/cpp/numeric/math/fabs #[test] fn spec_tests() { assert!(fabsf(NAN).is_nan()); for f in [0.0, -0.0].iter().copied() { assert_eq!(fabsf(f), 0.0); } for f in [INFINITY, NEG_INFINITY].iter().copied() { assert_eq!(fabsf(f), INFINITY); } } } compiler_builtins-0.1.101/libm/src/math/fdim.rs000064400000000000000000000007261046102023000174440ustar 00000000000000use core::f64; /// Positive difference (f64) /// /// Determines the positive difference between arguments, returning: /// * x - y if x > y, or /// * +0 if x <= y, or /// * NAN if either argument is NAN. /// /// A range error may occur. 
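///
/// A couple of quick illustrative values (following the rules above, not
/// taken from the upstream docs): `fdim(5.0, 3.0) == 2.0` while
/// `fdim(3.0, 5.0) == 0.0`, and `fdim(f64::NAN, 1.0)` is NaN.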
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fdim(x: f64, y: f64) -> f64 { if x.is_nan() { x } else if y.is_nan() { y } else if x > y { x - y } else { 0.0 } } compiler_builtins-0.1.101/libm/src/math/fdimf.rs000064400000000000000000000007271046102023000176130ustar 00000000000000use core::f32; /// Positive difference (f32) /// /// Determines the positive difference between arguments, returning: /// * x - y if x > y, or /// * +0 if x <= y, or /// * NAN if either argument is NAN. /// /// A range error may occur. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fdimf(x: f32, y: f32) -> f32 { if x.is_nan() { x } else if y.is_nan() { y } else if x > y { x - y } else { 0.0 } } compiler_builtins-0.1.101/libm/src/math/fenv.rs000064400000000000000000000007361046102023000174640ustar 00000000000000// src: musl/src/fenv/fenv.c /* Dummy functions for archs lacking fenv implementation */ pub(crate) const FE_UNDERFLOW: i32 = 0; pub(crate) const FE_INEXACT: i32 = 0; pub(crate) const FE_TONEAREST: i32 = 0; #[inline] pub(crate) fn feclearexcept(_mask: i32) -> i32 { 0 } #[inline] pub(crate) fn feraiseexcept(_mask: i32) -> i32 { 0 } #[inline] pub(crate) fn fetestexcept(_mask: i32) -> i32 { 0 } #[inline] pub(crate) fn fegetround() -> i32 { FE_TONEAREST } compiler_builtins-0.1.101/libm/src/math/floor.rs000064400000000000000000000044351046102023000176470ustar 00000000000000#![allow(unreachable_code)] use core::f64; const TOINT: f64 = 1. / f64::EPSILON; /// Floor (f64) /// /// Finds the nearest integer less than or equal to `x`. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn floor(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f64.floor` native instruction, so we can leverage this for both code size // and speed. llvm_intrinsically_optimized! { #[cfg(target_arch = "wasm32")] { return unsafe { ::core::intrinsics::floorf64(x) } } } #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] { //use an alternative implementation on x86, because the //main implementation fails with the x87 FPU used by //debian i386, probablly due to excess precision issues. //basic implementation taken from https://github.com/rust-lang/libm/issues/219 use super::fabs; if fabs(x).to_bits() < 4503599627370496.0_f64.to_bits() { let truncated = x as i64 as f64; if truncated > x { return truncated - 1.0; } else { return truncated; } } else { return x; } } let ui = x.to_bits(); let e = ((ui >> 52) & 0x7ff) as i32; if (e >= 0x3ff + 52) || (x == 0.) { return x; } /* y = int(x) - x, where int(x) is an integer neighbor of x */ let y = if (ui >> 63) != 0 { x - TOINT + TOINT - x } else { x + TOINT - TOINT - x }; /* special case because of non-nearest rounding modes */ if e < 0x3ff { force_eval!(y); return if (ui >> 63) != 0 { -1. } else { 0. }; } if y > 0. { x + y - 1. } else { x + y } } #[cfg(test)] mod tests { use super::*; use core::f64::*; #[test] fn sanity_check() { assert_eq!(floor(1.1), 1.0); assert_eq!(floor(2.9), 2.0); } /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor #[test] fn spec_tests() { // Not Asserted: that the current rounding mode has no effect. assert!(floor(NAN).is_nan()); for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { assert_eq!(floor(f), f); } } } compiler_builtins-0.1.101/libm/src/math/floorf.rs000064400000000000000000000033671046102023000200200ustar 00000000000000use core::f32; /// Floor (f32) /// /// Finds the nearest integer less than or equal to `x`. 
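///
/// For instance (an illustrative note, not from the upstream docs):
/// `floorf(2.9) == 2.0` and `floorf(-2.1) == -3.0`.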
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn floorf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f32.floor` native instruction, so we can leverage this for both code size // and speed. llvm_intrinsically_optimized! { #[cfg(target_arch = "wasm32")] { return unsafe { ::core::intrinsics::floorf32(x) } } } let mut ui = x.to_bits(); let e = (((ui >> 23) as i32) & 0xff) - 0x7f; if e >= 23 { return x; } if e >= 0 { let m: u32 = 0x007fffff >> e; if (ui & m) == 0 { return x; } force_eval!(x + f32::from_bits(0x7b800000)); if ui >> 31 != 0 { ui += m; } ui &= !m; } else { force_eval!(x + f32::from_bits(0x7b800000)); if ui >> 31 == 0 { ui = 0; } else if ui << 1 != 0 { return -1.0; } } f32::from_bits(ui) } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::*; use core::f32::*; #[test] fn sanity_check() { assert_eq!(floorf(0.5), 0.0); assert_eq!(floorf(1.1), 1.0); assert_eq!(floorf(2.9), 2.0); } /// The spec: https://en.cppreference.com/w/cpp/numeric/math/floor #[test] fn spec_tests() { // Not Asserted: that the current rounding mode has no effect. assert!(floorf(NAN).is_nan()); for f in [0.0, -0.0, INFINITY, NEG_INFINITY].iter().copied() { assert_eq!(floorf(f), f); } } } compiler_builtins-0.1.101/libm/src/math/fma.rs000064400000000000000000000150621046102023000172670ustar 00000000000000use core::{f32, f64}; use super::scalbn; const ZEROINFNAN: i32 = 0x7ff - 0x3ff - 52 - 1; struct Num { m: u64, e: i32, sign: i32, } fn normalize(x: f64) -> Num { let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 let mut ix: u64 = x.to_bits(); let mut e: i32 = (ix >> 52) as i32; let sign: i32 = e & 0x800; e &= 0x7ff; if e == 0 { ix = (x * x1p63).to_bits(); e = (ix >> 52) as i32 & 0x7ff; e = if e != 0 { e - 63 } else { 0x800 }; } ix &= (1 << 52) - 1; ix |= 1 << 52; ix <<= 1; e -= 0x3ff + 52 + 1; Num { m: ix, e, sign } } #[inline] fn mul(x: u64, y: u64) -> (u64, u64) { let t = (x as u128).wrapping_mul(y as u128); ((t >> 64) as u64, t as u64) } /// Floating multiply add (f64) /// /// Computes `(x*y)+z`, rounded as one ternary operation: /// Computes the value (as if) to infinite precision and rounds once to the result format, /// according to the rounding mode characterized by the value of FLT_ROUNDS. 
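///
/// A small sketch of why the single rounding matters (illustrative, not part
/// of the upstream docs, and assuming the default round-to-nearest mode):
/// `0.1_f64 * 10.0 - 1.0` evaluates to exactly `0.0`, because the product is
/// rounded to `1.0` before the subtraction, whereas `fma(0.1, 10.0, -1.0)`
/// returns the tiny representation error of `0.1` scaled by ten
/// (about `5.55e-17`), since nothing is rounded until the very end.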
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fma(x: f64, y: f64, z: f64) -> f64 { let x1p63: f64 = f64::from_bits(0x43e0000000000000); // 0x1p63 === 2 ^ 63 let x0_ffffff8p_63 = f64::from_bits(0x3bfffffff0000000); // 0x0.ffffff8p-63 /* normalize so top 10bits and last bit are 0 */ let nx = normalize(x); let ny = normalize(y); let nz = normalize(z); if nx.e >= ZEROINFNAN || ny.e >= ZEROINFNAN { return x * y + z; } if nz.e >= ZEROINFNAN { if nz.e > ZEROINFNAN { /* z==0 */ return x * y + z; } return z; } /* mul: r = x*y */ let zhi: u64; let zlo: u64; let (mut rhi, mut rlo) = mul(nx.m, ny.m); /* either top 20 or 21 bits of rhi and last 2 bits of rlo are 0 */ /* align exponents */ let mut e: i32 = nx.e + ny.e; let mut d: i32 = nz.e - e; /* shift bits z<<=kz, r>>=kr, so kz+kr == d, set e = e+kr (== ez-kz) */ if d > 0 { if d < 64 { zlo = nz.m << d; zhi = nz.m >> (64 - d); } else { zlo = 0; zhi = nz.m; e = nz.e - 64; d -= 64; if d == 0 { } else if d < 64 { rlo = rhi << (64 - d) | rlo >> d | ((rlo << (64 - d)) != 0) as u64; rhi = rhi >> d; } else { rlo = 1; rhi = 0; } } } else { zhi = 0; d = -d; if d == 0 { zlo = nz.m; } else if d < 64 { zlo = nz.m >> d | ((nz.m << (64 - d)) != 0) as u64; } else { zlo = 1; } } /* add */ let mut sign: i32 = nx.sign ^ ny.sign; let samesign: bool = (sign ^ nz.sign) == 0; let mut nonzero: i32 = 1; if samesign { /* r += z */ rlo = rlo.wrapping_add(zlo); rhi += zhi + (rlo < zlo) as u64; } else { /* r -= z */ let (res, borrow) = rlo.overflowing_sub(zlo); rlo = res; rhi = rhi.wrapping_sub(zhi.wrapping_add(borrow as u64)); if (rhi >> 63) != 0 { rlo = (rlo as i64).wrapping_neg() as u64; rhi = (rhi as i64).wrapping_neg() as u64 - (rlo != 0) as u64; sign = (sign == 0) as i32; } nonzero = (rhi != 0) as i32; } /* set rhi to top 63bit of the result (last bit is sticky) */ if nonzero != 0 { e += 64; d = rhi.leading_zeros() as i32 - 1; /* note: d > 0 */ rhi = rhi << d | rlo >> (64 - d) | ((rlo << d) != 0) as u64; } else if rlo != 0 { d = rlo.leading_zeros() as i32 - 1; if d < 0 { rhi = rlo >> 1 | (rlo & 1); } else { rhi = rlo << d; } } else { /* exact +-0 */ return x * y + z; } e -= d; /* convert to double */ let mut i: i64 = rhi as i64; /* i is in [1<<62,(1<<63)-1] */ if sign != 0 { i = -i; } let mut r: f64 = i as f64; /* |r| is in [0x1p62,0x1p63] */ if e < -1022 - 62 { /* result is subnormal before rounding */ if e == -1022 - 63 { let mut c: f64 = x1p63; if sign != 0 { c = -c; } if r == c { /* min normal after rounding, underflow depends on arch behaviour which can be imitated by a double to float conversion */ let fltmin: f32 = (x0_ffffff8p_63 * f32::MIN_POSITIVE as f64 * r) as f32; return f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * fltmin as f64; } /* one bit is lost when scaled, add another top bit to only round once at conversion if it is inexact */ if (rhi << 53) != 0 { i = (rhi >> 1 | (rhi & 1) | 1 << 62) as i64; if sign != 0 { i = -i; } r = i as f64; r = 2. 
* r - c; /* remove top bit */ /* raise underflow portably, such that it cannot be optimized away */ { let tiny: f64 = f64::MIN_POSITIVE / f32::MIN_POSITIVE as f64 * r; r += (tiny * tiny) * (r - r); } } } else { /* only round once when scaled */ d = 10; i = ((rhi >> d | ((rhi << (64 - d)) != 0) as u64) << d) as i64; if sign != 0 { i = -i; } r = i as f64; } } scalbn(r, e) } #[cfg(test)] mod tests { use super::*; #[test] fn fma_segfault() { // These two inputs cause fma to segfault on release due to overflow: assert_eq!( fma( -0.0000000000000002220446049250313, -0.0000000000000002220446049250313, -0.0000000000000002220446049250313 ), -0.00000000000000022204460492503126, ); let result = fma(-0.992, -0.992, -0.992); //force rounding to storage format on x87 to prevent superious errors. #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] let result = force_eval!(result); assert_eq!(result, -0.007936000000000007,); } #[test] fn fma_sbb() { assert_eq!( fma(-(1.0 - f64::EPSILON), f64::MIN, f64::MIN), -3991680619069439e277 ); } #[test] fn fma_underflow() { assert_eq!( fma(1.1102230246251565e-16, -9.812526705433188e-305, 1.0894e-320), 0.0, ); } } compiler_builtins-0.1.101/libm/src/math/fmaf.rs000064400000000000000000000100471046102023000174330ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_fmaf.c */ /*- * Copyright (c) 2005-2011 David Schultz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ use core::f32; use core::ptr::read_volatile; use super::fenv::{ feclearexcept, fegetround, feraiseexcept, fetestexcept, FE_INEXACT, FE_TONEAREST, FE_UNDERFLOW, }; /* * Fused multiply-add: Compute x * y + z with a single rounding error. * * A double has more than twice as much precision than a float, so * direct double-precision arithmetic suffices, except where double * rounding occurs. */ /// Floating multiply add (f32) /// /// Computes `(x*y)+z`, rounded as one ternary operation: /// Computes the value (as if) to infinite precision and rounds once to the result format, /// according to the rounding mode characterized by the value of FLT_ROUNDS. 
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmaf(x: f32, y: f32, mut z: f32) -> f32 { let xy: f64; let mut result: f64; let mut ui: u64; let e: i32; xy = x as f64 * y as f64; result = xy + z as f64; ui = result.to_bits(); e = (ui >> 52) as i32 & 0x7ff; /* Common case: The double precision result is fine. */ if ( /* not a halfway case */ ui & 0x1fffffff) != 0x10000000 || /* NaN */ e == 0x7ff || /* exact */ (result - xy == z as f64 && result - z as f64 == xy) || /* not round-to-nearest */ fegetround() != FE_TONEAREST { /* underflow may not be raised correctly, example: fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f) */ if e < 0x3ff - 126 && e >= 0x3ff - 149 && fetestexcept(FE_INEXACT) != 0 { feclearexcept(FE_INEXACT); // prevent `xy + vz` from being CSE'd with `xy + z` above let vz: f32 = unsafe { read_volatile(&z) }; result = xy + vz as f64; if fetestexcept(FE_INEXACT) != 0 { feraiseexcept(FE_UNDERFLOW); } else { feraiseexcept(FE_INEXACT); } } z = result as f32; return z; } /* * If result is inexact, and exactly halfway between two float values, * we need to adjust the low-order bit in the direction of the error. */ let neg = ui >> 63 != 0; let err = if neg == (z as f64 > xy) { xy - result + z as f64 } else { z as f64 - result + xy }; if neg == (err < 0.0) { ui += 1; } else { ui -= 1; } f64::from_bits(ui) as f32 } #[cfg(test)] mod tests { #[test] fn issue_263() { let a = f32::from_bits(1266679807); let b = f32::from_bits(1300234242); let c = f32::from_bits(1115553792); let expected = f32::from_bits(1501560833); assert_eq!(super::fmaf(a, b, c), expected); } } compiler_builtins-0.1.101/libm/src/math/fmax.rs000064400000000000000000000014031046102023000174510ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmax(x: f64, y: f64) -> f64 { // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it // is either x or y, canonicalized (this means results might differ among implementations). // When either x or y is a signalingNaN, then the result is according to 6.2. // // Since we do not support sNaN in Rust yet, we do not need to handle them. // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by // multiplying by 1.0. Should switch to the `canonicalize` when it works. (if x.is_nan() || x < y { y } else { x }) * 1.0 } compiler_builtins-0.1.101/libm/src/math/fmaxf.rs000064400000000000000000000014041046102023000176200ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmaxf(x: f32, y: f32) -> f32 { // IEEE754 says: maxNum(x, y) is the canonicalized number y if x < y, x if y < x, the // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it // is either x or y, canonicalized (this means results might differ among implementations). // When either x or y is a signalingNaN, then the result is according to 6.2. // // Since we do not support sNaN in Rust yet, we do not need to handle them. // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by // multiplying by 1.0. Should switch to the `canonicalize` when it works. 
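    // Concretely (an illustrative note): with the expression below a NaN in
    // either slot is ignored whenever the other operand is a number, e.g.
    // both fmaxf(f32::NAN, 1.0) and fmaxf(1.0, f32::NAN) evaluate to 1.0.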
(if x.is_nan() || x < y { y } else { x }) * 1.0 } compiler_builtins-0.1.101/libm/src/math/fmin.rs000064400000000000000000000014031046102023000174470ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmin(x: f64, y: f64) -> f64 { // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it // is either x or y, canonicalized (this means results might differ among implementations). // When either x or y is a signalingNaN, then the result is according to 6.2. // // Since we do not support sNaN in Rust yet, we do not need to handle them. // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by // multiplying by 1.0. Should switch to the `canonicalize` when it works. (if y.is_nan() || x < y { x } else { y }) * 1.0 } compiler_builtins-0.1.101/libm/src/math/fminf.rs000064400000000000000000000014041046102023000176160ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fminf(x: f32, y: f32) -> f32 { // IEEE754 says: minNum(x, y) is the canonicalized number x if x < y, y if y < x, the // canonicalized number if one operand is a number and the other a quiet NaN. Otherwise it // is either x or y, canonicalized (this means results might differ among implementations). // When either x or y is a signalingNaN, then the result is according to 6.2. // // Since we do not support sNaN in Rust yet, we do not need to handle them. // FIXME(nagisa): due to https://bugs.llvm.org/show_bug.cgi?id=33303 we canonicalize by // multiplying by 1.0. Should switch to the `canonicalize` when it works. (if y.is_nan() || x < y { x } else { y }) * 1.0 } compiler_builtins-0.1.101/libm/src/math/fmod.rs000064400000000000000000000031451046102023000174500ustar 00000000000000use core::u64; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmod(x: f64, y: f64) -> f64 { let mut uxi = x.to_bits(); let mut uyi = y.to_bits(); let mut ex = (uxi >> 52 & 0x7ff) as i64; let mut ey = (uyi >> 52 & 0x7ff) as i64; let sx = uxi >> 63; let mut i; if uyi << 1 == 0 || y.is_nan() || ex == 0x7ff { return (x * y) / (x * y); } if uxi << 1 <= uyi << 1 { if uxi << 1 == uyi << 1 { return 0.0 * x; } return x; } /* normalize x and y */ if ex == 0 { i = uxi << 12; while i >> 63 == 0 { ex -= 1; i <<= 1; } uxi <<= -ex + 1; } else { uxi &= u64::MAX >> 12; uxi |= 1 << 52; } if ey == 0 { i = uyi << 12; while i >> 63 == 0 { ey -= 1; i <<= 1; } uyi <<= -ey + 1; } else { uyi &= u64::MAX >> 12; uyi |= 1 << 52; } /* x mod y */ while ex > ey { i = uxi.wrapping_sub(uyi); if i >> 63 == 0 { if i == 0 { return 0.0 * x; } uxi = i; } uxi <<= 1; ex -= 1; } i = uxi.wrapping_sub(uyi); if i >> 63 == 0 { if i == 0 { return 0.0 * x; } uxi = i; } while uxi >> 52 == 0 { uxi <<= 1; ex -= 1; } /* scale result */ if ex > 0 { uxi -= 1 << 52; uxi |= (ex as u64) << 52; } else { uxi >>= -ex + 1; } uxi |= (sx as u64) << 63; f64::from_bits(uxi) } compiler_builtins-0.1.101/libm/src/math/fmodf.rs000064400000000000000000000031611046102023000176140ustar 00000000000000use core::f32; use core::u32; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn fmodf(x: f32, y: f32) -> f32 { let mut uxi = x.to_bits(); let mut uyi = y.to_bits(); let mut ex = (uxi >> 23 & 0xff) as i32; let mut ey = (uyi >> 23 & 0xff) as i32; let sx = uxi & 0x80000000; let mut i; if uyi << 1 == 0 || y.is_nan() || ex == 0xff { return (x * y) / (x * y); } if uxi << 1 <= uyi << 1 { if uxi 
<< 1 == uyi << 1 { return 0.0 * x; } return x; } /* normalize x and y */ if ex == 0 { i = uxi << 9; while i >> 31 == 0 { ex -= 1; i <<= 1; } uxi <<= -ex + 1; } else { uxi &= u32::MAX >> 9; uxi |= 1 << 23; } if ey == 0 { i = uyi << 9; while i >> 31 == 0 { ey -= 1; i <<= 1; } uyi <<= -ey + 1; } else { uyi &= u32::MAX >> 9; uyi |= 1 << 23; } /* x mod y */ while ex > ey { i = uxi.wrapping_sub(uyi); if i >> 31 == 0 { if i == 0 { return 0.0 * x; } uxi = i; } uxi <<= 1; ex -= 1; } i = uxi.wrapping_sub(uyi); if i >> 31 == 0 { if i == 0 { return 0.0 * x; } uxi = i; } while uxi >> 23 == 0 { uxi <<= 1; ex -= 1; } /* scale result up */ if ex > 0 { uxi -= 1 << 23; uxi |= (ex as u32) << 23; } else { uxi >>= -ex + 1; } uxi |= sx; f32::from_bits(uxi) } compiler_builtins-0.1.101/libm/src/math/frexp.rs000064400000000000000000000007541046102023000176520ustar 00000000000000pub fn frexp(x: f64) -> (f64, i32) { let mut y = x.to_bits(); let ee = ((y >> 52) & 0x7ff) as i32; if ee == 0 { if x != 0.0 { let x1p64 = f64::from_bits(0x43f0000000000000); let (x, e) = frexp(x * x1p64); return (x, e - 64); } return (x, 0); } else if ee == 0x7ff { return (x, 0); } let e = ee - 0x3fe; y &= 0x800fffffffffffff; y |= 0x3fe0000000000000; return (f64::from_bits(y), e); } compiler_builtins-0.1.101/libm/src/math/frexpf.rs000064400000000000000000000007451046102023000200200ustar 00000000000000pub fn frexpf(x: f32) -> (f32, i32) { let mut y = x.to_bits(); let ee: i32 = ((y >> 23) & 0xff) as i32; if ee == 0 { if x != 0.0 { let x1p64 = f32::from_bits(0x5f800000); let (x, e) = frexpf(x * x1p64); return (x, e - 64); } else { return (x, 0); } } else if ee == 0xff { return (x, 0); } let e = ee - 0x7e; y &= 0x807fffff; y |= 0x3f000000; (f32::from_bits(y), e) } compiler_builtins-0.1.101/libm/src/math/hypot.rs000064400000000000000000000033641046102023000176710ustar 00000000000000use core::f64; use super::sqrt; const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1 fn sq(x: f64) -> (f64, f64) { let xh: f64; let xl: f64; let xc: f64; xc = x * SPLIT; xh = x - xc + xc; xl = x - xh; let hi = x * x; let lo = xh * xh - hi + 2. 
* xh * xl + xl * xl; (hi, lo) } #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn hypot(mut x: f64, mut y: f64) -> f64 { let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700 let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700 let mut uxi = x.to_bits(); let mut uyi = y.to_bits(); let uti; let ex: i64; let ey: i64; let mut z: f64; /* arrange |x| >= |y| */ uxi &= -1i64 as u64 >> 1; uyi &= -1i64 as u64 >> 1; if uxi < uyi { uti = uxi; uxi = uyi; uyi = uti; } /* special cases */ ex = (uxi >> 52) as i64; ey = (uyi >> 52) as i64; x = f64::from_bits(uxi); y = f64::from_bits(uyi); /* note: hypot(inf,nan) == inf */ if ey == 0x7ff { return y; } if ex == 0x7ff || uyi == 0 { return x; } /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */ /* 64 difference is enough for ld80 double_t */ if ex - ey > 64 { return x + y; } /* precise sqrt argument in nearest rounding mode without overflow */ /* xh*xh must not overflow and xl*xl must not underflow in sq */ z = 1.; if ex > 0x3ff + 510 { z = x1p700; x *= x1p_700; y *= x1p_700; } else if ey < 0x3ff - 450 { z = x1p_700; x *= x1p700; y *= x1p700; } let (hx, lx) = sq(x); let (hy, ly) = sq(y); z * sqrt(ly + lx + hy + hx) } compiler_builtins-0.1.101/libm/src/math/hypotf.rs000064400000000000000000000017501046102023000200340ustar 00000000000000use core::f32; use super::sqrtf; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn hypotf(mut x: f32, mut y: f32) -> f32 { let x1p90 = f32::from_bits(0x6c800000); // 0x1p90f === 2 ^ 90 let x1p_90 = f32::from_bits(0x12800000); // 0x1p-90f === 2 ^ -90 let mut uxi = x.to_bits(); let mut uyi = y.to_bits(); let uti; let mut z: f32; uxi &= -1i32 as u32 >> 1; uyi &= -1i32 as u32 >> 1; if uxi < uyi { uti = uxi; uxi = uyi; uyi = uti; } x = f32::from_bits(uxi); y = f32::from_bits(uyi); if uyi == 0xff << 23 { return y; } if uxi >= 0xff << 23 || uyi == 0 || uxi - uyi >= 25 << 23 { return x + y; } z = 1.; if uxi >= (0x7f + 60) << 23 { z = x1p90; x *= x1p_90; y *= x1p_90; } else if uyi < (0x7f - 60) << 23 { z = x1p_90; x *= x1p90; y *= x1p90; } z * sqrtf((x as f64 * x as f64 + y as f64 * y as f64) as f32) } compiler_builtins-0.1.101/libm/src/math/ilogb.rs000064400000000000000000000013261046102023000176160ustar 00000000000000const FP_ILOGBNAN: i32 = -1 - 0x7fffffff; const FP_ILOGB0: i32 = FP_ILOGBNAN; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ilogb(x: f64) -> i32 { let mut i: u64 = x.to_bits(); let e = ((i >> 52) & 0x7ff) as i32; if e == 0 { i <<= 12; if i == 0 { force_eval!(0.0 / 0.0); return FP_ILOGB0; } /* subnormal x */ let mut e = -0x3ff; while (i >> 63) == 0 { e -= 1; i <<= 1; } e } else if e == 0x7ff { force_eval!(0.0 / 0.0); if (i << 12) != 0 { FP_ILOGBNAN } else { i32::max_value() } } else { e - 0x3ff } } compiler_builtins-0.1.101/libm/src/math/ilogbf.rs000064400000000000000000000013141046102023000177610ustar 00000000000000const FP_ILOGBNAN: i32 = -1 - 0x7fffffff; const FP_ILOGB0: i32 = FP_ILOGBNAN; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ilogbf(x: f32) -> i32 { let mut i = x.to_bits(); let e = ((i >> 23) & 0xff) as i32; if e == 0 { i <<= 9; if i == 0 { force_eval!(0.0 / 0.0); return FP_ILOGB0; } /* subnormal x */ let mut e = -0x7f; while (i >> 31) == 0 { e -= 1; i <<= 1; } e } else if e == 0xff { force_eval!(0.0 / 0.0); if (i << 9) != 0 { FP_ILOGBNAN } else { i32::max_value() } } else { e - 0x7f } } 
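// A couple of extra sanity values (an illustrative addition in the style of
// the sibling modules): ilogbf returns the unbiased binary exponent, so
// 8.0 = 1.0 * 2^3 maps to 3 and 0.5 = 1.0 * 2^-1 maps to -1.
#[cfg(test)]
mod tests {
    #[test]
    fn sanity_check() {
        assert_eq!(super::ilogbf(1.0), 0);
        assert_eq!(super::ilogbf(8.0), 3);
        assert_eq!(super::ilogbf(0.5), -1);
    }
}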
compiler_builtins-0.1.101/libm/src/math/j0.rs000064400000000000000000000361551046102023000170430ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_j0.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* j0(x), y0(x) * Bessel function of the first and second kinds of order zero. * Method -- j0(x): * 1. For tiny x, we use j0(x) = 1 - x^2/4 + x^4/64 - ... * 2. Reduce x to |x| since j0(x)=j0(-x), and * for x in (0,2) * j0(x) = 1-z/4+ z^2*R0/S0, where z = x*x; * (precision: |j0-1+z/4-z^2R0/S0 |<2**-63.67 ) * for x in (2,inf) * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)-q0(x)*sin(x0)) * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) * as follow: * cos(x0) = cos(x)cos(pi/4)+sin(x)sin(pi/4) * = 1/sqrt(2) * (cos(x) + sin(x)) * sin(x0) = sin(x)cos(pi/4)-cos(x)sin(pi/4) * = 1/sqrt(2) * (sin(x) - cos(x)) * (To avoid cancellation, use * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) * to compute the worse one.) * * 3 Special cases * j0(nan)= nan * j0(0) = 1 * j0(inf) = 0 * * Method -- y0(x): * 1. For x<2. * Since * y0(x) = 2/pi*(j0(x)*(ln(x/2)+Euler) + x^2/4 - ...) * therefore y0(x)-2/pi*j0(x)*ln(x) is an even function. * We use the following function to approximate y0, * y0(x) = U(z)/V(z) + (2/pi)*(j0(x)*ln(x)), z= x^2 * where * U(z) = u00 + u01*z + ... + u06*z^6 * V(z) = 1 + v01*z + ... + v04*z^4 * with absolute approximation error bounded by 2**-72. * Note: For tiny x, U/V = u0 and j0(x)~1, hence * y0(tiny) = u0 + (2/pi)*ln(tiny), (choose tiny<2**-27) * 2. For x>=2. * y0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x0)+q0(x)*sin(x0)) * where x0 = x-pi/4. It is better to compute sin(x0),cos(x0) * by the method mentioned above. * 3. Special cases: y0(0)=-inf, y0(x<0)=NaN, y0(inf)=0. 
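 *
 * Editorial note (not part of the upstream FreeBSD source): the
 * cancellation-avoiding identity quoted above,
 *     sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)),
 * follows from (sin(x)+cos(x))*(sin(x)-cos(x)) = sin^2(x)-cos^2(x) = -cos(2x);
 * whichever of the two combinations loses precision to cancellation is
 * therefore recovered by dividing -cos(2x) by the well-conditioned one.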
*/ use super::{cos, fabs, get_high_word, get_low_word, log, sin, sqrt}; const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ const TPI: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ /* common method when |x|>=2 */ fn common(ix: u32, x: f64, y0: bool) -> f64 { let s: f64; let mut c: f64; let mut ss: f64; let mut cc: f64; let z: f64; /* * j0(x) = sqrt(2/(pi*x))*(p0(x)*cos(x-pi/4)-q0(x)*sin(x-pi/4)) * y0(x) = sqrt(2/(pi*x))*(p0(x)*sin(x-pi/4)+q0(x)*cos(x-pi/4)) * * sin(x-pi/4) = (sin(x) - cos(x))/sqrt(2) * cos(x-pi/4) = (sin(x) + cos(x))/sqrt(2) * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) */ s = sin(x); c = cos(x); if y0 { c = -c; } cc = s + c; /* avoid overflow in 2*x, big ulp error when x>=0x1p1023 */ if ix < 0x7fe00000 { ss = s - c; z = -cos(2.0 * x); if s * c < 0.0 { cc = z / ss; } else { ss = z / cc; } if ix < 0x48000000 { if y0 { ss = -ss; } cc = pzero(x) * cc - qzero(x) * ss; } } return INVSQRTPI * cc / sqrt(x); } /* R0/S0 on [0, 2.00] */ const R02: f64 = 1.56249999999999947958e-02; /* 0x3F8FFFFF, 0xFFFFFFFD */ const R03: f64 = -1.89979294238854721751e-04; /* 0xBF28E6A5, 0xB61AC6E9 */ const R04: f64 = 1.82954049532700665670e-06; /* 0x3EBEB1D1, 0x0C503919 */ const R05: f64 = -4.61832688532103189199e-09; /* 0xBE33D5E7, 0x73D63FCE */ const S01: f64 = 1.56191029464890010492e-02; /* 0x3F8FFCE8, 0x82C8C2A4 */ const S02: f64 = 1.16926784663337450260e-04; /* 0x3F1EA6D2, 0xDD57DBF4 */ const S03: f64 = 5.13546550207318111446e-07; /* 0x3EA13B54, 0xCE84D5A9 */ const S04: f64 = 1.16614003333790000205e-09; /* 0x3E1408BC, 0xF4745D8F */ pub fn j0(mut x: f64) -> f64 { let z: f64; let r: f64; let s: f64; let mut ix: u32; ix = get_high_word(x); ix &= 0x7fffffff; /* j0(+-inf)=0, j0(nan)=nan */ if ix >= 0x7ff00000 { return 1.0 / (x * x); } x = fabs(x); if ix >= 0x40000000 { /* |x| >= 2 */ /* large ulp error near zeros: 2.4, 5.52, 8.6537,.. 
*/ return common(ix, x, false); } /* 1 - x*x/4 + x*x*R(x^2)/S(x^2) */ if ix >= 0x3f200000 { /* |x| >= 2**-13 */ /* up to 4ulp error close to 2 */ z = x * x; r = z * (R02 + z * (R03 + z * (R04 + z * R05))); s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * S04))); return (1.0 + x / 2.0) * (1.0 - x / 2.0) + z * (r / s); } /* 1 - x*x/4 */ /* prevent underflow */ /* inexact should be raised when x!=0, this is not done correctly */ if ix >= 0x38000000 { /* |x| >= 2**-127 */ x = 0.25 * x * x; } return 1.0 - x; } const U00: f64 = -7.38042951086872317523e-02; /* 0xBFB2E4D6, 0x99CBD01F */ const U01: f64 = 1.76666452509181115538e-01; /* 0x3FC69D01, 0x9DE9E3FC */ const U02: f64 = -1.38185671945596898896e-02; /* 0xBF8C4CE8, 0xB16CFA97 */ const U03: f64 = 3.47453432093683650238e-04; /* 0x3F36C54D, 0x20B29B6B */ const U04: f64 = -3.81407053724364161125e-06; /* 0xBECFFEA7, 0x73D25CAD */ const U05: f64 = 1.95590137035022920206e-08; /* 0x3E550057, 0x3B4EABD4 */ const U06: f64 = -3.98205194132103398453e-11; /* 0xBDC5E43D, 0x693FB3C8 */ const V01: f64 = 1.27304834834123699328e-02; /* 0x3F8A1270, 0x91C9C71A */ const V02: f64 = 7.60068627350353253702e-05; /* 0x3F13ECBB, 0xF578C6C1 */ const V03: f64 = 2.59150851840457805467e-07; /* 0x3E91642D, 0x7FF202FD */ const V04: f64 = 4.41110311332675467403e-10; /* 0x3DFE5018, 0x3BD6D9EF */ pub fn y0(x: f64) -> f64 { let z: f64; let u: f64; let v: f64; let ix: u32; let lx: u32; ix = get_high_word(x); lx = get_low_word(x); /* y0(nan)=nan, y0(<0)=nan, y0(0)=-inf, y0(inf)=0 */ if ((ix << 1) | lx) == 0 { return -1.0 / 0.0; } if (ix >> 31) != 0 { return 0.0 / 0.0; } if ix >= 0x7ff00000 { return 1.0 / x; } if ix >= 0x40000000 { /* x >= 2 */ /* large ulp errors near zeros: 3.958, 7.086,.. */ return common(ix, x, true); } /* U(x^2)/V(x^2) + (2/pi)*j0(x)*log(x) */ if ix >= 0x3e400000 { /* x >= 2**-27 */ /* large ulp error near the first zero, x ~= 0.89 */ z = x * x; u = U00 + z * (U01 + z * (U02 + z * (U03 + z * (U04 + z * (U05 + z * U06))))); v = 1.0 + z * (V01 + z * (V02 + z * (V03 + z * V04))); return u / v + TPI * (j0(x) * log(x)); } return U00 + TPI * log(x); } /* The asymptotic expansions of pzero is * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. * For x >= 2, We approximate pzero by * pzero(x) = 1 + (R/S) * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 * S = 1 + pS0*s^2 + ... 
+ pS4*s^10 * and * | pzero(x)-1-R/S | <= 2 ** ( -60.26) */ const PR8: [f64; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ -7.03124999999900357484e-02, /* 0xBFB1FFFF, 0xFFFFFD32 */ -8.08167041275349795626e+00, /* 0xC02029D0, 0xB44FA779 */ -2.57063105679704847262e+02, /* 0xC0701102, 0x7B19E863 */ -2.48521641009428822144e+03, /* 0xC0A36A6E, 0xCD4DCAFC */ -5.25304380490729545272e+03, /* 0xC0B4850B, 0x36CC643D */ ]; const PS8: [f64; 5] = [ 1.16534364619668181717e+02, /* 0x405D2233, 0x07A96751 */ 3.83374475364121826715e+03, /* 0x40ADF37D, 0x50596938 */ 4.05978572648472545552e+04, /* 0x40E3D2BB, 0x6EB6B05F */ 1.16752972564375915681e+05, /* 0x40FC810F, 0x8F9FA9BD */ 4.76277284146730962675e+04, /* 0x40E74177, 0x4F2C49DC */ ]; const PR5: [f64; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ -1.14125464691894502584e-11, /* 0xBDA918B1, 0x47E495CC */ -7.03124940873599280078e-02, /* 0xBFB1FFFF, 0xE69AFBC6 */ -4.15961064470587782438e+00, /* 0xC010A370, 0xF90C6BBF */ -6.76747652265167261021e+01, /* 0xC050EB2F, 0x5A7D1783 */ -3.31231299649172967747e+02, /* 0xC074B3B3, 0x6742CC63 */ -3.46433388365604912451e+02, /* 0xC075A6EF, 0x28A38BD7 */ ]; const PS5: [f64; 5] = [ 6.07539382692300335975e+01, /* 0x404E6081, 0x0C98C5DE */ 1.05125230595704579173e+03, /* 0x40906D02, 0x5C7E2864 */ 5.97897094333855784498e+03, /* 0x40B75AF8, 0x8FBE1D60 */ 9.62544514357774460223e+03, /* 0x40C2CCB8, 0xFA76FA38 */ 2.40605815922939109441e+03, /* 0x40A2CC1D, 0xC70BE864 */ ]; const PR3: [f64; 6] = [ /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ -2.54704601771951915620e-09, /* 0xBE25E103, 0x6FE1AA86 */ -7.03119616381481654654e-02, /* 0xBFB1FFF6, 0xF7C0E24B */ -2.40903221549529611423e+00, /* 0xC00345B2, 0xAEA48074 */ -2.19659774734883086467e+01, /* 0xC035F74A, 0x4CB94E14 */ -5.80791704701737572236e+01, /* 0xC04D0A22, 0x420A1A45 */ -3.14479470594888503854e+01, /* 0xC03F72AC, 0xA892D80F */ ]; const PS3: [f64; 5] = [ 3.58560338055209726349e+01, /* 0x4041ED92, 0x84077DD3 */ 3.61513983050303863820e+02, /* 0x40769839, 0x464A7C0E */ 1.19360783792111533330e+03, /* 0x4092A66E, 0x6D1061D6 */ 1.12799679856907414432e+03, /* 0x40919FFC, 0xB8C39B7E */ 1.73580930813335754692e+02, /* 0x4065B296, 0xFC379081 */ ]; const PR2: [f64; 6] = [ /* for x in [2.8570,2]=1/[0.3499,0.5] */ -8.87534333032526411254e-08, /* 0xBE77D316, 0xE927026D */ -7.03030995483624743247e-02, /* 0xBFB1FF62, 0x495E1E42 */ -1.45073846780952986357e+00, /* 0xBFF73639, 0x8A24A843 */ -7.63569613823527770791e+00, /* 0xC01E8AF3, 0xEDAFA7F3 */ -1.11931668860356747786e+01, /* 0xC02662E6, 0xC5246303 */ -3.23364579351335335033e+00, /* 0xC009DE81, 0xAF8FE70F */ ]; const PS2: [f64; 5] = [ 2.22202997532088808441e+01, /* 0x40363865, 0x908B5959 */ 1.36206794218215208048e+02, /* 0x4061069E, 0x0EE8878F */ 2.70470278658083486789e+02, /* 0x4070E786, 0x42EA079B */ 1.53875394208320329881e+02, /* 0x40633C03, 0x3AB6FAFF */ 1.46576176948256193810e+01, /* 0x402D50B3, 0x44391809 */ ]; fn pzero(x: f64) -> f64 { let p: &[f64; 6]; let q: &[f64; 5]; let z: f64; let r: f64; let s: f64; let mut ix: u32; ix = get_high_word(x); ix &= 0x7fffffff; if ix >= 0x40200000 { p = &PR8; q = &PS8; } else if ix >= 0x40122E8B { p = &PR5; q = &PS5; } else if ix >= 0x4006DB6D { p = &PR3; q = &PS3; } else /*ix >= 0x40000000*/ { p = &PR2; q = &PS2; } z = 1.0 / (x * x); r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); return 1.0 + r / s; } /* For x >= 8, the asymptotic expansions of 
qzero is * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. * We approximate pzero by * qzero(x) = s*(-1.25 + (R/S)) * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 * S = 1 + qS0*s^2 + ... + qS5*s^12 * and * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) */ const QR8: [f64; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ 7.32421874999935051953e-02, /* 0x3FB2BFFF, 0xFFFFFE2C */ 1.17682064682252693899e+01, /* 0x40278952, 0x5BB334D6 */ 5.57673380256401856059e+02, /* 0x40816D63, 0x15301825 */ 8.85919720756468632317e+03, /* 0x40C14D99, 0x3E18F46D */ 3.70146267776887834771e+04, /* 0x40E212D4, 0x0E901566 */ ]; const QS8: [f64; 6] = [ 1.63776026895689824414e+02, /* 0x406478D5, 0x365B39BC */ 8.09834494656449805916e+03, /* 0x40BFA258, 0x4E6B0563 */ 1.42538291419120476348e+05, /* 0x41016652, 0x54D38C3F */ 8.03309257119514397345e+05, /* 0x412883DA, 0x83A52B43 */ 8.40501579819060512818e+05, /* 0x4129A66B, 0x28DE0B3D */ -3.43899293537866615225e+05, /* 0xC114FD6D, 0x2C9530C5 */ ]; const QR5: [f64; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ 1.84085963594515531381e-11, /* 0x3DB43D8F, 0x29CC8CD9 */ 7.32421766612684765896e-02, /* 0x3FB2BFFF, 0xD172B04C */ 5.83563508962056953777e+00, /* 0x401757B0, 0xB9953DD3 */ 1.35111577286449829671e+02, /* 0x4060E392, 0x0A8788E9 */ 1.02724376596164097464e+03, /* 0x40900CF9, 0x9DC8C481 */ 1.98997785864605384631e+03, /* 0x409F17E9, 0x53C6E3A6 */ ]; const QS5: [f64; 6] = [ 8.27766102236537761883e+01, /* 0x4054B1B3, 0xFB5E1543 */ 2.07781416421392987104e+03, /* 0x40A03BA0, 0xDA21C0CE */ 1.88472887785718085070e+04, /* 0x40D267D2, 0x7B591E6D */ 5.67511122894947329769e+04, /* 0x40EBB5E3, 0x97E02372 */ 3.59767538425114471465e+04, /* 0x40E19118, 0x1F7A54A0 */ -5.35434275601944773371e+03, /* 0xC0B4EA57, 0xBEDBC609 */ ]; const QR3: [f64; 6] = [ /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ 4.37741014089738620906e-09, /* 0x3E32CD03, 0x6ADECB82 */ 7.32411180042911447163e-02, /* 0x3FB2BFEE, 0x0E8D0842 */ 3.34423137516170720929e+00, /* 0x400AC0FC, 0x61149CF5 */ 4.26218440745412650017e+01, /* 0x40454F98, 0x962DAEDD */ 1.70808091340565596283e+02, /* 0x406559DB, 0xE25EFD1F */ 1.66733948696651168575e+02, /* 0x4064D77C, 0x81FA21E0 */ ]; const QS3: [f64; 6] = [ 4.87588729724587182091e+01, /* 0x40486122, 0xBFE343A6 */ 7.09689221056606015736e+02, /* 0x40862D83, 0x86544EB3 */ 3.70414822620111362994e+03, /* 0x40ACF04B, 0xE44DFC63 */ 6.46042516752568917582e+03, /* 0x40B93C6C, 0xD7C76A28 */ 2.51633368920368957333e+03, /* 0x40A3A8AA, 0xD94FB1C0 */ -1.49247451836156386662e+02, /* 0xC062A7EB, 0x201CF40F */ ]; const QR2: [f64; 6] = [ /* for x in [2.8570,2]=1/[0.3499,0.5] */ 1.50444444886983272379e-07, /* 0x3E84313B, 0x54F76BDB */ 7.32234265963079278272e-02, /* 0x3FB2BEC5, 0x3E883E34 */ 1.99819174093815998816e+00, /* 0x3FFFF897, 0xE727779C */ 1.44956029347885735348e+01, /* 0x402CFDBF, 0xAAF96FE5 */ 3.16662317504781540833e+01, /* 0x403FAA8E, 0x29FBDC4A */ 1.62527075710929267416e+01, /* 0x403040B1, 0x71814BB4 */ ]; const QS2: [f64; 6] = [ 3.03655848355219184498e+01, /* 0x403E5D96, 0xF7C07AED */ 2.69348118608049844624e+02, /* 0x4070D591, 0xE4D14B40 */ 8.44783757595320139444e+02, /* 0x408A6645, 0x22B3BF22 */ 8.82935845112488550512e+02, /* 0x408B977C, 0x9C5CC214 */ 2.12666388511798828631e+02, /* 0x406A9553, 0x0E001365 */ -5.31095493882666946917e+00, /* 0xC0153E6A, 0xF8B32931 */ ]; fn qzero(x: f64) -> f64 { let p: &[f64; 6]; let q: &[f64; 6]; let s: f64; let r: f64; let z: f64; let mut ix: u32; ix = get_high_word(x); ix &= 0x7fffffff; if ix >= 0x40200000 
{ p = &QR8; q = &QS8; } else if ix >= 0x40122E8B { p = &QR5; q = &QS5; } else if ix >= 0x4006DB6D { p = &QR3; q = &QS3; } else /*ix >= 0x40000000*/ { p = &QR2; q = &QS2; } z = 1.0 / (x * x); r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); return (-0.125 + r / s) / x; } compiler_builtins-0.1.101/libm/src/math/j0f.rs000064400000000000000000000243201046102023000172000ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_j0f.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use super::{cosf, fabsf, logf, sinf, sqrtf}; const INVSQRTPI: f32 = 5.6418961287e-01; /* 0x3f106ebb */ const TPI: f32 = 6.3661974669e-01; /* 0x3f22f983 */ fn common(ix: u32, x: f32, y0: bool) -> f32 { let z: f32; let s: f32; let mut c: f32; let mut ss: f32; let mut cc: f32; /* * j0(x) = 1/sqrt(pi) * (P(0,x)*cc - Q(0,x)*ss) / sqrt(x) * y0(x) = 1/sqrt(pi) * (P(0,x)*ss + Q(0,x)*cc) / sqrt(x) */ s = sinf(x); c = cosf(x); if y0 { c = -c; } cc = s + c; if ix < 0x7f000000 { ss = s - c; z = -cosf(2.0 * x); if s * c < 0.0 { cc = z / ss; } else { ss = z / cc; } if ix < 0x58800000 { if y0 { ss = -ss; } cc = pzerof(x) * cc - qzerof(x) * ss; } } return INVSQRTPI * cc / sqrtf(x); } /* R0/S0 on [0, 2.00] */ const R02: f32 = 1.5625000000e-02; /* 0x3c800000 */ const R03: f32 = -1.8997929874e-04; /* 0xb947352e */ const R04: f32 = 1.8295404516e-06; /* 0x35f58e88 */ const R05: f32 = -4.6183270541e-09; /* 0xb19eaf3c */ const S01: f32 = 1.5619102865e-02; /* 0x3c7fe744 */ const S02: f32 = 1.1692678527e-04; /* 0x38f53697 */ const S03: f32 = 5.1354652442e-07; /* 0x3509daa6 */ const S04: f32 = 1.1661400734e-09; /* 0x30a045e8 */ pub fn j0f(mut x: f32) -> f32 { let z: f32; let r: f32; let s: f32; let mut ix: u32; ix = x.to_bits(); ix &= 0x7fffffff; if ix >= 0x7f800000 { return 1.0 / (x * x); } x = fabsf(x); if ix >= 0x40000000 { /* |x| >= 2 */ /* large ulp error near zeros */ return common(ix, x, false); } if ix >= 0x3a000000 { /* |x| >= 2**-11 */ /* up to 4ulp error near 2 */ z = x * x; r = z * (R02 + z * (R03 + z * (R04 + z * R05))); s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * S04))); return (1.0 + x / 2.0) * (1.0 - x / 2.0) + z * (r / s); } if ix >= 0x21800000 { /* |x| >= 2**-60 */ x = 0.25 * x * x; } return 1.0 - x; } const U00: f32 = -7.3804296553e-02; /* 0xbd9726b5 */ const U01: f32 = 1.7666645348e-01; /* 0x3e34e80d */ const U02: f32 = -1.3818567619e-02; /* 0xbc626746 */ const U03: f32 = 3.4745343146e-04; /* 0x39b62a69 */ const U04: f32 = -3.8140706238e-06; /* 0xb67ff53c */ const U05: f32 = 1.9559013964e-08; /* 0x32a802ba */ const U06: f32 = -3.9820518410e-11; /* 0xae2f21eb */ const V01: f32 = 1.2730483897e-02; /* 0x3c509385 */ const V02: f32 = 7.6006865129e-05; /* 0x389f65e0 */ const V03: f32 = 2.5915085189e-07; /* 0x348b216c */ const V04: f32 = 4.4111031494e-10; /* 0x2ff280c2 */ pub fn y0f(x: f32) -> f32 { let z: f32; let u: f32; let v: f32; let ix: u32; ix = x.to_bits(); if (ix & 0x7fffffff) == 0 { return -1.0 / 0.0; } if (ix >> 31) != 0 { return 0.0 / 0.0; } if ix >= 0x7f800000 { return 1.0 / x; } if ix >= 0x40000000 { /* 
|x| >= 2.0 */ /* large ulp error near zeros */ return common(ix, x, true); } if ix >= 0x39000000 { /* x >= 2**-13 */ /* large ulp error at x ~= 0.89 */ z = x * x; u = U00 + z * (U01 + z * (U02 + z * (U03 + z * (U04 + z * (U05 + z * U06))))); v = 1.0 + z * (V01 + z * (V02 + z * (V03 + z * V04))); return u / v + TPI * (j0f(x) * logf(x)); } return U00 + TPI * logf(x); } /* The asymptotic expansions of pzero is * 1 - 9/128 s^2 + 11025/98304 s^4 - ..., where s = 1/x. * For x >= 2, We approximate pzero by * pzero(x) = 1 + (R/S) * where R = pR0 + pR1*s^2 + pR2*s^4 + ... + pR5*s^10 * S = 1 + pS0*s^2 + ... + pS4*s^10 * and * | pzero(x)-1-R/S | <= 2 ** ( -60.26) */ const PR8: [f32; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ 0.0000000000e+00, /* 0x00000000 */ -7.0312500000e-02, /* 0xbd900000 */ -8.0816707611e+00, /* 0xc1014e86 */ -2.5706311035e+02, /* 0xc3808814 */ -2.4852163086e+03, /* 0xc51b5376 */ -5.2530439453e+03, /* 0xc5a4285a */ ]; const PS8: [f32; 5] = [ 1.1653436279e+02, /* 0x42e91198 */ 3.8337448730e+03, /* 0x456f9beb */ 4.0597855469e+04, /* 0x471e95db */ 1.1675296875e+05, /* 0x47e4087c */ 4.7627726562e+04, /* 0x473a0bba */ ]; const PR5: [f32; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ -1.1412546255e-11, /* 0xad48c58a */ -7.0312492549e-02, /* 0xbd8fffff */ -4.1596107483e+00, /* 0xc0851b88 */ -6.7674766541e+01, /* 0xc287597b */ -3.3123129272e+02, /* 0xc3a59d9b */ -3.4643338013e+02, /* 0xc3ad3779 */ ]; const PS5: [f32; 5] = [ 6.0753936768e+01, /* 0x42730408 */ 1.0512523193e+03, /* 0x44836813 */ 5.9789707031e+03, /* 0x45bad7c4 */ 9.6254453125e+03, /* 0x461665c8 */ 2.4060581055e+03, /* 0x451660ee */ ]; const PR3: [f32; 6] = [ /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ -2.5470459075e-09, /* 0xb12f081b */ -7.0311963558e-02, /* 0xbd8fffb8 */ -2.4090321064e+00, /* 0xc01a2d95 */ -2.1965976715e+01, /* 0xc1afba52 */ -5.8079170227e+01, /* 0xc2685112 */ -3.1447946548e+01, /* 0xc1fb9565 */ ]; const PS3: [f32; 5] = [ 3.5856033325e+01, /* 0x420f6c94 */ 3.6151397705e+02, /* 0x43b4c1ca */ 1.1936077881e+03, /* 0x44953373 */ 1.1279968262e+03, /* 0x448cffe6 */ 1.7358093262e+02, /* 0x432d94b8 */ ]; const PR2: [f32; 6] = [ /* for x in [2.8570,2]=1/[0.3499,0.5] */ -8.8753431271e-08, /* 0xb3be98b7 */ -7.0303097367e-02, /* 0xbd8ffb12 */ -1.4507384300e+00, /* 0xbfb9b1cc */ -7.6356959343e+00, /* 0xc0f4579f */ -1.1193166733e+01, /* 0xc1331736 */ -3.2336456776e+00, /* 0xc04ef40d */ ]; const PS2: [f32; 5] = [ 2.2220300674e+01, /* 0x41b1c32d */ 1.3620678711e+02, /* 0x430834f0 */ 2.7047027588e+02, /* 0x43873c32 */ 1.5387539673e+02, /* 0x4319e01a */ 1.4657617569e+01, /* 0x416a859a */ ]; fn pzerof(x: f32) -> f32 { let p: &[f32; 6]; let q: &[f32; 5]; let z: f32; let r: f32; let s: f32; let mut ix: u32; ix = x.to_bits(); ix &= 0x7fffffff; if ix >= 0x41000000 { p = &PR8; q = &PS8; } else if ix >= 0x409173eb { p = &PR5; q = &PS5; } else if ix >= 0x4036d917 { p = &PR3; q = &PS3; } else /*ix >= 0x40000000*/ { p = &PR2; q = &PS2; } z = 1.0 / (x * x); r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); return 1.0 + r / s; } /* For x >= 8, the asymptotic expansions of qzero is * -1/8 s + 75/1024 s^3 - ..., where s = 1/x. * We approximate pzero by * qzero(x) = s*(-1.25 + (R/S)) * where R = qR0 + qR1*s^2 + qR2*s^4 + ... + qR5*s^10 * S = 1 + qS0*s^2 + ... 
+ qS5*s^12 * and * | qzero(x)/s +1.25-R/S | <= 2 ** ( -61.22) */ const QR8: [f32; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ 0.0000000000e+00, /* 0x00000000 */ 7.3242187500e-02, /* 0x3d960000 */ 1.1768206596e+01, /* 0x413c4a93 */ 5.5767340088e+02, /* 0x440b6b19 */ 8.8591972656e+03, /* 0x460a6cca */ 3.7014625000e+04, /* 0x471096a0 */ ]; const QS8: [f32; 6] = [ 1.6377603149e+02, /* 0x4323c6aa */ 8.0983447266e+03, /* 0x45fd12c2 */ 1.4253829688e+05, /* 0x480b3293 */ 8.0330925000e+05, /* 0x49441ed4 */ 8.4050156250e+05, /* 0x494d3359 */ -3.4389928125e+05, /* 0xc8a7eb69 */ ]; const QR5: [f32; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ 1.8408595828e-11, /* 0x2da1ec79 */ 7.3242180049e-02, /* 0x3d95ffff */ 5.8356351852e+00, /* 0x40babd86 */ 1.3511157227e+02, /* 0x43071c90 */ 1.0272437744e+03, /* 0x448067cd */ 1.9899779053e+03, /* 0x44f8bf4b */ ]; const QS5: [f32; 6] = [ 8.2776611328e+01, /* 0x42a58da0 */ 2.0778142090e+03, /* 0x4501dd07 */ 1.8847289062e+04, /* 0x46933e94 */ 5.6751113281e+04, /* 0x475daf1d */ 3.5976753906e+04, /* 0x470c88c1 */ -5.3543427734e+03, /* 0xc5a752be */ ]; const QR3: [f32; 6] = [ /* for x in [4.547,2.8571]=1/[0.2199,0.35001] */ 4.3774099900e-09, /* 0x3196681b */ 7.3241114616e-02, /* 0x3d95ff70 */ 3.3442313671e+00, /* 0x405607e3 */ 4.2621845245e+01, /* 0x422a7cc5 */ 1.7080809021e+02, /* 0x432acedf */ 1.6673394775e+02, /* 0x4326bbe4 */ ]; const QS3: [f32; 6] = [ 4.8758872986e+01, /* 0x42430916 */ 7.0968920898e+02, /* 0x44316c1c */ 3.7041481934e+03, /* 0x4567825f */ 6.4604252930e+03, /* 0x45c9e367 */ 2.5163337402e+03, /* 0x451d4557 */ -1.4924745178e+02, /* 0xc3153f59 */ ]; const QR2: [f32; 6] = [ /* for x in [2.8570,2]=1/[0.3499,0.5] */ 1.5044444979e-07, /* 0x342189db */ 7.3223426938e-02, /* 0x3d95f62a */ 1.9981917143e+00, /* 0x3fffc4bf */ 1.4495602608e+01, /* 0x4167edfd */ 3.1666231155e+01, /* 0x41fd5471 */ 1.6252708435e+01, /* 0x4182058c */ ]; const QS2: [f32; 6] = [ 3.0365585327e+01, /* 0x41f2ecb8 */ 2.6934811401e+02, /* 0x4386ac8f */ 8.4478375244e+02, /* 0x44533229 */ 8.8293585205e+02, /* 0x445cbbe5 */ 2.1266638184e+02, /* 0x4354aa98 */ -5.3109550476e+00, /* 0xc0a9f358 */ ]; fn qzerof(x: f32) -> f32 { let p: &[f32; 6]; let q: &[f32; 6]; let s: f32; let r: f32; let z: f32; let mut ix: u32; ix = x.to_bits(); ix &= 0x7fffffff; if ix >= 0x41000000 { p = &QR8; q = &QS8; } else if ix >= 0x409173eb { p = &QR5; q = &QS5; } else if ix >= 0x4036d917 { p = &QR3; q = &QS3; } else /*ix >= 0x40000000*/ { p = &QR2; q = &QS2; } z = 1.0 / (x * x); r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); return (-0.125 + r / s) / x; } compiler_builtins-0.1.101/libm/src/math/j1.rs000064400000000000000000000350411046102023000170350ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_j1.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* j1(x), y1(x) * Bessel function of the first and second kinds of order zero. * Method -- j1(x): * 1. For tiny x, we use j1(x) = x/2 - x^3/16 + x^5/384 - ... * 2. 
Reduce x to |x| since j1(x)=-j1(-x), and * for x in (0,2) * j1(x) = x/2 + x*z*R0/S0, where z = x*x; * (precision: |j1/x - 1/2 - R0/S0 |<2**-61.51 ) * for x in (2,inf) * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x1)-q1(x)*sin(x1)) * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) * as follow: * cos(x1) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4) * = 1/sqrt(2) * (sin(x) - cos(x)) * sin(x1) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4) * = -1/sqrt(2) * (sin(x) + cos(x)) * (To avoid cancellation, use * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) * to compute the worse one.) * * 3 Special cases * j1(nan)= nan * j1(0) = 0 * j1(inf) = 0 * * Method -- y1(x): * 1. screen out x<=0 cases: y1(0)=-inf, y1(x<0)=NaN * 2. For x<2. * Since * y1(x) = 2/pi*(j1(x)*(ln(x/2)+Euler)-1/x-x/2+5/64*x^3-...) * therefore y1(x)-2/pi*j1(x)*ln(x)-1/x is an odd function. * We use the following function to approximate y1, * y1(x) = x*U(z)/V(z) + (2/pi)*(j1(x)*ln(x)-1/x), z= x^2 * where for x in [0,2] (abs err less than 2**-65.89) * U(z) = U0[0] + U0[1]*z + ... + U0[4]*z^4 * V(z) = 1 + v0[0]*z + ... + v0[4]*z^5 * Note: For tiny x, 1/x dominate y1 and hence * y1(tiny) = -2/pi/tiny, (choose tiny<2**-54) * 3. For x>=2. * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x1)+q1(x)*cos(x1)) * where x1 = x-3*pi/4. It is better to compute sin(x1),cos(x1) * by method mentioned above. */ use super::{cos, fabs, get_high_word, get_low_word, log, sin, sqrt}; const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ const TPI: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ fn common(ix: u32, x: f64, y1: bool, sign: bool) -> f64 { let z: f64; let mut s: f64; let c: f64; let mut ss: f64; let mut cc: f64; /* * j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x-3pi/4)-q1(x)*sin(x-3pi/4)) * y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x-3pi/4)+q1(x)*cos(x-3pi/4)) * * sin(x-3pi/4) = -(sin(x) + cos(x))/sqrt(2) * cos(x-3pi/4) = (sin(x) - cos(x))/sqrt(2) * sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x)) */ s = sin(x); if y1 { s = -s; } c = cos(x); cc = s - c; if ix < 0x7fe00000 { /* avoid overflow in 2*x */ ss = -s - c; z = cos(2.0 * x); if s * c > 0.0 { cc = z / ss; } else { ss = z / cc; } if ix < 0x48000000 { if y1 { ss = -ss; } cc = pone(x) * cc - qone(x) * ss; } } if sign { cc = -cc; } return INVSQRTPI * cc / sqrt(x); } /* R0/S0 on [0,2] */ const R00: f64 = -6.25000000000000000000e-02; /* 0xBFB00000, 0x00000000 */ const R01: f64 = 1.40705666955189706048e-03; /* 0x3F570D9F, 0x98472C61 */ const R02: f64 = -1.59955631084035597520e-05; /* 0xBEF0C5C6, 0xBA169668 */ const R03: f64 = 4.96727999609584448412e-08; /* 0x3E6AAAFA, 0x46CA0BD9 */ const S01: f64 = 1.91537599538363460805e-02; /* 0x3F939D0B, 0x12637E53 */ const S02: f64 = 1.85946785588630915560e-04; /* 0x3F285F56, 0xB9CDF664 */ const S03: f64 = 1.17718464042623683263e-06; /* 0x3EB3BFF8, 0x333F8498 */ const S04: f64 = 5.04636257076217042715e-09; /* 0x3E35AC88, 0xC97DFF2C */ const S05: f64 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ pub fn j1(x: f64) -> f64 { let mut z: f64; let r: f64; let s: f64; let mut ix: u32; let sign: bool; ix = get_high_word(x); sign = (ix >> 31) != 0; ix &= 0x7fffffff; if ix >= 0x7ff00000 { return 1.0 / (x * x); } if ix >= 0x40000000 { /* |x| >= 2 */ return common(ix, fabs(x), false, sign); } if ix >= 0x38000000 { /* |x| >= 2**-127 */ z = x * x; r = z * (R00 + z * (R01 + z * (R02 + z * R03))); s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * (S04 + z * S05)))); z = r / s; } else { /* avoid underflow, raise inexact 
if x!=0 */ z = x; } return (0.5 + z) * x; } const U0: [f64; 5] = [ -1.96057090646238940668e-01, /* 0xBFC91866, 0x143CBC8A */ 5.04438716639811282616e-02, /* 0x3FA9D3C7, 0x76292CD1 */ -1.91256895875763547298e-03, /* 0xBF5F55E5, 0x4844F50F */ 2.35252600561610495928e-05, /* 0x3EF8AB03, 0x8FA6B88E */ -9.19099158039878874504e-08, /* 0xBE78AC00, 0x569105B8 */ ]; const V0: [f64; 5] = [ 1.99167318236649903973e-02, /* 0x3F94650D, 0x3F4DA9F0 */ 2.02552581025135171496e-04, /* 0x3F2A8C89, 0x6C257764 */ 1.35608801097516229404e-06, /* 0x3EB6C05A, 0x894E8CA6 */ 6.22741452364621501295e-09, /* 0x3E3ABF1D, 0x5BA69A86 */ 1.66559246207992079114e-11, /* 0x3DB25039, 0xDACA772A */ ]; pub fn y1(x: f64) -> f64 { let z: f64; let u: f64; let v: f64; let ix: u32; let lx: u32; ix = get_high_word(x); lx = get_low_word(x); /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */ if (ix << 1 | lx) == 0 { return -1.0 / 0.0; } if (ix >> 31) != 0 { return 0.0 / 0.0; } if ix >= 0x7ff00000 { return 1.0 / x; } if ix >= 0x40000000 { /* x >= 2 */ return common(ix, x, true, false); } if ix < 0x3c900000 { /* x < 2**-54 */ return -TPI / x; } z = x * x; u = U0[0] + z * (U0[1] + z * (U0[2] + z * (U0[3] + z * U0[4]))); v = 1.0 + z * (V0[0] + z * (V0[1] + z * (V0[2] + z * (V0[3] + z * V0[4])))); return x * (u / v) + TPI * (j1(x) * log(x) - 1.0 / x); } /* For x >= 8, the asymptotic expansions of pone is * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. * We approximate pone by * pone(x) = 1 + (R/S) * where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10 * S = 1 + ps0*s^2 + ... + ps4*s^10 * and * | pone(x)-1-R/S | <= 2 ** ( -60.06) */ const PR8: [f64; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ 1.17187499999988647970e-01, /* 0x3FBDFFFF, 0xFFFFFCCE */ 1.32394806593073575129e+01, /* 0x402A7A9D, 0x357F7FCE */ 4.12051854307378562225e+02, /* 0x4079C0D4, 0x652EA590 */ 3.87474538913960532227e+03, /* 0x40AE457D, 0xA3A532CC */ 7.91447954031891731574e+03, /* 0x40BEEA7A, 0xC32782DD */ ]; const PS8: [f64; 5] = [ 1.14207370375678408436e+02, /* 0x405C8D45, 0x8E656CAC */ 3.65093083420853463394e+03, /* 0x40AC85DC, 0x964D274F */ 3.69562060269033463555e+04, /* 0x40E20B86, 0x97C5BB7F */ 9.76027935934950801311e+04, /* 0x40F7D42C, 0xB28F17BB */ 3.08042720627888811578e+04, /* 0x40DE1511, 0x697A0B2D */ ]; const PR5: [f64; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ 1.31990519556243522749e-11, /* 0x3DAD0667, 0xDAE1CA7D */ 1.17187493190614097638e-01, /* 0x3FBDFFFF, 0xE2C10043 */ 6.80275127868432871736e+00, /* 0x401B3604, 0x6E6315E3 */ 1.08308182990189109773e+02, /* 0x405B13B9, 0x452602ED */ 5.17636139533199752805e+02, /* 0x40802D16, 0xD052D649 */ 5.28715201363337541807e+02, /* 0x408085B8, 0xBB7E0CB7 */ ]; const PS5: [f64; 5] = [ 5.92805987221131331921e+01, /* 0x404DA3EA, 0xA8AF633D */ 9.91401418733614377743e+02, /* 0x408EFB36, 0x1B066701 */ 5.35326695291487976647e+03, /* 0x40B4E944, 0x5706B6FB */ 7.84469031749551231769e+03, /* 0x40BEA4B0, 0xB8A5BB15 */ 1.50404688810361062679e+03, /* 0x40978030, 0x036F5E51 */ ]; const PR3: [f64; 6] = [ 3.02503916137373618024e-09, /* 0x3E29FC21, 0xA7AD9EDD */ 1.17186865567253592491e-01, /* 0x3FBDFFF5, 0x5B21D17B */ 3.93297750033315640650e+00, /* 0x400F76BC, 0xE85EAD8A */ 3.51194035591636932736e+01, /* 0x40418F48, 0x9DA6D129 */ 9.10550110750781271918e+01, /* 0x4056C385, 0x4D2C1837 */ 4.85590685197364919645e+01, /* 0x4048478F, 0x8EA83EE5 */ ]; const PS3: [f64; 5] = [ 3.47913095001251519989e+01, /* 0x40416549, 0xA134069C */ 3.36762458747825746741e+02, /* 0x40750C33, 
0x07F1A75F */ 1.04687139975775130551e+03, /* 0x40905B7C, 0x5037D523 */ 8.90811346398256432622e+02, /* 0x408BD67D, 0xA32E31E9 */ 1.03787932439639277504e+02, /* 0x4059F26D, 0x7C2EED53 */ ]; const PR2: [f64; 6] = [ /* for x in [2.8570,2]=1/[0.3499,0.5] */ 1.07710830106873743082e-07, /* 0x3E7CE9D4, 0xF65544F4 */ 1.17176219462683348094e-01, /* 0x3FBDFF42, 0xBE760D83 */ 2.36851496667608785174e+00, /* 0x4002F2B7, 0xF98FAEC0 */ 1.22426109148261232917e+01, /* 0x40287C37, 0x7F71A964 */ 1.76939711271687727390e+01, /* 0x4031B1A8, 0x177F8EE2 */ 5.07352312588818499250e+00, /* 0x40144B49, 0xA574C1FE */ ]; const PS2: [f64; 5] = [ 2.14364859363821409488e+01, /* 0x40356FBD, 0x8AD5ECDC */ 1.25290227168402751090e+02, /* 0x405F5293, 0x14F92CD5 */ 2.32276469057162813669e+02, /* 0x406D08D8, 0xD5A2DBD9 */ 1.17679373287147100768e+02, /* 0x405D6B7A, 0xDA1884A9 */ 8.36463893371618283368e+00, /* 0x4020BAB1, 0xF44E5192 */ ]; fn pone(x: f64) -> f64 { let p: &[f64; 6]; let q: &[f64; 5]; let z: f64; let r: f64; let s: f64; let mut ix: u32; ix = get_high_word(x); ix &= 0x7fffffff; if ix >= 0x40200000 { p = &PR8; q = &PS8; } else if ix >= 0x40122E8B { p = &PR5; q = &PS5; } else if ix >= 0x4006DB6D { p = &PR3; q = &PS3; } else /*ix >= 0x40000000*/ { p = &PR2; q = &PS2; } z = 1.0 / (x * x); r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); return 1.0 + r / s; } /* For x >= 8, the asymptotic expansions of qone is * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. * We approximate pone by * qone(x) = s*(0.375 + (R/S)) * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 * S = 1 + qs1*s^2 + ... + qs6*s^12 * and * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) */ const QR8: [f64; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */ -1.02539062499992714161e-01, /* 0xBFBA3FFF, 0xFFFFFDF3 */ -1.62717534544589987888e+01, /* 0xC0304591, 0xA26779F7 */ -7.59601722513950107896e+02, /* 0xC087BCD0, 0x53E4B576 */ -1.18498066702429587167e+04, /* 0xC0C724E7, 0x40F87415 */ -4.84385124285750353010e+04, /* 0xC0E7A6D0, 0x65D09C6A */ ]; const QS8: [f64; 6] = [ 1.61395369700722909556e+02, /* 0x40642CA6, 0xDE5BCDE5 */ 7.82538599923348465381e+03, /* 0x40BE9162, 0xD0D88419 */ 1.33875336287249578163e+05, /* 0x4100579A, 0xB0B75E98 */ 7.19657723683240939863e+05, /* 0x4125F653, 0x72869C19 */ 6.66601232617776375264e+05, /* 0x412457D2, 0x7719AD5C */ -2.94490264303834643215e+05, /* 0xC111F969, 0x0EA5AA18 */ ]; const QR5: [f64; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ -2.08979931141764104297e-11, /* 0xBDB6FA43, 0x1AA1A098 */ -1.02539050241375426231e-01, /* 0xBFBA3FFF, 0xCB597FEF */ -8.05644828123936029840e+00, /* 0xC0201CE6, 0xCA03AD4B */ -1.83669607474888380239e+02, /* 0xC066F56D, 0x6CA7B9B0 */ -1.37319376065508163265e+03, /* 0xC09574C6, 0x6931734F */ -2.61244440453215656817e+03, /* 0xC0A468E3, 0x88FDA79D */ ]; const QS5: [f64; 6] = [ 8.12765501384335777857e+01, /* 0x405451B2, 0xFF5A11B2 */ 1.99179873460485964642e+03, /* 0x409F1F31, 0xE77BF839 */ 1.74684851924908907677e+04, /* 0x40D10F1F, 0x0D64CE29 */ 4.98514270910352279316e+04, /* 0x40E8576D, 0xAABAD197 */ 2.79480751638918118260e+04, /* 0x40DB4B04, 0xCF7C364B */ -4.71918354795128470869e+03, /* 0xC0B26F2E, 0xFCFFA004 */ ]; const QR3: [f64; 6] = [ -5.07831226461766561369e-09, /* 0xBE35CFA9, 0xD38FC84F */ -1.02537829820837089745e-01, /* 0xBFBA3FEB, 0x51AEED54 */ -4.61011581139473403113e+00, /* 0xC01270C2, 0x3302D9FF */ -5.78472216562783643212e+01, /* 0xC04CEC71, 0xC25D16DA 
*/ -2.28244540737631695038e+02, /* 0xC06C87D3, 0x4718D55F */ -2.19210128478909325622e+02, /* 0xC06B66B9, 0x5F5C1BF6 */ ]; const QS3: [f64; 6] = [ 4.76651550323729509273e+01, /* 0x4047D523, 0xCCD367E4 */ 6.73865112676699709482e+02, /* 0x40850EEB, 0xC031EE3E */ 3.38015286679526343505e+03, /* 0x40AA684E, 0x448E7C9A */ 5.54772909720722782367e+03, /* 0x40B5ABBA, 0xA61D54A6 */ 1.90311919338810798763e+03, /* 0x409DBC7A, 0x0DD4DF4B */ -1.35201191444307340817e+02, /* 0xC060E670, 0x290A311F */ ]; const QR2: [f64; 6] = [ /* for x in [2.8570,2]=1/[0.3499,0.5] */ -1.78381727510958865572e-07, /* 0xBE87F126, 0x44C626D2 */ -1.02517042607985553460e-01, /* 0xBFBA3E8E, 0x9148B010 */ -2.75220568278187460720e+00, /* 0xC0060484, 0x69BB4EDA */ -1.96636162643703720221e+01, /* 0xC033A9E2, 0xC168907F */ -4.23253133372830490089e+01, /* 0xC04529A3, 0xDE104AAA */ -2.13719211703704061733e+01, /* 0xC0355F36, 0x39CF6E52 */ ]; const QS2: [f64; 6] = [ 2.95333629060523854548e+01, /* 0x403D888A, 0x78AE64FF */ 2.52981549982190529136e+02, /* 0x406F9F68, 0xDB821CBA */ 7.57502834868645436472e+02, /* 0x4087AC05, 0xCE49A0F7 */ 7.39393205320467245656e+02, /* 0x40871B25, 0x48D4C029 */ 1.55949003336666123687e+02, /* 0x40637E5E, 0x3C3ED8D4 */ -4.95949898822628210127e+00, /* 0xC013D686, 0xE71BE86B */ ]; fn qone(x: f64) -> f64 { let p: &[f64; 6]; let q: &[f64; 6]; let s: f64; let r: f64; let z: f64; let mut ix: u32; ix = get_high_word(x); ix &= 0x7fffffff; if ix >= 0x40200000 { p = &QR8; q = &QS8; } else if ix >= 0x40122E8B { p = &QR5; q = &QS5; } else if ix >= 0x4006DB6D { p = &QR3; q = &QS3; } else /*ix >= 0x40000000*/ { p = &QR2; q = &QS2; } z = 1.0 / (x * x); r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); return (0.375 + r / s) / x; } compiler_builtins-0.1.101/libm/src/math/j1f.rs000064400000000000000000000246211046102023000172050ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_j1f.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ use super::{cosf, fabsf, logf, sinf, sqrtf}; const INVSQRTPI: f32 = 5.6418961287e-01; /* 0x3f106ebb */ const TPI: f32 = 6.3661974669e-01; /* 0x3f22f983 */ fn common(ix: u32, x: f32, y1: bool, sign: bool) -> f32 { let z: f64; let mut s: f64; let c: f64; let mut ss: f64; let mut cc: f64; s = sinf(x) as f64; if y1 { s = -s; } c = cosf(x) as f64; cc = s - c; if ix < 0x7f000000 { ss = -s - c; z = cosf(2.0 * x) as f64; if s * c > 0.0 { cc = z / ss; } else { ss = z / cc; } if ix < 0x58800000 { if y1 { ss = -ss; } cc = (ponef(x) as f64) * cc - (qonef(x) as f64) * ss; } } if sign { cc = -cc; } return (((INVSQRTPI as f64) * cc) / (sqrtf(x) as f64)) as f32; } /* R0/S0 on [0,2] */ const R00: f32 = -6.2500000000e-02; /* 0xbd800000 */ const R01: f32 = 1.4070566976e-03; /* 0x3ab86cfd */ const R02: f32 = -1.5995563444e-05; /* 0xb7862e36 */ const R03: f32 = 4.9672799207e-08; /* 0x335557d2 */ const S01: f32 = 1.9153760746e-02; /* 0x3c9ce859 */ const S02: f32 = 1.8594678841e-04; /* 0x3942fab6 */ const S03: f32 = 1.1771846857e-06; /* 0x359dffc2 */ const S04: f32 = 5.0463624390e-09; /* 0x31ad6446 */ const S05: f32 = 1.2354227016e-11; /* 0x2d59567e */ pub fn j1f(x: f32) -> f32 { let mut z: f32; let r: f32; let s: f32; let mut ix: u32; let sign: bool; ix = x.to_bits(); sign = (ix >> 31) != 0; ix &= 0x7fffffff; if ix >= 0x7f800000 { return 1.0 / (x * x); } if ix >= 0x40000000 { /* |x| >= 2 */ return common(ix, fabsf(x), false, sign); } if ix >= 0x39000000 { /* |x| >= 2**-13 */ z = x * x; r = z * (R00 + z * (R01 + z * (R02 + z * R03))); s = 1.0 + z * (S01 + z * (S02 + z * (S03 + z * (S04 + z * S05)))); z = 0.5 + r / s; } else { z = 0.5; } return z * x; } const U0: [f32; 5] = [ -1.9605709612e-01, /* 0xbe48c331 */ 5.0443872809e-02, /* 0x3d4e9e3c */ -1.9125689287e-03, /* 0xbafaaf2a */ 2.3525259166e-05, /* 0x37c5581c */ -9.1909917899e-08, /* 0xb3c56003 */ ]; const V0: [f32; 5] = [ 1.9916731864e-02, /* 0x3ca3286a */ 2.0255257550e-04, /* 0x3954644b */ 1.3560879779e-06, /* 0x35b602d4 */ 6.2274145840e-09, /* 0x31d5f8eb */ 1.6655924903e-11, /* 0x2d9281cf */ ]; pub fn y1f(x: f32) -> f32 { let z: f32; let u: f32; let v: f32; let ix: u32; ix = x.to_bits(); if (ix & 0x7fffffff) == 0 { return -1.0 / 0.0; } if (ix >> 31) != 0 { return 0.0 / 0.0; } if ix >= 0x7f800000 { return 1.0 / x; } if ix >= 0x40000000 { /* |x| >= 2.0 */ return common(ix, x, true, false); } if ix < 0x33000000 { /* x < 2**-25 */ return -TPI / x; } z = x * x; u = U0[0] + z * (U0[1] + z * (U0[2] + z * (U0[3] + z * U0[4]))); v = 1.0 + z * (V0[0] + z * (V0[1] + z * (V0[2] + z * (V0[3] + z * V0[4])))); return x * (u / v) + TPI * (j1f(x) * logf(x) - 1.0 / x); } /* For x >= 8, the asymptotic expansions of pone is * 1 + 15/128 s^2 - 4725/2^15 s^4 - ..., where s = 1/x. * We approximate pone by * pone(x) = 1 + (R/S) * where R = pr0 + pr1*s^2 + pr2*s^4 + ... + pr5*s^10 * S = 1 + ps0*s^2 + ... 
+ ps4*s^10 * and * | pone(x)-1-R/S | <= 2 ** ( -60.06) */ const PR8: [f32; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ 0.0000000000e+00, /* 0x00000000 */ 1.1718750000e-01, /* 0x3df00000 */ 1.3239480972e+01, /* 0x4153d4ea */ 4.1205184937e+02, /* 0x43ce06a3 */ 3.8747453613e+03, /* 0x45722bed */ 7.9144794922e+03, /* 0x45f753d6 */ ]; const PS8: [f32; 5] = [ 1.1420736694e+02, /* 0x42e46a2c */ 3.6509309082e+03, /* 0x45642ee5 */ 3.6956207031e+04, /* 0x47105c35 */ 9.7602796875e+04, /* 0x47bea166 */ 3.0804271484e+04, /* 0x46f0a88b */ ]; const PR5: [f32; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ 1.3199052094e-11, /* 0x2d68333f */ 1.1718749255e-01, /* 0x3defffff */ 6.8027510643e+00, /* 0x40d9b023 */ 1.0830818176e+02, /* 0x42d89dca */ 5.1763616943e+02, /* 0x440168b7 */ 5.2871520996e+02, /* 0x44042dc6 */ ]; const PS5: [f32; 5] = [ 5.9280597687e+01, /* 0x426d1f55 */ 9.9140142822e+02, /* 0x4477d9b1 */ 5.3532670898e+03, /* 0x45a74a23 */ 7.8446904297e+03, /* 0x45f52586 */ 1.5040468750e+03, /* 0x44bc0180 */ ]; const PR3: [f32; 6] = [ 3.0250391081e-09, /* 0x314fe10d */ 1.1718686670e-01, /* 0x3defffab */ 3.9329774380e+00, /* 0x407bb5e7 */ 3.5119403839e+01, /* 0x420c7a45 */ 9.1055007935e+01, /* 0x42b61c2a */ 4.8559066772e+01, /* 0x42423c7c */ ]; const PS3: [f32; 5] = [ 3.4791309357e+01, /* 0x420b2a4d */ 3.3676245117e+02, /* 0x43a86198 */ 1.0468714600e+03, /* 0x4482dbe3 */ 8.9081134033e+02, /* 0x445eb3ed */ 1.0378793335e+02, /* 0x42cf936c */ ]; const PR2: [f32; 6] = [ /* for x in [2.8570,2]=1/[0.3499,0.5] */ 1.0771083225e-07, /* 0x33e74ea8 */ 1.1717621982e-01, /* 0x3deffa16 */ 2.3685150146e+00, /* 0x401795c0 */ 1.2242610931e+01, /* 0x4143e1bc */ 1.7693971634e+01, /* 0x418d8d41 */ 5.0735230446e+00, /* 0x40a25a4d */ ]; const PS2: [f32; 5] = [ 2.1436485291e+01, /* 0x41ab7dec */ 1.2529022980e+02, /* 0x42fa9499 */ 2.3227647400e+02, /* 0x436846c7 */ 1.1767937469e+02, /* 0x42eb5bd7 */ 8.3646392822e+00, /* 0x4105d590 */ ]; fn ponef(x: f32) -> f32 { let p: &[f32; 6]; let q: &[f32; 5]; let z: f32; let r: f32; let s: f32; let mut ix: u32; ix = x.to_bits(); ix &= 0x7fffffff; if ix >= 0x41000000 { p = &PR8; q = &PS8; } else if ix >= 0x409173eb { p = &PR5; q = &PS5; } else if ix >= 0x4036d917 { p = &PR3; q = &PS3; } else /*ix >= 0x40000000*/ { p = &PR2; q = &PS2; } z = 1.0 / (x * x); r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * q[4])))); return 1.0 + r / s; } /* For x >= 8, the asymptotic expansions of qone is * 3/8 s - 105/1024 s^3 - ..., where s = 1/x. * We approximate pone by * qone(x) = s*(0.375 + (R/S)) * where R = qr1*s^2 + qr2*s^4 + ... + qr5*s^10 * S = 1 + qs1*s^2 + ... 
+ qs6*s^12 * and * | qone(x)/s -0.375-R/S | <= 2 ** ( -61.13) */ const QR8: [f32; 6] = [ /* for x in [inf, 8]=1/[0,0.125] */ 0.0000000000e+00, /* 0x00000000 */ -1.0253906250e-01, /* 0xbdd20000 */ -1.6271753311e+01, /* 0xc1822c8d */ -7.5960174561e+02, /* 0xc43de683 */ -1.1849806641e+04, /* 0xc639273a */ -4.8438511719e+04, /* 0xc73d3683 */ ]; const QS8: [f32; 6] = [ 1.6139537048e+02, /* 0x43216537 */ 7.8253862305e+03, /* 0x45f48b17 */ 1.3387534375e+05, /* 0x4802bcd6 */ 7.1965775000e+05, /* 0x492fb29c */ 6.6660125000e+05, /* 0x4922be94 */ -2.9449025000e+05, /* 0xc88fcb48 */ ]; const QR5: [f32; 6] = [ /* for x in [8,4.5454]=1/[0.125,0.22001] */ -2.0897993405e-11, /* 0xadb7d219 */ -1.0253904760e-01, /* 0xbdd1fffe */ -8.0564479828e+00, /* 0xc100e736 */ -1.8366960144e+02, /* 0xc337ab6b */ -1.3731937256e+03, /* 0xc4aba633 */ -2.6124443359e+03, /* 0xc523471c */ ]; const QS5: [f32; 6] = [ 8.1276550293e+01, /* 0x42a28d98 */ 1.9917987061e+03, /* 0x44f8f98f */ 1.7468484375e+04, /* 0x468878f8 */ 4.9851425781e+04, /* 0x4742bb6d */ 2.7948074219e+04, /* 0x46da5826 */ -4.7191835938e+03, /* 0xc5937978 */ ]; const QR3: [f32; 6] = [ -5.0783124372e-09, /* 0xb1ae7d4f */ -1.0253783315e-01, /* 0xbdd1ff5b */ -4.6101160049e+00, /* 0xc0938612 */ -5.7847221375e+01, /* 0xc267638e */ -2.2824453735e+02, /* 0xc3643e9a */ -2.1921012878e+02, /* 0xc35b35cb */ ]; const QS3: [f32; 6] = [ 4.7665153503e+01, /* 0x423ea91e */ 6.7386511230e+02, /* 0x4428775e */ 3.3801528320e+03, /* 0x45534272 */ 5.5477290039e+03, /* 0x45ad5dd5 */ 1.9031191406e+03, /* 0x44ede3d0 */ -1.3520118713e+02, /* 0xc3073381 */ ]; const QR2: [f32; 6] = [ /* for x in [2.8570,2]=1/[0.3499,0.5] */ -1.7838172539e-07, /* 0xb43f8932 */ -1.0251704603e-01, /* 0xbdd1f475 */ -2.7522056103e+00, /* 0xc0302423 */ -1.9663616180e+01, /* 0xc19d4f16 */ -4.2325313568e+01, /* 0xc2294d1f */ -2.1371921539e+01, /* 0xc1aaf9b2 */ ]; const QS2: [f32; 6] = [ 2.9533363342e+01, /* 0x41ec4454 */ 2.5298155212e+02, /* 0x437cfb47 */ 7.5750280762e+02, /* 0x443d602e */ 7.3939318848e+02, /* 0x4438d92a */ 1.5594900513e+02, /* 0x431bf2f2 */ -4.9594988823e+00, /* 0xc09eb437 */ ]; fn qonef(x: f32) -> f32 { let p: &[f32; 6]; let q: &[f32; 6]; let s: f32; let r: f32; let z: f32; let mut ix: u32; ix = x.to_bits(); ix &= 0x7fffffff; if ix >= 0x41000000 { p = &QR8; q = &QS8; } else if ix >= 0x409173eb { p = &QR5; q = &QS5; } else if ix >= 0x4036d917 { p = &QR3; q = &QS3; } else /*ix >= 0x40000000*/ { p = &QR2; q = &QS2; } z = 1.0 / (x * x); r = p[0] + z * (p[1] + z * (p[2] + z * (p[3] + z * (p[4] + z * p[5])))); s = 1.0 + z * (q[0] + z * (q[1] + z * (q[2] + z * (q[3] + z * (q[4] + z * q[5]))))); return (0.375 + r / s) / x; } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::{j1f, y1f}; #[test] fn test_j1f_2488() { // 0x401F3E49 assert_eq!(j1f(2.4881766_f32), 0.49999475_f32); } #[test] fn test_y1f_2002() { //allow slightly different result on x87 let res = y1f(2.0000002_f32); if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && (res == -0.10703231_f32) { return; } assert_eq!(res, -0.10703229_f32); } } compiler_builtins-0.1.101/libm/src/math/jn.rs000064400000000000000000000237031046102023000171340ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_jn.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. 
* Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* * jn(n, x), yn(n, x) * floating point Bessel's function of the 1st and 2nd kind * of order n * * Special cases: * y0(0)=y1(0)=yn(n,0) = -inf with division by zero signal; * y0(-ve)=y1(-ve)=yn(n,-ve) are NaN with invalid signal. * Note 2. About jn(n,x), yn(n,x) * For n=0, j0(x) is called, * for n=1, j1(x) is called, * for n<=x, forward recursion is used starting * from values of j0(x) and j1(x). * for n>x, a continued fraction approximation to * j(n,x)/j(n-1,x) is evaluated and then backward * recursion is used starting from a supposed value * for j(n,x). The resulting value of j(0,x) is * compared with the actual value to correct the * supposed value of j(n,x). * * yn(n,x) is similar in all respects, except * that forward recursion is used for all * values of n>1. */ use super::{cos, fabs, get_high_word, get_low_word, j0, j1, log, sin, sqrt, y0, y1}; const INVSQRTPI: f64 = 5.64189583547756279280e-01; /* 0x3FE20DD7, 0x50429B6D */ pub fn jn(n: i32, mut x: f64) -> f64 { let mut ix: u32; let lx: u32; let nm1: i32; let mut i: i32; let mut sign: bool; let mut a: f64; let mut b: f64; let mut temp: f64; ix = get_high_word(x); lx = get_low_word(x); sign = (ix >> 31) != 0; ix &= 0x7fffffff; // -lx == !lx + 1 if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 { /* nan */ return x; } /* J(-n,x) = (-1)^n * J(n, x), J(n, -x) = (-1)^n * J(n, x) * Thus, J(-n,x) = J(n,-x) */ /* nm1 = |n|-1 is used instead of |n| to handle n==INT_MIN */ if n == 0 { return j0(x); } if n < 0 { nm1 = -(n + 1); x = -x; sign = !sign; } else { nm1 = n - 1; } if nm1 == 0 { return j1(x); } sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */ x = fabs(x); if (ix | lx) == 0 || ix == 0x7ff00000 { /* if x is 0 or inf */ b = 0.0; } else if (nm1 as f64) < x { /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ if ix >= 0x52d00000 { /* x > 2**302 */ /* (x >> n**2) * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) * Let s=sin(x), c=cos(x), * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then * * n sin(xn)*sqt2 cos(xn)*sqt2 * ---------------------------------- * 0 s-c c+s * 1 -s-c -c+s * 2 -s+c -c-s * 3 s+c c-s */ temp = match nm1 & 3 { 0 => -cos(x) + sin(x), 1 => -cos(x) - sin(x), 2 => cos(x) - sin(x), 3 | _ => cos(x) + sin(x), }; b = INVSQRTPI * temp / sqrt(x); } else { a = j0(x); b = j1(x); i = 0; while i < nm1 { i += 1; temp = b; b = b * (2.0 * (i as f64) / x) - a; /* avoid underflow */ a = temp; } } } else { if ix < 0x3e100000 { /* x < 2**-29 */ /* x is tiny, return the first Taylor expansion of J(n,x) * J(n,x) = 1/n!*(x/2)^n - ... */ if nm1 > 32 { /* underflow */ b = 0.0; } else { temp = x * 0.5; b = temp; a = 1.0; i = 2; while i <= nm1 + 1 { a *= i as f64; /* a = n! */ b *= temp; /* b = (x/2)^n */ i += 1; } b = b / a; } } else { /* use backward recurrence */ /* x x^2 x^2 * J(n,x)/J(n-1,x) = ---- ------ ------ ..... * 2n - 2(n+1) - 2(n+2) * * 1 1 1 * (for large x) = ---- ------ ------ ..... * 2n 2(n+1) 2(n+2) * -- - ------ - ------ - * x x x * * Let w = 2n/x and h=2/x, then the above quotient * is equal to the continued fraction: * 1 * = ----------------------- * 1 * w - ----------------- * 1 * w+h - --------- * w+2h - ... 
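 *      Editorial note (not part of the upstream FreeBSD source): the quotient
 *      above follows from the three-term recurrence
 *          J(n-1,x) + J(n+1,x) = (2n/x)*J(n,x).
 *      Dividing by J(n,x) and writing r(k) = J(k,x)/J(k-1,x) gives
 *          1/r(n) + r(n+1) = 2n/x,  hence  r(n) = 1/(w - r(n+1)),  w = 2n/x,
 *      and expanding r(n+1) = 1/(w+h - r(n+2)) with h = 2/x, and so on,
 *      yields the continued fraction shown.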
* * To determine how many terms needed, let * Q(0) = w, Q(1) = w(w+h) - 1, * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), * When Q(k) > 1e4 good for single * When Q(k) > 1e9 good for double * When Q(k) > 1e17 good for quadruple */ /* determine k */ let mut t: f64; let mut q0: f64; let mut q1: f64; let mut w: f64; let h: f64; let mut z: f64; let mut tmp: f64; let nf: f64; let mut k: i32; nf = (nm1 as f64) + 1.0; w = 2.0 * nf / x; h = 2.0 / x; z = w + h; q0 = w; q1 = w * z - 1.0; k = 1; while q1 < 1.0e9 { k += 1; z += h; tmp = z * q1 - q0; q0 = q1; q1 = tmp; } t = 0.0; i = k; while i >= 0 { t = 1.0 / (2.0 * ((i as f64) + nf) / x - t); i -= 1; } a = t; b = 1.0; /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) * Hence, if n*(log(2n/x)) > ... * single 8.8722839355e+01 * double 7.09782712893383973096e+02 * long double 1.1356523406294143949491931077970765006170e+04 * then recurrent value may overflow and the result is * likely underflow to zero */ tmp = nf * log(fabs(w)); if tmp < 7.09782712893383973096e+02 { i = nm1; while i > 0 { temp = b; b = b * (2.0 * (i as f64)) / x - a; a = temp; i -= 1; } } else { i = nm1; while i > 0 { temp = b; b = b * (2.0 * (i as f64)) / x - a; a = temp; /* scale b to avoid spurious overflow */ let x1p500 = f64::from_bits(0x5f30000000000000); // 0x1p500 == 2^500 if b > x1p500 { a /= b; t /= b; b = 1.0; } i -= 1; } } z = j0(x); w = j1(x); if fabs(z) >= fabs(w) { b = t * z / b; } else { b = t * w / a; } } } if sign { -b } else { b } } pub fn yn(n: i32, x: f64) -> f64 { let mut ix: u32; let lx: u32; let mut ib: u32; let nm1: i32; let mut sign: bool; let mut i: i32; let mut a: f64; let mut b: f64; let mut temp: f64; ix = get_high_word(x); lx = get_low_word(x); sign = (ix >> 31) != 0; ix &= 0x7fffffff; // -lx == !lx + 1 if (ix | (lx | ((!lx).wrapping_add(1))) >> 31) > 0x7ff00000 { /* nan */ return x; } if sign && (ix | lx) != 0 { /* x < 0 */ return 0.0 / 0.0; } if ix == 0x7ff00000 { return 0.0; } if n == 0 { return y0(x); } if n < 0 { nm1 = -(n + 1); sign = (n & 1) != 0; } else { nm1 = n - 1; sign = false; } if nm1 == 0 { if sign { return -y1(x); } else { return y1(x); } } if ix >= 0x52d00000 { /* x > 2**302 */ /* (x >> n**2) * Jn(x) = cos(x-(2n+1)*pi/4)*sqrt(2/x*pi) * Yn(x) = sin(x-(2n+1)*pi/4)*sqrt(2/x*pi) * Let s=sin(x), c=cos(x), * xn=x-(2n+1)*pi/4, sqt2 = sqrt(2),then * * n sin(xn)*sqt2 cos(xn)*sqt2 * ---------------------------------- * 0 s-c c+s * 1 -s-c -c+s * 2 -s+c -c-s * 3 s+c c-s */ temp = match nm1 & 3 { 0 => -sin(x) - cos(x), 1 => -sin(x) + cos(x), 2 => sin(x) + cos(x), 3 | _ => sin(x) - cos(x), }; b = INVSQRTPI * temp / sqrt(x); } else { a = y0(x); b = y1(x); /* quit if b is -inf */ ib = get_high_word(b); i = 0; while i < nm1 && ib != 0xfff00000 { i += 1; temp = b; b = (2.0 * (i as f64) / x) * b - a; ib = get_high_word(b); a = temp; } } if sign { -b } else { b } } compiler_builtins-0.1.101/libm/src/math/jnf.rs000064400000000000000000000154221046102023000173010ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_jnf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ use super::{fabsf, j0f, j1f, logf, y0f, y1f}; pub fn jnf(n: i32, mut x: f32) -> f32 { let mut ix: u32; let mut nm1: i32; let mut sign: bool; let mut i: i32; let mut a: f32; let mut b: f32; let mut temp: f32; ix = x.to_bits(); sign = (ix >> 31) != 0; ix &= 0x7fffffff; if ix > 0x7f800000 { /* nan */ return x; } /* J(-n,x) = J(n,-x), use |n|-1 to avoid overflow in -n */ if n == 0 { return j0f(x); } if n < 0 { nm1 = -(n + 1); x = -x; sign = !sign; } else { nm1 = n - 1; } if nm1 == 0 { return j1f(x); } sign &= (n & 1) != 0; /* even n: 0, odd n: signbit(x) */ x = fabsf(x); if ix == 0 || ix == 0x7f800000 { /* if x is 0 or inf */ b = 0.0; } else if (nm1 as f32) < x { /* Safe to use J(n+1,x)=2n/x *J(n,x)-J(n-1,x) */ a = j0f(x); b = j1f(x); i = 0; while i < nm1 { i += 1; temp = b; b = b * (2.0 * (i as f32) / x) - a; a = temp; } } else { if ix < 0x35800000 { /* x < 2**-20 */ /* x is tiny, return the first Taylor expansion of J(n,x) * J(n,x) = 1/n!*(x/2)^n - ... */ if nm1 > 8 { /* underflow */ nm1 = 8; } temp = 0.5 * x; b = temp; a = 1.0; i = 2; while i <= nm1 + 1 { a *= i as f32; /* a = n! */ b *= temp; /* b = (x/2)^n */ i += 1; } b = b / a; } else { /* use backward recurrence */ /* x x^2 x^2 * J(n,x)/J(n-1,x) = ---- ------ ------ ..... * 2n - 2(n+1) - 2(n+2) * * 1 1 1 * (for large x) = ---- ------ ------ ..... * 2n 2(n+1) 2(n+2) * -- - ------ - ------ - * x x x * * Let w = 2n/x and h=2/x, then the above quotient * is equal to the continued fraction: * 1 * = ----------------------- * 1 * w - ----------------- * 1 * w+h - --------- * w+2h - ... * * To determine how many terms needed, let * Q(0) = w, Q(1) = w(w+h) - 1, * Q(k) = (w+k*h)*Q(k-1) - Q(k-2), * When Q(k) > 1e4 good for single * When Q(k) > 1e9 good for double * When Q(k) > 1e17 good for quadruple */ /* determine k */ let mut t: f32; let mut q0: f32; let mut q1: f32; let mut w: f32; let h: f32; let mut z: f32; let mut tmp: f32; let nf: f32; let mut k: i32; nf = (nm1 as f32) + 1.0; w = 2.0 * (nf as f32) / x; h = 2.0 / x; z = w + h; q0 = w; q1 = w * z - 1.0; k = 1; while q1 < 1.0e4 { k += 1; z += h; tmp = z * q1 - q0; q0 = q1; q1 = tmp; } t = 0.0; i = k; while i >= 0 { t = 1.0 / (2.0 * ((i as f32) + nf) / x - t); i -= 1; } a = t; b = 1.0; /* estimate log((2/x)^n*n!) = n*log(2/x)+n*ln(n) * Hence, if n*(log(2n/x)) > ... 
* single 8.8722839355e+01 * double 7.09782712893383973096e+02 * long double 1.1356523406294143949491931077970765006170e+04 * then recurrent value may overflow and the result is * likely underflow to zero */ tmp = nf * logf(fabsf(w)); if tmp < 88.721679688 { i = nm1; while i > 0 { temp = b; b = 2.0 * (i as f32) * b / x - a; a = temp; i -= 1; } } else { i = nm1; while i > 0 { temp = b; b = 2.0 * (i as f32) * b / x - a; a = temp; /* scale b to avoid spurious overflow */ let x1p60 = f32::from_bits(0x5d800000); // 0x1p60 == 2^60 if b > x1p60 { a /= b; t /= b; b = 1.0; } i -= 1; } } z = j0f(x); w = j1f(x); if fabsf(z) >= fabsf(w) { b = t * z / b; } else { b = t * w / a; } } } if sign { -b } else { b } } pub fn ynf(n: i32, x: f32) -> f32 { let mut ix: u32; let mut ib: u32; let nm1: i32; let mut sign: bool; let mut i: i32; let mut a: f32; let mut b: f32; let mut temp: f32; ix = x.to_bits(); sign = (ix >> 31) != 0; ix &= 0x7fffffff; if ix > 0x7f800000 { /* nan */ return x; } if sign && ix != 0 { /* x < 0 */ return 0.0 / 0.0; } if ix == 0x7f800000 { return 0.0; } if n == 0 { return y0f(x); } if n < 0 { nm1 = -(n + 1); sign = (n & 1) != 0; } else { nm1 = n - 1; sign = false; } if nm1 == 0 { if sign { return -y1f(x); } else { return y1f(x); } } a = y0f(x); b = y1f(x); /* quit if b is -inf */ ib = b.to_bits(); i = 0; while i < nm1 && ib != 0xff800000 { i += 1; temp = b; b = (2.0 * (i as f32) / x) * b - a; ib = b.to_bits(); a = temp; } if sign { -b } else { b } } compiler_builtins-0.1.101/libm/src/math/k_cos.rs000064400000000000000000000056511046102023000176250ustar 00000000000000// origin: FreeBSD /usr/src/lib/msun/src/k_cos.c // // ==================================================== // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. // // Developed at SunSoft, a Sun Microsystems, Inc. business. // Permission to use, copy, modify, and distribute this // software is freely granted, provided that this notice // is preserved. // ==================================================== const C1: f64 = 4.16666666666666019037e-02; /* 0x3FA55555, 0x5555554C */ const C2: f64 = -1.38888888888741095749e-03; /* 0xBF56C16C, 0x16C15177 */ const C3: f64 = 2.48015872894767294178e-05; /* 0x3EFA01A0, 0x19CB1590 */ const C4: f64 = -2.75573143513906633035e-07; /* 0xBE927E4F, 0x809C52AD */ const C5: f64 = 2.08757232129817482790e-09; /* 0x3E21EE9E, 0xBDB4B1C4 */ const C6: f64 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ // kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 // Input x is assumed to be bounded by ~pi/4 in magnitude. // Input y is the tail of x. // // Algorithm // 1. Since cos(-x) = cos(x), we need only to consider positive x. // 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. // 3. cos(x) is approximated by a polynomial of degree 14 on // [0,pi/4] // 4 14 // cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x // where the remez error is // // | 2 4 6 8 10 12 14 | -58 // |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 // | | // // 4 6 8 10 12 14 // 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then // cos(x) ~ 1 - x*x/2 + r // since cos(x+y) ~ cos(x) - sin(x)*y // ~ cos(x) - x*y, // a correction term is necessary in cos(x) and hence // cos(x+y) = 1 - (x*x/2 - (r - x*y)) // For better accuracy, rearrange to // cos(x+y) ~ w + (tmp + (r-x*y)) // where w = 1 - x*x/2 and tmp is a tiny correction term // (1 - x*x/2 == w + tmp exactly in infinite precision). // The exactness of w + tmp in infinite precision depends on w // and tmp having the same precision as x. 
If they have extra // precision due to compiler bugs, then the extra precision is // only good provided it is retained in all terms of the final // expression for cos(). Retention happens in all cases tested // under FreeBSD, so don't pessimize things by forcibly clipping // any extra precision in w. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_cos(x: f64, y: f64) -> f64 { let z = x * x; let w = z * z; let r = z * (C1 + z * (C2 + z * C3)) + w * w * (C4 + z * (C5 + z * C6)); let hz = 0.5 * z; let w = 1.0 - hz; w + (((1.0 - w) - hz) + (z * r - x * y)) } compiler_builtins-0.1.101/libm/src/math/k_cosf.rs000064400000000000000000000021541046102023000177660ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. * Debugged and optimized by Bruce D. Evans. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */ const C0: f64 = -0.499999997251031003120; /* -0x1ffffffd0c5e81.0p-54 */ const C1: f64 = 0.0416666233237390631894; /* 0x155553e1053a42.0p-57 */ const C2: f64 = -0.00138867637746099294692; /* -0x16c087e80f1e27.0p-62 */ const C3: f64 = 0.0000243904487962774090654; /* 0x199342e0ee5069.0p-68 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_cosf(x: f64) -> f32 { let z = x * x; let w = z * z; let r = C2 + z * C3; (((1.0 + z * C0) + w * C1) + (w * z) * r) as f32 } compiler_builtins-0.1.101/libm/src/math/k_expo2.rs000064400000000000000000000010661046102023000200720ustar 00000000000000use super::exp; /* k is such that k*ln2 has minimal relative error and x - kln2 > log(FLT_MIN) */ const K: i32 = 2043; /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_expo2(x: f64) -> f64 { let k_ln2 = f64::from_bits(0x40962066151add8b); /* note that k is odd and scale*scale overflows */ let scale = f64::from_bits(((((0x3ff + K / 2) as u32) << 20) as u64) << 32); /* exp(x - k ln2) * 2**(k-1) */ exp(x - k_ln2) * scale * scale } compiler_builtins-0.1.101/libm/src/math/k_expo2f.rs000064400000000000000000000010361046102023000202350ustar 00000000000000use super::expf; /* k is such that k*ln2 has minimal relative error and x - kln2 > log(FLT_MIN) */ const K: i32 = 235; /* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_expo2f(x: f32) -> f32 { let k_ln2 = f32::from_bits(0x4322e3bc); /* note that k is odd and scale*scale overflows */ let scale = f32::from_bits(((0x7f + K / 2) as u32) << 23); /* exp(x - k ln2) * 2**(k-1) */ expf(x - k_ln2) * scale * scale } compiler_builtins-0.1.101/libm/src/math/k_sin.rs000064400000000000000000000046461046102023000176350ustar 00000000000000// origin: FreeBSD /usr/src/lib/msun/src/k_sin.c // // ==================================================== // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. // // Developed at SunSoft, a Sun Microsystems, Inc. business. 
// Permission to use, copy, modify, and distribute this // software is freely granted, provided that this notice // is preserved. // ==================================================== const S1: f64 = -1.66666666666666324348e-01; /* 0xBFC55555, 0x55555549 */ const S2: f64 = 8.33333333332248946124e-03; /* 0x3F811111, 0x1110F8A6 */ const S3: f64 = -1.98412698298579493134e-04; /* 0xBF2A01A0, 0x19C161D5 */ const S4: f64 = 2.75573137070700676789e-06; /* 0x3EC71DE3, 0x57B1FE7D */ const S5: f64 = -2.50507602534068634195e-08; /* 0xBE5AE5E6, 0x8A2B9CEB */ const S6: f64 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ // kernel sin function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 // Input x is assumed to be bounded by ~pi/4 in magnitude. // Input y is the tail of x. // Input iy indicates whether y is 0. (if iy=0, y assume to be 0). // // Algorithm // 1. Since sin(-x) = -sin(x), we need only to consider positive x. // 2. Callers must return sin(-0) = -0 without calling here since our // odd polynomial is not evaluated in a way that preserves -0. // Callers may do the optimization sin(x) ~ x for tiny x. // 3. sin(x) is approximated by a polynomial of degree 13 on // [0,pi/4] // 3 13 // sin(x) ~ x + S1*x + ... + S6*x // where // // |sin(x) 2 4 6 8 10 12 | -58 // |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 // | x | // // 4. sin(x+y) = sin(x) + sin'(x')*y // ~ sin(x) + (1-x*x/2)*y // For better accuracy, let // 3 2 2 2 2 // r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) // then 3 2 // sin(x) = x + (S1*x + (x *(r-y/2)+y)) #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_sin(x: f64, y: f64, iy: i32) -> f64 { let z = x * x; let w = z * z; let r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6); let v = z * x; if iy == 0 { x + v * (S1 + z * r) } else { x - ((z * (0.5 * y - v * r) - y) - v * S1) } } compiler_builtins-0.1.101/libm/src/math/k_sinf.rs000064400000000000000000000021611046102023000177710ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. * Optimized by Bruce D. Evans. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */ const S1: f64 = -0.166666666416265235595; /* -0x15555554cbac77.0p-55 */ const S2: f64 = 0.0083333293858894631756; /* 0x111110896efbb2.0p-59 */ const S3: f64 = -0.000198393348360966317347; /* -0x1a00f9e2cae774.0p-65 */ const S4: f64 = 0.0000027183114939898219064; /* 0x16cd878c3b46a7.0p-71 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_sinf(x: f64) -> f32 { let z = x * x; let w = z * z; let r = S3 + z * S4; let s = z * x; ((x + s * (S1 + z * S2)) + s * w * r) as f32 } compiler_builtins-0.1.101/libm/src/math/k_tan.rs000064400000000000000000000102141046102023000176120ustar 00000000000000// origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */ // // ==================================================== // Copyright 2004 Sun Microsystems, Inc. All Rights Reserved. // // Permission to use, copy, modify, and distribute this // software is freely granted, provided that this notice // is preserved. 
// ==================================================== // kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 // Input x is assumed to be bounded by ~pi/4 in magnitude. // Input y is the tail of x. // Input odd indicates whether tan (if odd = 0) or -1/tan (if odd = 1) is returned. // // Algorithm // 1. Since tan(-x) = -tan(x), we need only to consider positive x. // 2. Callers must return tan(-0) = -0 without calling here since our // odd polynomial is not evaluated in a way that preserves -0. // Callers may do the optimization tan(x) ~ x for tiny x. // 3. tan(x) is approximated by a odd polynomial of degree 27 on // [0,0.67434] // 3 27 // tan(x) ~ x + T1*x + ... + T13*x // where // // |tan(x) 2 4 26 | -59.2 // |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 // | x | // // Note: tan(x+y) = tan(x) + tan'(x)*y // ~ tan(x) + (1+x*x)*y // Therefore, for better accuracy in computing tan(x+y), let // 3 2 2 2 2 // r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) // then // 3 2 // tan(x+y) = x + (T1*x + (x *(r+y)+y)) // // 4. For x in [0.67434,pi/4], let y = pi/4 - x, then // tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y)) // = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) static T: [f64; 13] = [ 3.33333333333334091986e-01, /* 3FD55555, 55555563 */ 1.33333333333201242699e-01, /* 3FC11111, 1110FE7A */ 5.39682539762260521377e-02, /* 3FABA1BA, 1BB341FE */ 2.18694882948595424599e-02, /* 3F9664F4, 8406D637 */ 8.86323982359930005737e-03, /* 3F8226E3, E96E8493 */ 3.59207910759131235356e-03, /* 3F6D6D22, C9560328 */ 1.45620945432529025516e-03, /* 3F57DBC8, FEE08315 */ 5.88041240820264096874e-04, /* 3F4344D8, F2F26501 */ 2.46463134818469906812e-04, /* 3F3026F7, 1A8D1068 */ 7.81794442939557092300e-05, /* 3F147E88, A03792A6 */ 7.14072491382608190305e-05, /* 3F12B80F, 32F0A7E9 */ -1.85586374855275456654e-05, /* BEF375CB, DB605373 */ 2.59073051863633712884e-05, /* 3EFB2A70, 74BF7AD4 */ ]; const PIO4: f64 = 7.85398163397448278999e-01; /* 3FE921FB, 54442D18 */ const PIO4_LO: f64 = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_tan(mut x: f64, mut y: f64, odd: i32) -> f64 { let hx = (f64::to_bits(x) >> 32) as u32; let big = (hx & 0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */ if big { let sign = hx >> 31; if sign != 0 { x = -x; y = -y; } x = (PIO4 - x) + (PIO4_LO - y); y = 0.0; } let z = x * x; let w = z * z; /* * Break x^5*(T[1]+x^2*T[2]+...) into * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) + * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12])) */ let r = T[1] + w * (T[3] + w * (T[5] + w * (T[7] + w * (T[9] + w * T[11])))); let v = z * (T[2] + w * (T[4] + w * (T[6] + w * (T[8] + w * (T[10] + w * T[12]))))); let s = z * x; let r = y + z * (s * (r + v) + y) + s * T[0]; let w = x + r; if big { let sign = hx >> 31; let s = 1.0 - 2.0 * odd as f64; let v = s - 2.0 * (x + (r - w * w / (w + s))); return if sign != 0 { -v } else { v }; } if odd == 0 { return w; } /* -1.0/(x+r) has up to 2ulp error, so compute it accurately */ let w0 = zero_low_word(w); let v = r - (w0 - x); /* w0+v = r+x */ let a = -1.0 / w; let a0 = zero_low_word(a); a0 + a * (1.0 + a0 * w0 + a0 * v) } fn zero_low_word(x: f64) -> f64 { f64::from_bits(f64::to_bits(x) & 0xFFFF_FFFF_0000_0000) } compiler_builtins-0.1.101/libm/src/math/k_tanf.rs000064400000000000000000000036041046102023000177650ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/k_tan.c */ /* * ==================================================== * Copyright 2004 Sun Microsystems, Inc. All Rights Reserved. 
* * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* |tan(x)/x - t(x)| < 2**-25.5 (~[-2e-08, 2e-08]). */ const T: [f64; 6] = [ 0.333331395030791399758, /* 0x15554d3418c99f.0p-54 */ 0.133392002712976742718, /* 0x1112fd38999f72.0p-55 */ 0.0533812378445670393523, /* 0x1b54c91d865afe.0p-57 */ 0.0245283181166547278873, /* 0x191df3908c33ce.0p-58 */ 0.00297435743359967304927, /* 0x185dadfcecf44e.0p-61 */ 0.00946564784943673166728, /* 0x1362b9bf971bcd.0p-59 */ ]; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn k_tanf(x: f64, odd: bool) -> f32 { let z = x * x; /* * Split up the polynomial into small independent terms to give * opportunities for parallel evaluation. The chosen splitting is * micro-optimized for Athlons (XP, X64). It costs 2 multiplications * relative to Horner's method on sequential machines. * * We add the small terms from lowest degree up for efficiency on * non-sequential machines (the lowest degree terms tend to be ready * earlier). Apart from this, we don't care about order of * operations, and don't need to to care since we have precision to * spare. However, the chosen splitting is good for accuracy too, * and would give results as accurate as Horner's method if the * small terms were added from highest degree down. */ let mut r = T[4] + z * T[5]; let t = T[2] + z * T[3]; let w = z * z; let s = z * x; let u = T[0] + z * T[1]; r = (x + s * u) + (s * w) * (t + w * r); (if odd { -1. / r } else { r }) as f32 } compiler_builtins-0.1.101/libm/src/math/ldexp.rs000064400000000000000000000001741046102023000176360ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ldexp(x: f64, n: i32) -> f64 { super::scalbn(x, n) } compiler_builtins-0.1.101/libm/src/math/ldexpf.rs000064400000000000000000000001761046102023000200060ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn ldexpf(x: f32, n: i32) -> f32 { super::scalbnf(x, n) } compiler_builtins-0.1.101/libm/src/math/lgamma.rs000064400000000000000000000002051046102023000177530ustar 00000000000000use super::lgamma_r; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn lgamma(x: f64) -> f64 { lgamma_r(x).0 } compiler_builtins-0.1.101/libm/src/math/lgamma_r.rs000064400000000000000000000306431046102023000203050ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_lgamma_r.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== * */ /* lgamma_r(x, signgamp) * Reentrant version of the logarithm of the Gamma function * with user provide pointer for the sign of Gamma(x). * * Method: * 1. Argument Reduction for 0 < x <= 8 * Since gamma(1+s)=s*gamma(s), for x in [0,8], we may * reduce x to a number in [1.5,2.5] by * lgamma(1+s) = log(s) + lgamma(s) * for example, * lgamma(7.3) = log(6.3) + lgamma(6.3) * = log(6.3*5.3) + lgamma(5.3) * = log(6.3*5.3*4.3*3.3*2.3) + lgamma(2.3) * 2. Polynomial approximation of lgamma around its * minimun ymin=1.461632144968362245 to maintain monotonicity. 
* On [ymin-0.23, ymin+0.27] (i.e., [1.23164,1.73163]), use * Let z = x-ymin; * lgamma(x) = -1.214862905358496078218 + z^2*poly(z) * where * poly(z) is a 14 degree polynomial. * 2. Rational approximation in the primary interval [2,3] * We use the following approximation: * s = x-2.0; * lgamma(x) = 0.5*s + s*P(s)/Q(s) * with accuracy * |P/Q - (lgamma(x)-0.5s)| < 2**-61.71 * Our algorithms are based on the following observation * * zeta(2)-1 2 zeta(3)-1 3 * lgamma(2+s) = s*(1-Euler) + --------- * s - --------- * s + ... * 2 3 * * where Euler = 0.5771... is the Euler constant, which is very * close to 0.5. * * 3. For x>=8, we have * lgamma(x)~(x-0.5)log(x)-x+0.5*log(2pi)+1/(12x)-1/(360x**3)+.... * (better formula: * lgamma(x)~(x-0.5)*(log(x)-1)-.5*(log(2pi)-1) + ...) * Let z = 1/x, then we approximation * f(z) = lgamma(x) - (x-0.5)(log(x)-1) * by * 3 5 11 * w = w0 + w1*z + w2*z + w3*z + ... + w6*z * where * |w - f(z)| < 2**-58.74 * * 4. For negative x, since (G is gamma function) * -x*G(-x)*G(x) = PI/sin(PI*x), * we have * G(x) = PI/(sin(PI*x)*(-x)*G(-x)) * since G(-x) is positive, sign(G(x)) = sign(sin(PI*x)) for x<0 * Hence, for x<0, signgam = sign(sin(PI*x)) and * lgamma(x) = log(|Gamma(x)|) * = log(PI/(|x*sin(PI*x)|)) - lgamma(-x); * Note: one should avoid compute PI*(-x) directly in the * computation of sin(PI*(-x)). * * 5. Special Cases * lgamma(2+s) ~ s*(1-Euler) for tiny s * lgamma(1) = lgamma(2) = 0 * lgamma(x) ~ -log(|x|) for tiny x * lgamma(0) = lgamma(neg.integer) = inf and raise divide-by-zero * lgamma(inf) = inf * lgamma(-inf) = inf (bug for bug compatible with C99!?) * */ use super::{floor, k_cos, k_sin, log}; const PI: f64 = 3.14159265358979311600e+00; /* 0x400921FB, 0x54442D18 */ const A0: f64 = 7.72156649015328655494e-02; /* 0x3FB3C467, 0xE37DB0C8 */ const A1: f64 = 3.22467033424113591611e-01; /* 0x3FD4A34C, 0xC4A60FAD */ const A2: f64 = 6.73523010531292681824e-02; /* 0x3FB13E00, 0x1A5562A7 */ const A3: f64 = 2.05808084325167332806e-02; /* 0x3F951322, 0xAC92547B */ const A4: f64 = 7.38555086081402883957e-03; /* 0x3F7E404F, 0xB68FEFE8 */ const A5: f64 = 2.89051383673415629091e-03; /* 0x3F67ADD8, 0xCCB7926B */ const A6: f64 = 1.19270763183362067845e-03; /* 0x3F538A94, 0x116F3F5D */ const A7: f64 = 5.10069792153511336608e-04; /* 0x3F40B6C6, 0x89B99C00 */ const A8: f64 = 2.20862790713908385557e-04; /* 0x3F2CF2EC, 0xED10E54D */ const A9: f64 = 1.08011567247583939954e-04; /* 0x3F1C5088, 0x987DFB07 */ const A10: f64 = 2.52144565451257326939e-05; /* 0x3EFA7074, 0x428CFA52 */ const A11: f64 = 4.48640949618915160150e-05; /* 0x3F07858E, 0x90A45837 */ const TC: f64 = 1.46163214496836224576e+00; /* 0x3FF762D8, 0x6356BE3F */ const TF: f64 = -1.21486290535849611461e-01; /* 0xBFBF19B9, 0xBCC38A42 */ /* tt = -(tail of TF) */ const TT: f64 = -3.63867699703950536541e-18; /* 0xBC50C7CA, 0xA48A971F */ const T0: f64 = 4.83836122723810047042e-01; /* 0x3FDEF72B, 0xC8EE38A2 */ const T1: f64 = -1.47587722994593911752e-01; /* 0xBFC2E427, 0x8DC6C509 */ const T2: f64 = 6.46249402391333854778e-02; /* 0x3FB08B42, 0x94D5419B */ const T3: f64 = -3.27885410759859649565e-02; /* 0xBFA0C9A8, 0xDF35B713 */ const T4: f64 = 1.79706750811820387126e-02; /* 0x3F9266E7, 0x970AF9EC */ const T5: f64 = -1.03142241298341437450e-02; /* 0xBF851F9F, 0xBA91EC6A */ const T6: f64 = 6.10053870246291332635e-03; /* 0x3F78FCE0, 0xE370E344 */ const T7: f64 = -3.68452016781138256760e-03; /* 0xBF6E2EFF, 0xB3E914D7 */ const T8: f64 = 2.25964780900612472250e-03; /* 0x3F6282D3, 0x2E15C915 */ const T9: f64 = -1.40346469989232843813e-03; /* 
0xBF56FE8E, 0xBF2D1AF1 */ const T10: f64 = 8.81081882437654011382e-04; /* 0x3F4CDF0C, 0xEF61A8E9 */ const T11: f64 = -5.38595305356740546715e-04; /* 0xBF41A610, 0x9C73E0EC */ const T12: f64 = 3.15632070903625950361e-04; /* 0x3F34AF6D, 0x6C0EBBF7 */ const T13: f64 = -3.12754168375120860518e-04; /* 0xBF347F24, 0xECC38C38 */ const T14: f64 = 3.35529192635519073543e-04; /* 0x3F35FD3E, 0xE8C2D3F4 */ const U0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ const U1: f64 = 6.32827064025093366517e-01; /* 0x3FE4401E, 0x8B005DFF */ const U2: f64 = 1.45492250137234768737e+00; /* 0x3FF7475C, 0xD119BD6F */ const U3: f64 = 9.77717527963372745603e-01; /* 0x3FEF4976, 0x44EA8450 */ const U4: f64 = 2.28963728064692451092e-01; /* 0x3FCD4EAE, 0xF6010924 */ const U5: f64 = 1.33810918536787660377e-02; /* 0x3F8B678B, 0xBF2BAB09 */ const V1: f64 = 2.45597793713041134822e+00; /* 0x4003A5D7, 0xC2BD619C */ const V2: f64 = 2.12848976379893395361e+00; /* 0x40010725, 0xA42B18F5 */ const V3: f64 = 7.69285150456672783825e-01; /* 0x3FE89DFB, 0xE45050AF */ const V4: f64 = 1.04222645593369134254e-01; /* 0x3FBAAE55, 0xD6537C88 */ const V5: f64 = 3.21709242282423911810e-03; /* 0x3F6A5ABB, 0x57D0CF61 */ const S0: f64 = -7.72156649015328655494e-02; /* 0xBFB3C467, 0xE37DB0C8 */ const S1: f64 = 2.14982415960608852501e-01; /* 0x3FCB848B, 0x36E20878 */ const S2: f64 = 3.25778796408930981787e-01; /* 0x3FD4D98F, 0x4F139F59 */ const S3: f64 = 1.46350472652464452805e-01; /* 0x3FC2BB9C, 0xBEE5F2F7 */ const S4: f64 = 2.66422703033638609560e-02; /* 0x3F9B481C, 0x7E939961 */ const S5: f64 = 1.84028451407337715652e-03; /* 0x3F5E26B6, 0x7368F239 */ const S6: f64 = 3.19475326584100867617e-05; /* 0x3F00BFEC, 0xDD17E945 */ const R1: f64 = 1.39200533467621045958e+00; /* 0x3FF645A7, 0x62C4AB74 */ const R2: f64 = 7.21935547567138069525e-01; /* 0x3FE71A18, 0x93D3DCDC */ const R3: f64 = 1.71933865632803078993e-01; /* 0x3FC601ED, 0xCCFBDF27 */ const R4: f64 = 1.86459191715652901344e-02; /* 0x3F9317EA, 0x742ED475 */ const R5: f64 = 7.77942496381893596434e-04; /* 0x3F497DDA, 0xCA41A95B */ const R6: f64 = 7.32668430744625636189e-06; /* 0x3EDEBAF7, 0xA5B38140 */ const W0: f64 = 4.18938533204672725052e-01; /* 0x3FDACFE3, 0x90C97D69 */ const W1: f64 = 8.33333333333329678849e-02; /* 0x3FB55555, 0x5555553B */ const W2: f64 = -2.77777777728775536470e-03; /* 0xBF66C16C, 0x16B02E5C */ const W3: f64 = 7.93650558643019558500e-04; /* 0x3F4A019F, 0x98CF38B6 */ const W4: f64 = -5.95187557450339963135e-04; /* 0xBF4380CB, 0x8C0FE741 */ const W5: f64 = 8.36339918996282139126e-04; /* 0x3F4B67BA, 0x4CDAD5D1 */ const W6: f64 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ /* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ fn sin_pi(mut x: f64) -> f64 { let mut n: i32; /* spurious inexact if odd int */ x = 2.0 * (x * 0.5 - floor(x * 0.5)); /* x mod 2.0 */ n = (x * 4.0) as i32; n = div!(n + 1, 2); x -= (n as f64) * 0.5; x *= PI; match n { 1 => k_cos(x, 0.0), 2 => k_sin(-x, 0.0, 0), 3 => -k_cos(x, 0.0), 0 | _ => k_sin(x, 0.0, 0), } } #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn lgamma_r(mut x: f64) -> (f64, i32) { let u: u64 = x.to_bits(); let mut t: f64; let y: f64; let mut z: f64; let nadj: f64; let p: f64; let p1: f64; let p2: f64; let p3: f64; let q: f64; let mut r: f64; let w: f64; let ix: u32; let sign: bool; let i: i32; let mut signgam: i32; /* purge off +-inf, NaN, +-0, tiny and negative arguments */ signgam = 1; sign = (u >> 63) != 0; ix = ((u >> 32) as u32) & 0x7fffffff; if ix >= 0x7ff00000 
{ return (x * x, signgam); } if ix < (0x3ff - 70) << 20 { /* |x|<2**-70, return -log(|x|) */ if sign { x = -x; signgam = -1; } return (-log(x), signgam); } if sign { x = -x; t = sin_pi(x); if t == 0.0 { /* -integer */ return (1.0 / (x - x), signgam); } if t > 0.0 { signgam = -1; } else { t = -t; } nadj = log(PI / (t * x)); } else { nadj = 0.0; } /* purge off 1 and 2 */ if (ix == 0x3ff00000 || ix == 0x40000000) && (u & 0xffffffff) == 0 { r = 0.0; } /* for x < 2.0 */ else if ix < 0x40000000 { if ix <= 0x3feccccc { /* lgamma(x) = lgamma(x+1)-log(x) */ r = -log(x); if ix >= 0x3FE76944 { y = 1.0 - x; i = 0; } else if ix >= 0x3FCDA661 { y = x - (TC - 1.0); i = 1; } else { y = x; i = 2; } } else { r = 0.0; if ix >= 0x3FFBB4C3 { /* [1.7316,2] */ y = 2.0 - x; i = 0; } else if ix >= 0x3FF3B4C4 { /* [1.23,1.73] */ y = x - TC; i = 1; } else { y = x - 1.0; i = 2; } } match i { 0 => { z = y * y; p1 = A0 + z * (A2 + z * (A4 + z * (A6 + z * (A8 + z * A10)))); p2 = z * (A1 + z * (A3 + z * (A5 + z * (A7 + z * (A9 + z * A11))))); p = y * p1 + p2; r += p - 0.5 * y; } 1 => { z = y * y; w = z * y; p1 = T0 + w * (T3 + w * (T6 + w * (T9 + w * T12))); /* parallel comp */ p2 = T1 + w * (T4 + w * (T7 + w * (T10 + w * T13))); p3 = T2 + w * (T5 + w * (T8 + w * (T11 + w * T14))); p = z * p1 - (TT - w * (p2 + y * p3)); r += TF + p; } 2 => { p1 = y * (U0 + y * (U1 + y * (U2 + y * (U3 + y * (U4 + y * U5))))); p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); r += -0.5 * y + p1 / p2; } #[cfg(debug_assertions)] _ => unreachable!(), #[cfg(not(debug_assertions))] _ => {} } } else if ix < 0x40200000 { /* x < 8.0 */ i = x as i32; y = x - (i as f64); p = y * (S0 + y * (S1 + y * (S2 + y * (S3 + y * (S4 + y * (S5 + y * S6)))))); q = 1.0 + y * (R1 + y * (R2 + y * (R3 + y * (R4 + y * (R5 + y * R6))))); r = 0.5 * y + p / q; z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ // TODO: In C, this was implemented using switch jumps with fallthrough. // Does this implementation have performance problems? if i >= 7 { z *= y + 6.0; } if i >= 6 { z *= y + 5.0; } if i >= 5 { z *= y + 4.0; } if i >= 4 { z *= y + 3.0; } if i >= 3 { z *= y + 2.0; r += log(z); } } else if ix < 0x43900000 { /* 8.0 <= x < 2**58 */ t = log(x); z = 1.0 / x; y = z * z; w = W0 + z * (W1 + y * (W2 + y * (W3 + y * (W4 + y * (W5 + y * W6))))); r = (x - 0.5) * (t - 1.0) + w; } else { /* 2**58 <= x <= inf */ r = x * (log(x) - 1.0); } if sign { r = nadj - r; } return (r, signgam); } compiler_builtins-0.1.101/libm/src/math/lgammaf.rs000064400000000000000000000002101046102023000201150ustar 00000000000000use super::lgammaf_r; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn lgammaf(x: f32) -> f32 { lgammaf_r(x).0 } compiler_builtins-0.1.101/libm/src/math/lgammaf_r.rs000064400000000000000000000205751046102023000204560ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_lgammaf_r.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ use super::{floorf, k_cosf, k_sinf, logf}; const PI: f32 = 3.1415927410e+00; /* 0x40490fdb */ const A0: f32 = 7.7215664089e-02; /* 0x3d9e233f */ const A1: f32 = 3.2246702909e-01; /* 0x3ea51a66 */ const A2: f32 = 6.7352302372e-02; /* 0x3d89f001 */ const A3: f32 = 2.0580807701e-02; /* 0x3ca89915 */ const A4: f32 = 7.3855509982e-03; /* 0x3bf2027e */ const A5: f32 = 2.8905137442e-03; /* 0x3b3d6ec6 */ const A6: f32 = 1.1927076848e-03; /* 0x3a9c54a1 */ const A7: f32 = 5.1006977446e-04; /* 0x3a05b634 */ const A8: f32 = 2.2086278477e-04; /* 0x39679767 */ const A9: f32 = 1.0801156895e-04; /* 0x38e28445 */ const A10: f32 = 2.5214456400e-05; /* 0x37d383a2 */ const A11: f32 = 4.4864096708e-05; /* 0x383c2c75 */ const TC: f32 = 1.4616321325e+00; /* 0x3fbb16c3 */ const TF: f32 = -1.2148628384e-01; /* 0xbdf8cdcd */ /* TT = -(tail of TF) */ const TT: f32 = 6.6971006518e-09; /* 0x31e61c52 */ const T0: f32 = 4.8383611441e-01; /* 0x3ef7b95e */ const T1: f32 = -1.4758771658e-01; /* 0xbe17213c */ const T2: f32 = 6.4624942839e-02; /* 0x3d845a15 */ const T3: f32 = -3.2788541168e-02; /* 0xbd064d47 */ const T4: f32 = 1.7970675603e-02; /* 0x3c93373d */ const T5: f32 = -1.0314224288e-02; /* 0xbc28fcfe */ const T6: f32 = 6.1005386524e-03; /* 0x3bc7e707 */ const T7: f32 = -3.6845202558e-03; /* 0xbb7177fe */ const T8: f32 = 2.2596477065e-03; /* 0x3b141699 */ const T9: f32 = -1.4034647029e-03; /* 0xbab7f476 */ const T10: f32 = 8.8108185446e-04; /* 0x3a66f867 */ const T11: f32 = -5.3859531181e-04; /* 0xba0d3085 */ const T12: f32 = 3.1563205994e-04; /* 0x39a57b6b */ const T13: f32 = -3.1275415677e-04; /* 0xb9a3f927 */ const T14: f32 = 3.3552918467e-04; /* 0x39afe9f7 */ const U0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ const U1: f32 = 6.3282704353e-01; /* 0x3f2200f4 */ const U2: f32 = 1.4549225569e+00; /* 0x3fba3ae7 */ const U3: f32 = 9.7771751881e-01; /* 0x3f7a4bb2 */ const U4: f32 = 2.2896373272e-01; /* 0x3e6a7578 */ const U5: f32 = 1.3381091878e-02; /* 0x3c5b3c5e */ const V1: f32 = 2.4559779167e+00; /* 0x401d2ebe */ const V2: f32 = 2.1284897327e+00; /* 0x4008392d */ const V3: f32 = 7.6928514242e-01; /* 0x3f44efdf */ const V4: f32 = 1.0422264785e-01; /* 0x3dd572af */ const V5: f32 = 3.2170924824e-03; /* 0x3b52d5db */ const S0: f32 = -7.7215664089e-02; /* 0xbd9e233f */ const S1: f32 = 2.1498242021e-01; /* 0x3e5c245a */ const S2: f32 = 3.2577878237e-01; /* 0x3ea6cc7a */ const S3: f32 = 1.4635047317e-01; /* 0x3e15dce6 */ const S4: f32 = 2.6642270386e-02; /* 0x3cda40e4 */ const S5: f32 = 1.8402845599e-03; /* 0x3af135b4 */ const S6: f32 = 3.1947532989e-05; /* 0x3805ff67 */ const R1: f32 = 1.3920053244e+00; /* 0x3fb22d3b */ const R2: f32 = 7.2193557024e-01; /* 0x3f38d0c5 */ const R3: f32 = 1.7193385959e-01; /* 0x3e300f6e */ const R4: f32 = 1.8645919859e-02; /* 0x3c98bf54 */ const R5: f32 = 7.7794247773e-04; /* 0x3a4beed6 */ const R6: f32 = 7.3266842264e-06; /* 0x36f5d7bd */ const W0: f32 = 4.1893854737e-01; /* 0x3ed67f1d */ const W1: f32 = 8.3333335817e-02; /* 0x3daaaaab */ const W2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ const W3: f32 = 7.9365057172e-04; /* 0x3a500cfd */ const W4: f32 = -5.9518753551e-04; /* 0xba1c065c */ const W5: f32 = 8.3633989561e-04; /* 0x3a5b3dd2 */ const W6: f32 = -1.6309292987e-03; /* 0xbad5c4e8 */ /* sin(PI*x) assuming x > 2^-100, if sin(PI*x)==0 the sign is arbitrary */ fn sin_pi(mut x: f32) -> f32 { let mut y: f64; let mut n: isize; /* spurious inexact if odd int */ x = 2.0 * (x * 0.5 - floorf(x * 0.5)); /* x mod 2.0 */ n = (x * 4.0) as 
isize; n = div!(n + 1, 2); y = (x as f64) - (n as f64) * 0.5; y *= 3.14159265358979323846; match n { 1 => k_cosf(y), 2 => k_sinf(-y), 3 => -k_cosf(y), 0 | _ => k_sinf(y), } } #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn lgammaf_r(mut x: f32) -> (f32, i32) { let u = x.to_bits(); let mut t: f32; let y: f32; let mut z: f32; let nadj: f32; let p: f32; let p1: f32; let p2: f32; let p3: f32; let q: f32; let mut r: f32; let w: f32; let ix: u32; let i: i32; let sign: bool; let mut signgam: i32; /* purge off +-inf, NaN, +-0, tiny and negative arguments */ signgam = 1; sign = (u >> 31) != 0; ix = u & 0x7fffffff; if ix >= 0x7f800000 { return (x * x, signgam); } if ix < 0x35000000 { /* |x| < 2**-21, return -log(|x|) */ if sign { signgam = -1; x = -x; } return (-logf(x), signgam); } if sign { x = -x; t = sin_pi(x); if t == 0.0 { /* -integer */ return (1.0 / (x - x), signgam); } if t > 0.0 { signgam = -1; } else { t = -t; } nadj = logf(PI / (t * x)); } else { nadj = 0.0; } /* purge off 1 and 2 */ if ix == 0x3f800000 || ix == 0x40000000 { r = 0.0; } /* for x < 2.0 */ else if ix < 0x40000000 { if ix <= 0x3f666666 { /* lgamma(x) = lgamma(x+1)-log(x) */ r = -logf(x); if ix >= 0x3f3b4a20 { y = 1.0 - x; i = 0; } else if ix >= 0x3e6d3308 { y = x - (TC - 1.0); i = 1; } else { y = x; i = 2; } } else { r = 0.0; if ix >= 0x3fdda618 { /* [1.7316,2] */ y = 2.0 - x; i = 0; } else if ix >= 0x3F9da620 { /* [1.23,1.73] */ y = x - TC; i = 1; } else { y = x - 1.0; i = 2; } } match i { 0 => { z = y * y; p1 = A0 + z * (A2 + z * (A4 + z * (A6 + z * (A8 + z * A10)))); p2 = z * (A1 + z * (A3 + z * (A5 + z * (A7 + z * (A9 + z * A11))))); p = y * p1 + p2; r += p - 0.5 * y; } 1 => { z = y * y; w = z * y; p1 = T0 + w * (T3 + w * (T6 + w * (T9 + w * T12))); /* parallel comp */ p2 = T1 + w * (T4 + w * (T7 + w * (T10 + w * T13))); p3 = T2 + w * (T5 + w * (T8 + w * (T11 + w * T14))); p = z * p1 - (TT - w * (p2 + y * p3)); r += TF + p; } 2 => { p1 = y * (U0 + y * (U1 + y * (U2 + y * (U3 + y * (U4 + y * U5))))); p2 = 1.0 + y * (V1 + y * (V2 + y * (V3 + y * (V4 + y * V5)))); r += -0.5 * y + p1 / p2; } #[cfg(debug_assertions)] _ => unreachable!(), #[cfg(not(debug_assertions))] _ => {} } } else if ix < 0x41000000 { /* x < 8.0 */ i = x as i32; y = x - (i as f32); p = y * (S0 + y * (S1 + y * (S2 + y * (S3 + y * (S4 + y * (S5 + y * S6)))))); q = 1.0 + y * (R1 + y * (R2 + y * (R3 + y * (R4 + y * (R5 + y * R6))))); r = 0.5 * y + p / q; z = 1.0; /* lgamma(1+s) = log(s) + lgamma(s) */ // TODO: In C, this was implemented using switch jumps with fallthrough. // Does this implementation have performance problems? if i >= 7 { z *= y + 6.0; } if i >= 6 { z *= y + 5.0; } if i >= 5 { z *= y + 4.0; } if i >= 4 { z *= y + 3.0; } if i >= 3 { z *= y + 2.0; r += logf(z); } } else if ix < 0x5c800000 { /* 8.0 <= x < 2**58 */ t = logf(x); z = 1.0 / x; y = z * z; w = W0 + z * (W1 + y * (W2 + y * (W3 + y * (W4 + y * (W5 + y * W6))))); r = (x - 0.5) * (t - 1.0) + w; } else { /* 2**58 <= x <= inf */ r = x * (logf(x) - 1.0); } if sign { r = nadj - r; } return (r, signgam); } compiler_builtins-0.1.101/libm/src/math/log.rs000064400000000000000000000107101046102023000173000ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_log.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. 
* Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* log(x) * Return the logarithm of x * * Method : * 1. Argument Reduction: find k and f such that * x = 2^k * (1+f), * where sqrt(2)/2 < 1+f < sqrt(2) . * * 2. Approximation of log(1+f). * Let s = f/(2+f) ; based on log(1+f) = log(1+s) - log(1-s) * = 2s + 2/3 s**3 + 2/5 s**5 + ....., * = 2s + s*R * We use a special Remez algorithm on [0,0.1716] to generate * a polynomial of degree 14 to approximate R The maximum error * of this polynomial approximation is bounded by 2**-58.45. In * other words, * 2 4 6 8 10 12 14 * R(z) ~ Lg1*s +Lg2*s +Lg3*s +Lg4*s +Lg5*s +Lg6*s +Lg7*s * (the values of Lg1 to Lg7 are listed in the program) * and * | 2 14 | -58.45 * | Lg1*s +...+Lg7*s - R(z) | <= 2 * | | * Note that 2s = f - s*f = f - hfsq + s*hfsq, where hfsq = f*f/2. * In order to guarantee error in log below 1ulp, we compute log * by * log(1+f) = f - s*(f - R) (if f is not too large) * log(1+f) = f - (hfsq - s*(hfsq+R)). (better accuracy) * * 3. Finally, log(x) = k*ln2 + log(1+f). * = k*ln2_hi+(f-(hfsq-(s*(hfsq+R)+k*ln2_lo))) * Here ln2 is split into two floating point number: * ln2_hi + ln2_lo, * where n*ln2_hi is always exact for |n| < 2000. * * Special cases: * log(x) is NaN with signal if x < 0 (including -INF) ; * log(+INF) is +INF; log(0) is -INF with signal; * log(NaN) is that NaN with no signal. * * Accuracy: * according to an error analysis, the error is always less than * 1 ulp (unit in the last place). * * Constants: * The hexadecimal values are the intended ones for the following * constants. The decimal values may be used, provided that the * compiler will convert from decimal to binary accurately enough * to produce the hexadecimal values shown. */ const LN2_HI: f64 = 6.93147180369123816490e-01; /* 3fe62e42 fee00000 */ const LN2_LO: f64 = 1.90821492927058770002e-10; /* 3dea39ef 35793c76 */ const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 let mut ui = x.to_bits(); let mut hx: u32 = (ui >> 32) as u32; let mut k: i32 = 0; if (hx < 0x00100000) || ((hx >> 31) != 0) { /* x < 2**-126 */ if ui << 1 == 0 { return -1. 
/ (x * x); /* log(+-0)=-inf */ } if hx >> 31 != 0 { return (x - x) / 0.0; /* log(-#) = NaN */ } /* subnormal number, scale x up */ k -= 54; x *= x1p54; ui = x.to_bits(); hx = (ui >> 32) as u32; } else if hx >= 0x7ff00000 { return x; } else if hx == 0x3ff00000 && ui << 32 == 0 { return 0.; } /* reduce x into [sqrt(2)/2, sqrt(2)] */ hx += 0x3ff00000 - 0x3fe6a09e; k += ((hx >> 20) as i32) - 0x3ff; hx = (hx & 0x000fffff) + 0x3fe6a09e; ui = ((hx as u64) << 32) | (ui & 0xffffffff); x = f64::from_bits(ui); let f: f64 = x - 1.0; let hfsq: f64 = 0.5 * f * f; let s: f64 = f / (2.0 + f); let z: f64 = s * s; let w: f64 = z * z; let t1: f64 = w * (LG2 + w * (LG4 + w * LG6)); let t2: f64 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); let r: f64 = t2 + t1; let dk: f64 = k as f64; s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI } compiler_builtins-0.1.101/libm/src/math/log10.rs000064400000000000000000000072671046102023000174560ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_log10.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* * Return the base 10 logarithm of x. See log.c for most comments. * * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 * as in log.c, then combine and scale in extra precision: * log10(x) = (f - f*f/2 + r)/log(10) + k*log10(2) */ use core::f64; const IVLN10HI: f64 = 4.34294481878168880939e-01; /* 0x3fdbcb7b, 0x15200000 */ const IVLN10LO: f64 = 2.50829467116452752298e-11; /* 0x3dbb9438, 0xca9aadd5 */ const LOG10_2HI: f64 = 3.01029995663611771306e-01; /* 0x3FD34413, 0x509F6000 */ const LOG10_2LO: f64 = 3.69423907715893078616e-13; /* 0x3D59FEF3, 0x11F12B36 */ const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log10(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 let mut ui: u64 = x.to_bits(); let hfsq: f64; let f: f64; let s: f64; let z: f64; let r: f64; let mut w: f64; let t1: f64; let t2: f64; let dk: f64; let y: f64; let mut hi: f64; let lo: f64; let mut val_hi: f64; let mut val_lo: f64; let mut hx: u32; let mut k: i32; hx = (ui >> 32) as u32; k = 0; if hx < 0x00100000 || (hx >> 31) > 0 { if ui << 1 == 0 { return -1. 
/ (x * x); /* log(+-0)=-inf */ } if (hx >> 31) > 0 { return (x - x) / 0.0; /* log(-#) = NaN */ } /* subnormal number, scale x up */ k -= 54; x *= x1p54; ui = x.to_bits(); hx = (ui >> 32) as u32; } else if hx >= 0x7ff00000 { return x; } else if hx == 0x3ff00000 && ui << 32 == 0 { return 0.; } /* reduce x into [sqrt(2)/2, sqrt(2)] */ hx += 0x3ff00000 - 0x3fe6a09e; k += (hx >> 20) as i32 - 0x3ff; hx = (hx & 0x000fffff) + 0x3fe6a09e; ui = (hx as u64) << 32 | (ui & 0xffffffff); x = f64::from_bits(ui); f = x - 1.0; hfsq = 0.5 * f * f; s = f / (2.0 + f); z = s * s; w = z * z; t1 = w * (LG2 + w * (LG4 + w * LG6)); t2 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); r = t2 + t1; /* See log2.c for details. */ /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */ hi = f - hfsq; ui = hi.to_bits(); ui &= (-1i64 as u64) << 32; hi = f64::from_bits(ui); lo = f - hi - hfsq + s * (hfsq + r); /* val_hi+val_lo ~ log10(1+f) + k*log10(2) */ val_hi = hi * IVLN10HI; dk = k as f64; y = dk * LOG10_2HI; val_lo = dk * LOG10_2LO + (lo + hi) * IVLN10LO + lo * IVLN10HI; /* * Extra precision in for adding y is not strictly needed * since there is no very large cancellation near x = sqrt(2) or * x = 1/sqrt(2), but we do it anyway since it costs little on CPUs * with some parallelism and it reduces the error for many args. */ w = y + val_hi; val_lo += (y - w) + val_hi; val_hi = w; val_lo + val_hi } compiler_builtins-0.1.101/libm/src/math/log10f.rs000064400000000000000000000050011046102023000176040ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_log10f.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* * See comments in log10.c. */ use core::f32; const IVLN10HI: f32 = 4.3432617188e-01; /* 0x3ede6000 */ const IVLN10LO: f32 = -3.1689971365e-05; /* 0xb804ead9 */ const LOG10_2HI: f32 = 3.0102920532e-01; /* 0x3e9a2080 */ const LOG10_2LO: f32 = 7.9034151668e-07; /* 0x355427db */ /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24 */ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log10f(mut x: f32) -> f32 { let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 let mut ui: u32 = x.to_bits(); let hfsq: f32; let f: f32; let s: f32; let z: f32; let r: f32; let w: f32; let t1: f32; let t2: f32; let dk: f32; let mut hi: f32; let lo: f32; let mut ix: u32; let mut k: i32; ix = ui; k = 0; if ix < 0x00800000 || (ix >> 31) > 0 { /* x < 2**-126 */ if ix << 1 == 0 { return -1. 
/ (x * x); /* log(+-0)=-inf */ } if (ix >> 31) > 0 { return (x - x) / 0.0; /* log(-#) = NaN */ } /* subnormal number, scale up x */ k -= 25; x *= x1p25f; ui = x.to_bits(); ix = ui; } else if ix >= 0x7f800000 { return x; } else if ix == 0x3f800000 { return 0.; } /* reduce x into [sqrt(2)/2, sqrt(2)] */ ix += 0x3f800000 - 0x3f3504f3; k += (ix >> 23) as i32 - 0x7f; ix = (ix & 0x007fffff) + 0x3f3504f3; ui = ix; x = f32::from_bits(ui); f = x - 1.0; s = f / (2.0 + f); z = s * s; w = z * z; t1 = w * (LG2 + w * LG4); t2 = z * (LG1 + w * LG3); r = t2 + t1; hfsq = 0.5 * f * f; hi = f - hfsq; ui = hi.to_bits(); ui &= 0xfffff000; hi = f32::from_bits(ui); lo = f - hi - hfsq + s * (hfsq + r); dk = k as f32; dk * LOG10_2LO + (lo + hi) * IVLN10LO + lo * IVLN10HI + hi * IVLN10HI + dk * LOG10_2HI } compiler_builtins-0.1.101/libm/src/math/log1p.rs000064400000000000000000000112411046102023000175410ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_log1p.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* double log1p(double x) * Return the natural logarithm of 1+x. * * Method : * 1. Argument Reduction: find k and f such that * 1+x = 2^k * (1+f), * where sqrt(2)/2 < 1+f < sqrt(2) . * * Note. If k=0, then f=x is exact. However, if k!=0, then f * may not be representable exactly. In that case, a correction * term is need. Let u=1+x rounded. Let c = (1+x)-u, then * log(1+x) - log(u) ~ c/u. Thus, we proceed to compute log(u), * and add back the correction term c/u. * (Note: when x > 2**53, one can simply return log(x)) * * 2. Approximation of log(1+f): See log.c * * 3. Finally, log1p(x) = k*ln2 + log(1+f) + c/u. See log.c * * Special cases: * log1p(x) is NaN with signal if x < -1 (including -INF) ; * log1p(+INF) is +INF; log1p(-1) is -INF with signal; * log1p(NaN) is that NaN with no signal. * * Accuracy: * according to an error analysis, the error is always less than * 1 ulp (unit in the last place). * * Constants: * The hexadecimal values are the intended ones for the following * constants. The decimal values may be used, provided that the * compiler will convert from decimal to binary accurately enough * to produce the hexadecimal values shown. * * Note: Assuming log() return accurate answer, the following * algorithm can be used to compute log1p(x) to within a few ULP: * * u = 1+x; * if(u==1.0) return x ; else * return log(u)*(x/(u-1.0)); * * See HP-15C Advanced Functions Handbook, p.193. 
*/ use core::f64; const LN2_HI: f64 = 6.93147180369123816490e-01; /* 3fe62e42 fee00000 */ const LN2_LO: f64 = 1.90821492927058770002e-10; /* 3dea39ef 35793c76 */ const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log1p(x: f64) -> f64 { let mut ui: u64 = x.to_bits(); let hfsq: f64; let mut f: f64 = 0.; let mut c: f64 = 0.; let s: f64; let z: f64; let r: f64; let w: f64; let t1: f64; let t2: f64; let dk: f64; let hx: u32; let mut hu: u32; let mut k: i32; hx = (ui >> 32) as u32; k = 1; if hx < 0x3fda827a || (hx >> 31) > 0 { /* 1+x < sqrt(2)+ */ if hx >= 0xbff00000 { /* x <= -1.0 */ if x == -1. { return x / 0.0; /* log1p(-1) = -inf */ } return (x - x) / 0.0; /* log1p(x<-1) = NaN */ } if hx << 1 < 0x3ca00000 << 1 { /* |x| < 2**-53 */ /* underflow if subnormal */ if (hx & 0x7ff00000) == 0 { force_eval!(x as f32); } return x; } if hx <= 0xbfd2bec4 { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ k = 0; c = 0.; f = x; } } else if hx >= 0x7ff00000 { return x; } if k > 0 { ui = (1. + x).to_bits(); hu = (ui >> 32) as u32; hu += 0x3ff00000 - 0x3fe6a09e; k = (hu >> 20) as i32 - 0x3ff; /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ if k < 54 { c = if k >= 2 { 1. - (f64::from_bits(ui) - x) } else { x - (f64::from_bits(ui) - 1.) }; c /= f64::from_bits(ui); } else { c = 0.; } /* reduce u into [sqrt(2)/2, sqrt(2)] */ hu = (hu & 0x000fffff) + 0x3fe6a09e; ui = (hu as u64) << 32 | (ui & 0xffffffff); f = f64::from_bits(ui) - 1.; } hfsq = 0.5 * f * f; s = f / (2.0 + f); z = s * s; w = z * z; t1 = w * (LG2 + w * (LG4 + w * LG6)); t2 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); r = t2 + t1; dk = k as f64; s * (hfsq + r) + (dk * LN2_LO + c) - hfsq + f + dk * LN2_HI } compiler_builtins-0.1.101/libm/src/math/log1pf.rs000064400000000000000000000054241046102023000177150ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_log1pf.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use core::f32; const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). 
*/ const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24 */ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log1pf(x: f32) -> f32 { let mut ui: u32 = x.to_bits(); let hfsq: f32; let mut f: f32 = 0.; let mut c: f32 = 0.; let s: f32; let z: f32; let r: f32; let w: f32; let t1: f32; let t2: f32; let dk: f32; let ix: u32; let mut iu: u32; let mut k: i32; ix = ui; k = 1; if ix < 0x3ed413d0 || (ix >> 31) > 0 { /* 1+x < sqrt(2)+ */ if ix >= 0xbf800000 { /* x <= -1.0 */ if x == -1. { return x / 0.0; /* log1p(-1)=+inf */ } return (x - x) / 0.0; /* log1p(x<-1)=NaN */ } if ix << 1 < 0x33800000 << 1 { /* |x| < 2**-24 */ /* underflow if subnormal */ if (ix & 0x7f800000) == 0 { force_eval!(x * x); } return x; } if ix <= 0xbe95f619 { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */ k = 0; c = 0.; f = x; } } else if ix >= 0x7f800000 { return x; } if k > 0 { ui = (1. + x).to_bits(); iu = ui; iu += 0x3f800000 - 0x3f3504f3; k = (iu >> 23) as i32 - 0x7f; /* correction term ~ log(1+x)-log(u), avoid underflow in c/u */ if k < 25 { c = if k >= 2 { 1. - (f32::from_bits(ui) - x) } else { x - (f32::from_bits(ui) - 1.) }; c /= f32::from_bits(ui); } else { c = 0.; } /* reduce u into [sqrt(2)/2, sqrt(2)] */ iu = (iu & 0x007fffff) + 0x3f3504f3; ui = iu; f = f32::from_bits(ui) - 1.; } s = f / (2.0 + f); z = s * s; w = z * z; t1 = w * (LG2 + w * LG4); t2 = z * (LG1 + w * LG3); r = t2 + t1; hfsq = 0.5 * f * f; dk = k as f32; s * (hfsq + r) + (dk * LN2_LO + c) - hfsq + f + dk * LN2_HI } compiler_builtins-0.1.101/libm/src/math/log2.rs000064400000000000000000000062311046102023000173650ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_log2.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* * Return the base 2 logarithm of x. See log.c for most comments. 
* * Reduce x to 2^k (1+f) and calculate r = log(1+f) - f + f*f/2 * as in log.c, then combine and scale in extra precision: * log2(x) = (f - f*f/2 + r)/log(2) + k */ use core::f64; const IVLN2HI: f64 = 1.44269504072144627571e+00; /* 0x3ff71547, 0x65200000 */ const IVLN2LO: f64 = 1.67517131648865118353e-10; /* 0x3de705fc, 0x2eefa200 */ const LG1: f64 = 6.666666666666735130e-01; /* 3FE55555 55555593 */ const LG2: f64 = 3.999999999940941908e-01; /* 3FD99999 9997FA04 */ const LG3: f64 = 2.857142874366239149e-01; /* 3FD24924 94229359 */ const LG4: f64 = 2.222219843214978396e-01; /* 3FCC71C5 1D8E78AF */ const LG5: f64 = 1.818357216161805012e-01; /* 3FC74664 96CB03DE */ const LG6: f64 = 1.531383769920937332e-01; /* 3FC39A09 D078C69F */ const LG7: f64 = 1.479819860511658591e-01; /* 3FC2F112 DF3E5244 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log2(mut x: f64) -> f64 { let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54 let mut ui: u64 = x.to_bits(); let hfsq: f64; let f: f64; let s: f64; let z: f64; let r: f64; let mut w: f64; let t1: f64; let t2: f64; let y: f64; let mut hi: f64; let lo: f64; let mut val_hi: f64; let mut val_lo: f64; let mut hx: u32; let mut k: i32; hx = (ui >> 32) as u32; k = 0; if hx < 0x00100000 || (hx >> 31) > 0 { if ui << 1 == 0 { return -1. / (x * x); /* log(+-0)=-inf */ } if (hx >> 31) > 0 { return (x - x) / 0.0; /* log(-#) = NaN */ } /* subnormal number, scale x up */ k -= 54; x *= x1p54; ui = x.to_bits(); hx = (ui >> 32) as u32; } else if hx >= 0x7ff00000 { return x; } else if hx == 0x3ff00000 && ui << 32 == 0 { return 0.; } /* reduce x into [sqrt(2)/2, sqrt(2)] */ hx += 0x3ff00000 - 0x3fe6a09e; k += (hx >> 20) as i32 - 0x3ff; hx = (hx & 0x000fffff) + 0x3fe6a09e; ui = (hx as u64) << 32 | (ui & 0xffffffff); x = f64::from_bits(ui); f = x - 1.0; hfsq = 0.5 * f * f; s = f / (2.0 + f); z = s * s; w = z * z; t1 = w * (LG2 + w * (LG4 + w * LG6)); t2 = z * (LG1 + w * (LG3 + w * (LG5 + w * LG7))); r = t2 + t1; /* hi+lo = f - hfsq + s*(hfsq+R) ~ log(1+f) */ hi = f - hfsq; ui = hi.to_bits(); ui &= (-1i64 as u64) << 32; hi = f64::from_bits(ui); lo = f - hi - hfsq + s * (hfsq + r); val_hi = hi * IVLN2HI; val_lo = (lo + hi) * IVLN2LO + lo * IVLN2HI; /* spadd(val_hi, val_lo, y), except for not using double_t: */ y = k.into(); w = y + val_hi; val_lo += (y - w) + val_hi; val_hi = w; val_lo + val_hi } compiler_builtins-0.1.101/libm/src/math/log2f.rs000064400000000000000000000045121046102023000175330ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_log2f.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ /* * See comments in log2.c. */ use core::f32; const IVLN2HI: f32 = 1.4428710938e+00; /* 0x3fb8b000 */ const IVLN2LO: f32 = -1.7605285393e-04; /* 0xb9389ad4 */ /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). 
*/ const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24 */ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn log2f(mut x: f32) -> f32 { let x1p25f = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 let mut ui: u32 = x.to_bits(); let hfsq: f32; let f: f32; let s: f32; let z: f32; let r: f32; let w: f32; let t1: f32; let t2: f32; let mut hi: f32; let lo: f32; let mut ix: u32; let mut k: i32; ix = ui; k = 0; if ix < 0x00800000 || (ix >> 31) > 0 { /* x < 2**-126 */ if ix << 1 == 0 { return -1. / (x * x); /* log(+-0)=-inf */ } if (ix >> 31) > 0 { return (x - x) / 0.0; /* log(-#) = NaN */ } /* subnormal number, scale up x */ k -= 25; x *= x1p25f; ui = x.to_bits(); ix = ui; } else if ix >= 0x7f800000 { return x; } else if ix == 0x3f800000 { return 0.; } /* reduce x into [sqrt(2)/2, sqrt(2)] */ ix += 0x3f800000 - 0x3f3504f3; k += (ix >> 23) as i32 - 0x7f; ix = (ix & 0x007fffff) + 0x3f3504f3; ui = ix; x = f32::from_bits(ui); f = x - 1.0; s = f / (2.0 + f); z = s * s; w = z * z; t1 = w * (LG2 + w * LG4); t2 = z * (LG1 + w * LG3); r = t2 + t1; hfsq = 0.5 * f * f; hi = f - hfsq; ui = hi.to_bits(); ui &= 0xfffff000; hi = f32::from_bits(ui); lo = f - hi - hfsq + s * (hfsq + r); (lo + hi) * IVLN2LO + lo * IVLN2HI + hi * IVLN2HI + k as f32 } compiler_builtins-0.1.101/libm/src/math/logf.rs000064400000000000000000000040341046102023000174500ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_logf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ const LN2_HI: f32 = 6.9313812256e-01; /* 0x3f317180 */ const LN2_LO: f32 = 9.0580006145e-06; /* 0x3717f7d1 */ /* |(log(1+s)-log(1-s))/s - Lg(s)| < 2**-34.24 (~[-4.95e-11, 4.97e-11]). */ const LG1: f32 = 0.66666662693; /* 0xaaaaaa.0p-24*/ const LG2: f32 = 0.40000972152; /* 0xccce13.0p-25 */ const LG3: f32 = 0.28498786688; /* 0x91e9ee.0p-25 */ const LG4: f32 = 0.24279078841; /* 0xf89e26.0p-26 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn logf(mut x: f32) -> f32 { let x1p25 = f32::from_bits(0x4c000000); // 0x1p25f === 2 ^ 25 let mut ix = x.to_bits(); let mut k = 0i32; if (ix < 0x00800000) || ((ix >> 31) != 0) { /* x < 2**-126 */ if ix << 1 == 0 { return -1. / (x * x); /* log(+-0)=-inf */ } if (ix >> 31) != 0 { return (x - x) / 0.; /* log(-#) = NaN */ } /* subnormal number, scale up x */ k -= 25; x *= x1p25; ix = x.to_bits(); } else if ix >= 0x7f800000 { return x; } else if ix == 0x3f800000 { return 0.; } /* reduce x into [sqrt(2)/2, sqrt(2)] */ ix += 0x3f800000 - 0x3f3504f3; k += ((ix >> 23) as i32) - 0x7f; ix = (ix & 0x007fffff) + 0x3f3504f3; x = f32::from_bits(ix); let f = x - 1.; let s = f / (2. + f); let z = s * s; let w = z * z; let t1 = w * (LG2 + w * LG4); let t2 = z * (LG1 + w * LG3); let r = t2 + t1; let hfsq = 0.5 * f * f; let dk = k as f32; s * (hfsq + r) + dk * LN2_LO - hfsq + f + dk * LN2_HI } compiler_builtins-0.1.101/libm/src/math/mod.rs000064400000000000000000000175441046102023000173120ustar 00000000000000macro_rules! 
force_eval { ($e:expr) => { unsafe { ::core::ptr::read_volatile(&$e) } }; } #[cfg(not(debug_assertions))] macro_rules! i { ($array:expr, $index:expr) => { unsafe { *$array.get_unchecked($index) } }; ($array:expr, $index:expr, = , $rhs:expr) => { unsafe { *$array.get_unchecked_mut($index) = $rhs; } }; ($array:expr, $index:expr, += , $rhs:expr) => { unsafe { *$array.get_unchecked_mut($index) += $rhs; } }; ($array:expr, $index:expr, -= , $rhs:expr) => { unsafe { *$array.get_unchecked_mut($index) -= $rhs; } }; ($array:expr, $index:expr, &= , $rhs:expr) => { unsafe { *$array.get_unchecked_mut($index) &= $rhs; } }; ($array:expr, $index:expr, == , $rhs:expr) => { unsafe { *$array.get_unchecked_mut($index) == $rhs } }; } #[cfg(debug_assertions)] macro_rules! i { ($array:expr, $index:expr) => { *$array.get($index).unwrap() }; ($array:expr, $index:expr, = , $rhs:expr) => { *$array.get_mut($index).unwrap() = $rhs; }; ($array:expr, $index:expr, -= , $rhs:expr) => { *$array.get_mut($index).unwrap() -= $rhs; }; ($array:expr, $index:expr, += , $rhs:expr) => { *$array.get_mut($index).unwrap() += $rhs; }; ($array:expr, $index:expr, &= , $rhs:expr) => { *$array.get_mut($index).unwrap() &= $rhs; }; ($array:expr, $index:expr, == , $rhs:expr) => { *$array.get_mut($index).unwrap() == $rhs }; } // Temporary macro to avoid panic codegen for division (in debug mode too). At // the time of this writing this is only used in a few places, and once // rust-lang/rust#72751 is fixed then this macro will no longer be necessary and // the native `/` operator can be used and panics won't be codegen'd. #[cfg(any(debug_assertions, not(feature = "unstable")))] macro_rules! div { ($a:expr, $b:expr) => { $a / $b }; } #[cfg(all(not(debug_assertions), feature = "unstable"))] macro_rules! div { ($a:expr, $b:expr) => { unsafe { core::intrinsics::unchecked_div($a, $b) } }; } macro_rules! llvm_intrinsically_optimized { (#[cfg($($clause:tt)*)] $e:expr) => { #[cfg(all(feature = "unstable", $($clause)*))] { if true { // thwart the dead code lint $e } } }; } // Public modules mod acos; mod acosf; mod acosh; mod acoshf; mod asin; mod asinf; mod asinh; mod asinhf; mod atan; mod atan2; mod atan2f; mod atanf; mod atanh; mod atanhf; mod cbrt; mod cbrtf; mod ceil; mod ceilf; mod copysign; mod copysignf; mod cos; mod cosf; mod cosh; mod coshf; mod erf; mod erff; mod exp; mod exp10; mod exp10f; mod exp2; mod exp2f; mod expf; mod expm1; mod expm1f; mod fabs; mod fabsf; mod fdim; mod fdimf; mod floor; mod floorf; mod fma; mod fmaf; mod fmax; mod fmaxf; mod fmin; mod fminf; mod fmod; mod fmodf; mod frexp; mod frexpf; mod hypot; mod hypotf; mod ilogb; mod ilogbf; mod j0; mod j0f; mod j1; mod j1f; mod jn; mod jnf; mod ldexp; mod ldexpf; mod lgamma; mod lgamma_r; mod lgammaf; mod lgammaf_r; mod log; mod log10; mod log10f; mod log1p; mod log1pf; mod log2; mod log2f; mod logf; mod modf; mod modff; mod nextafter; mod nextafterf; mod pow; mod powf; mod remainder; mod remainderf; mod remquo; mod remquof; mod rint; mod rintf; mod round; mod roundf; mod scalbn; mod scalbnf; mod sin; mod sincos; mod sincosf; mod sinf; mod sinh; mod sinhf; mod sqrt; mod sqrtf; mod tan; mod tanf; mod tanh; mod tanhf; mod tgamma; mod tgammaf; mod trunc; mod truncf; // Use separated imports instead of {}-grouped imports for easier merging. 
pub use self::acos::acos; pub use self::acosf::acosf; pub use self::acosh::acosh; pub use self::acoshf::acoshf; pub use self::asin::asin; pub use self::asinf::asinf; pub use self::asinh::asinh; pub use self::asinhf::asinhf; pub use self::atan::atan; pub use self::atan2::atan2; pub use self::atan2f::atan2f; pub use self::atanf::atanf; pub use self::atanh::atanh; pub use self::atanhf::atanhf; pub use self::cbrt::cbrt; pub use self::cbrtf::cbrtf; pub use self::ceil::ceil; pub use self::ceilf::ceilf; pub use self::copysign::copysign; pub use self::copysignf::copysignf; pub use self::cos::cos; pub use self::cosf::cosf; pub use self::cosh::cosh; pub use self::coshf::coshf; pub use self::erf::erf; pub use self::erf::erfc; pub use self::erff::erfcf; pub use self::erff::erff; pub use self::exp::exp; pub use self::exp10::exp10; pub use self::exp10f::exp10f; pub use self::exp2::exp2; pub use self::exp2f::exp2f; pub use self::expf::expf; pub use self::expm1::expm1; pub use self::expm1f::expm1f; pub use self::fabs::fabs; pub use self::fabsf::fabsf; pub use self::fdim::fdim; pub use self::fdimf::fdimf; pub use self::floor::floor; pub use self::floorf::floorf; pub use self::fma::fma; pub use self::fmaf::fmaf; pub use self::fmax::fmax; pub use self::fmaxf::fmaxf; pub use self::fmin::fmin; pub use self::fminf::fminf; pub use self::fmod::fmod; pub use self::fmodf::fmodf; pub use self::frexp::frexp; pub use self::frexpf::frexpf; pub use self::hypot::hypot; pub use self::hypotf::hypotf; pub use self::ilogb::ilogb; pub use self::ilogbf::ilogbf; pub use self::j0::j0; pub use self::j0::y0; pub use self::j0f::j0f; pub use self::j0f::y0f; pub use self::j1::j1; pub use self::j1::y1; pub use self::j1f::j1f; pub use self::j1f::y1f; pub use self::jn::jn; pub use self::jn::yn; pub use self::jnf::jnf; pub use self::jnf::ynf; pub use self::ldexp::ldexp; pub use self::ldexpf::ldexpf; pub use self::lgamma::lgamma; pub use self::lgamma_r::lgamma_r; pub use self::lgammaf::lgammaf; pub use self::lgammaf_r::lgammaf_r; pub use self::log::log; pub use self::log10::log10; pub use self::log10f::log10f; pub use self::log1p::log1p; pub use self::log1pf::log1pf; pub use self::log2::log2; pub use self::log2f::log2f; pub use self::logf::logf; pub use self::modf::modf; pub use self::modff::modff; pub use self::nextafter::nextafter; pub use self::nextafterf::nextafterf; pub use self::pow::pow; pub use self::powf::powf; pub use self::remainder::remainder; pub use self::remainderf::remainderf; pub use self::remquo::remquo; pub use self::remquof::remquof; pub use self::rint::rint; pub use self::rintf::rintf; pub use self::round::round; pub use self::roundf::roundf; pub use self::scalbn::scalbn; pub use self::scalbnf::scalbnf; pub use self::sin::sin; pub use self::sincos::sincos; pub use self::sincosf::sincosf; pub use self::sinf::sinf; pub use self::sinh::sinh; pub use self::sinhf::sinhf; pub use self::sqrt::sqrt; pub use self::sqrtf::sqrtf; pub use self::tan::tan; pub use self::tanf::tanf; pub use self::tanh::tanh; pub use self::tanhf::tanhf; pub use self::tgamma::tgamma; pub use self::tgammaf::tgammaf; pub use self::trunc::trunc; pub use self::truncf::truncf; // Private modules mod expo2; mod fenv; mod k_cos; mod k_cosf; mod k_expo2; mod k_expo2f; mod k_sin; mod k_sinf; mod k_tan; mod k_tanf; mod rem_pio2; mod rem_pio2_large; mod rem_pio2f; // Private re-imports use self::expo2::expo2; use self::k_cos::k_cos; use self::k_cosf::k_cosf; use self::k_expo2::k_expo2; use self::k_expo2f::k_expo2f; use self::k_sin::k_sin; use 
self::k_sinf::k_sinf; use self::k_tan::k_tan; use self::k_tanf::k_tanf; use self::rem_pio2::rem_pio2; use self::rem_pio2_large::rem_pio2_large; use self::rem_pio2f::rem_pio2f; #[inline] fn get_high_word(x: f64) -> u32 { (x.to_bits() >> 32) as u32 } #[inline] fn get_low_word(x: f64) -> u32 { x.to_bits() as u32 } #[inline] fn with_set_high_word(f: f64, hi: u32) -> f64 { let mut tmp = f.to_bits(); tmp &= 0x00000000_ffffffff; tmp |= (hi as u64) << 32; f64::from_bits(tmp) } #[inline] fn with_set_low_word(f: f64, lo: u32) -> f64 { let mut tmp = f.to_bits(); tmp &= 0xffffffff_00000000; tmp |= lo as u64; f64::from_bits(tmp) } #[inline] fn combine_words(hi: u32, lo: u32) -> f64 { f64::from_bits((hi as u64) << 32 | lo as u64) } compiler_builtins-0.1.101/libm/src/math/modf.rs000064400000000000000000000013321046102023000174440ustar 00000000000000pub fn modf(x: f64) -> (f64, f64) { let rv2: f64; let mut u = x.to_bits(); let mask: u64; let e = ((u >> 52 & 0x7ff) as i32) - 0x3ff; /* no fractional part */ if e >= 52 { rv2 = x; if e == 0x400 && (u << 12) != 0 { /* nan */ return (x, rv2); } u &= 1 << 63; return (f64::from_bits(u), rv2); } /* no integral part*/ if e < 0 { u &= 1 << 63; rv2 = f64::from_bits(u); return (x, rv2); } mask = ((!0) >> 12) >> e; if (u & mask) == 0 { rv2 = x; u &= 1 << 63; return (f64::from_bits(u), rv2); } u &= !mask; rv2 = f64::from_bits(u); return (x - rv2, rv2); } compiler_builtins-0.1.101/libm/src/math/modff.rs000064400000000000000000000013431046102023000176140ustar 00000000000000pub fn modff(x: f32) -> (f32, f32) { let rv2: f32; let mut u: u32 = x.to_bits(); let mask: u32; let e = ((u >> 23 & 0xff) as i32) - 0x7f; /* no fractional part */ if e >= 23 { rv2 = x; if e == 0x80 && (u << 9) != 0 { /* nan */ return (x, rv2); } u &= 0x80000000; return (f32::from_bits(u), rv2); } /* no integral part */ if e < 0 { u &= 0x80000000; rv2 = f32::from_bits(u); return (x, rv2); } mask = 0x007fffff >> e; if (u & mask) == 0 { rv2 = x; u &= 0x80000000; return (f32::from_bits(u), rv2); } u &= !mask; rv2 = f32::from_bits(u); return (x - rv2, rv2); } compiler_builtins-0.1.101/libm/src/math/nextafter.rs000064400000000000000000000015661046102023000205300ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn nextafter(x: f64, y: f64) -> f64 { if x.is_nan() || y.is_nan() { return x + y; } let mut ux_i = x.to_bits(); let uy_i = y.to_bits(); if ux_i == uy_i { return y; } let ax = ux_i & !1_u64 / 2; let ay = uy_i & !1_u64 / 2; if ax == 0 { if ay == 0 { return y; } ux_i = (uy_i & 1_u64 << 63) | 1; } else if ax > ay || ((ux_i ^ uy_i) & 1_u64 << 63) != 0 { ux_i -= 1; } else { ux_i += 1; } let e = ux_i.wrapping_shr(52 & 0x7ff); // raise overflow if ux.f is infinite and x is finite if e == 0x7ff { force_eval!(x + x); } let ux_f = f64::from_bits(ux_i); // raise underflow if ux.f is subnormal or zero if e == 0 { force_eval!(x * x + ux_f * ux_f); } ux_f } compiler_builtins-0.1.101/libm/src/math/nextafterf.rs000064400000000000000000000016301046102023000206660ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn nextafterf(x: f32, y: f32) -> f32 { if x.is_nan() || y.is_nan() { return x + y; } let mut ux_i = x.to_bits(); let uy_i = y.to_bits(); if ux_i == uy_i { return y; } let ax = ux_i & 0x7fff_ffff_u32; let ay = uy_i & 0x7fff_ffff_u32; if ax == 0 { if ay == 0 { return y; } ux_i = (uy_i & 0x8000_0000_u32) | 1; } else if ax > ay || ((ux_i ^ uy_i) & 0x8000_0000_u32) != 0 { ux_i -= 1; } else { ux_i += 1; } let e = ux_i.wrapping_shr(0x7f80_0000_u32); // 
raise overflow if ux_f is infinite and x is finite if e == 0x7f80_0000_u32 { force_eval!(x + x); } let ux_f = f32::from_bits(ux_i); // raise underflow if ux_f is subnormal or zero if e == 0 { force_eval!(x * x + ux_f * ux_f); } ux_f } compiler_builtins-0.1.101/libm/src/math/pow.rs000064400000000000000000000520521046102023000173310ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_pow.c */ /* * ==================================================== * Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. * * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ // pow(x,y) return x**y // // n // Method: Let x = 2 * (1+f) // 1. Compute and return log2(x) in two pieces: // log2(x) = w1 + w2, // where w1 has 53-24 = 29 bit trailing zeros. // 2. Perform y*log2(x) = n+y' by simulating muti-precision // arithmetic, where |y'|<=0.5. // 3. Return x**y = 2**n*exp(y'*log2) // // Special cases: // 1. (anything) ** 0 is 1 // 2. 1 ** (anything) is 1 // 3. (anything except 1) ** NAN is NAN // 4. NAN ** (anything except 0) is NAN // 5. +-(|x| > 1) ** +INF is +INF // 6. +-(|x| > 1) ** -INF is +0 // 7. +-(|x| < 1) ** +INF is +0 // 8. +-(|x| < 1) ** -INF is +INF // 9. -1 ** +-INF is 1 // 10. +0 ** (+anything except 0, NAN) is +0 // 11. -0 ** (+anything except 0, NAN, odd integer) is +0 // 12. +0 ** (-anything except 0, NAN) is +INF, raise divbyzero // 13. -0 ** (-anything except 0, NAN, odd integer) is +INF, raise divbyzero // 14. -0 ** (+odd integer) is -0 // 15. -0 ** (-odd integer) is -INF, raise divbyzero // 16. +INF ** (+anything except 0,NAN) is +INF // 17. +INF ** (-anything except 0,NAN) is +0 // 18. -INF ** (+odd integer) is -INF // 19. -INF ** (anything) = -0 ** (-anything), (anything except odd integer) // 20. (anything) ** 1 is (anything) // 21. (anything) ** -1 is 1/(anything) // 22. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) // 23. (-anything except 0 and inf) ** (non-integer) is NAN // // Accuracy: // pow(x,y) returns x**y nearly rounded. In particular // pow(integer,integer) // always returns the correct integer provided it is // representable. // // Constants : // The hexadecimal values are the intended ones for the following // constants. The decimal values may be used, provided that the // compiler will convert from decimal to binary accurately enough // to produce the hexadecimal values shown. 
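// Editorial addition (not part of the upstream sources): a few of the special
// cases listed above, written as assertions against this module's `pow`. The
// wrapper name `demo_pow_special_cases` is hypothetical and exists only for
// illustration.
//
//     fn demo_pow_special_cases() {
//         assert_eq!(pow(f64::NAN, 0.0), 1.0);                    // case 1: x**0 is 1, even for NaN
//         assert_eq!(pow(1.0, f64::NAN), 1.0);                    // case 2: 1**y is 1, even for NaN
//         assert_eq!(pow(-1.0, f64::INFINITY), 1.0);              // case 9: -1**+-INF is 1
//         assert_eq!(pow(0.5, f64::NEG_INFINITY), f64::INFINITY); // case 8: (|x|<1)**-INF is +INF
//         assert!(pow(-2.5, 0.5).is_nan());                       // case 23: (-x)**non-integer is NaN
//     }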
// use super::{fabs, get_high_word, scalbn, sqrt, with_set_high_word, with_set_low_word}; const BP: [f64; 2] = [1.0, 1.5]; const DP_H: [f64; 2] = [0.0, 5.84962487220764160156e-01]; /* 0x3fe2b803_40000000 */ const DP_L: [f64; 2] = [0.0, 1.35003920212974897128e-08]; /* 0x3E4CFDEB, 0x43CFD006 */ const TWO53: f64 = 9007199254740992.0; /* 0x43400000_00000000 */ const HUGE: f64 = 1.0e300; const TINY: f64 = 1.0e-300; // poly coefs for (3/2)*(log(x)-2s-2/3*s**3: const L1: f64 = 5.99999999999994648725e-01; /* 0x3fe33333_33333303 */ const L2: f64 = 4.28571428578550184252e-01; /* 0x3fdb6db6_db6fabff */ const L3: f64 = 3.33333329818377432918e-01; /* 0x3fd55555_518f264d */ const L4: f64 = 2.72728123808534006489e-01; /* 0x3fd17460_a91d4101 */ const L5: f64 = 2.30660745775561754067e-01; /* 0x3fcd864a_93c9db65 */ const L6: f64 = 2.06975017800338417784e-01; /* 0x3fca7e28_4a454eef */ const P1: f64 = 1.66666666666666019037e-01; /* 0x3fc55555_5555553e */ const P2: f64 = -2.77777777770155933842e-03; /* 0xbf66c16c_16bebd93 */ const P3: f64 = 6.61375632143793436117e-05; /* 0x3f11566a_af25de2c */ const P4: f64 = -1.65339022054652515390e-06; /* 0xbebbbd41_c5d26bf1 */ const P5: f64 = 4.13813679705723846039e-08; /* 0x3e663769_72bea4d0 */ const LG2: f64 = 6.93147180559945286227e-01; /* 0x3fe62e42_fefa39ef */ const LG2_H: f64 = 6.93147182464599609375e-01; /* 0x3fe62e43_00000000 */ const LG2_L: f64 = -1.90465429995776804525e-09; /* 0xbe205c61_0ca86c39 */ const OVT: f64 = 8.0085662595372944372e-017; /* -(1024-log2(ovfl+.5ulp)) */ const CP: f64 = 9.61796693925975554329e-01; /* 0x3feec709_dc3a03fd =2/(3ln2) */ const CP_H: f64 = 9.61796700954437255859e-01; /* 0x3feec709_e0000000 =(float)cp */ const CP_L: f64 = -7.02846165095275826516e-09; /* 0xbe3e2fe0_145b01f5 =tail of cp_h*/ const IVLN2: f64 = 1.44269504088896338700e+00; /* 0x3ff71547_652b82fe =1/ln2 */ const IVLN2_H: f64 = 1.44269502162933349609e+00; /* 0x3ff71547_60000000 =24b 1/ln2*/ const IVLN2_L: f64 = 1.92596299112661746887e-08; /* 0x3e54ae0b_f85ddf44 =1/ln2 tail*/ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn pow(x: f64, y: f64) -> f64 { let t1: f64; let t2: f64; let (hx, lx): (i32, u32) = ((x.to_bits() >> 32) as i32, x.to_bits() as u32); let (hy, ly): (i32, u32) = ((y.to_bits() >> 32) as i32, y.to_bits() as u32); let mut ix: i32 = (hx & 0x7fffffff) as i32; let iy: i32 = (hy & 0x7fffffff) as i32; /* x**0 = 1, even if x is NaN */ if ((iy as u32) | ly) == 0 { return 1.0; } /* 1**y = 1, even if y is NaN */ if hx == 0x3ff00000 && lx == 0 { return 1.0; } /* NaN if either arg is NaN */ if ix > 0x7ff00000 || (ix == 0x7ff00000 && lx != 0) || iy > 0x7ff00000 || (iy == 0x7ff00000 && ly != 0) { return x + y; } /* determine if y is an odd int when x < 0 * yisint = 0 ... y is not an integer * yisint = 1 ... y is an odd int * yisint = 2 ... 
y is an even int */ let mut yisint: i32 = 0; let mut k: i32; let mut j: i32; if hx < 0 { if iy >= 0x43400000 { yisint = 2; /* even integer y */ } else if iy >= 0x3ff00000 { k = (iy >> 20) - 0x3ff; /* exponent */ if k > 20 { j = (ly >> (52 - k)) as i32; if (j << (52 - k)) == (ly as i32) { yisint = 2 - (j & 1); } } else if ly == 0 { j = iy >> (20 - k); if (j << (20 - k)) == iy { yisint = 2 - (j & 1); } } } } if ly == 0 { /* special value of y */ if iy == 0x7ff00000 { /* y is +-inf */ return if ((ix - 0x3ff00000) | (lx as i32)) == 0 { /* (-1)**+-inf is 1 */ 1.0 } else if ix >= 0x3ff00000 { /* (|x|>1)**+-inf = inf,0 */ if hy >= 0 { y } else { 0.0 } } else { /* (|x|<1)**+-inf = 0,inf */ if hy >= 0 { 0.0 } else { -y } }; } if iy == 0x3ff00000 { /* y is +-1 */ return if hy >= 0 { x } else { 1.0 / x }; } if hy == 0x40000000 { /* y is 2 */ return x * x; } if hy == 0x3fe00000 { /* y is 0.5 */ if hx >= 0 { /* x >= +0 */ return sqrt(x); } } } let mut ax: f64 = fabs(x); if lx == 0 { /* special value of x */ if ix == 0x7ff00000 || ix == 0 || ix == 0x3ff00000 { /* x is +-0,+-inf,+-1 */ let mut z: f64 = ax; if hy < 0 { /* z = (1/|x|) */ z = 1.0 / z; } if hx < 0 { if ((ix - 0x3ff00000) | yisint) == 0 { z = (z - z) / (z - z); /* (-1)**non-int is NaN */ } else if yisint == 1 { z = -z; /* (x<0)**odd = -(|x|**odd) */ } } return z; } } let mut s: f64 = 1.0; /* sign of result */ if hx < 0 { if yisint == 0 { /* (x<0)**(non-int) is NaN */ return (x - x) / (x - x); } if yisint == 1 { /* (x<0)**(odd int) */ s = -1.0; } } /* |y| is HUGE */ if iy > 0x41e00000 { /* if |y| > 2**31 */ if iy > 0x43f00000 { /* if |y| > 2**64, must o/uflow */ if ix <= 0x3fefffff { return if hy < 0 { HUGE * HUGE } else { TINY * TINY }; } if ix >= 0x3ff00000 { return if hy > 0 { HUGE * HUGE } else { TINY * TINY }; } } /* over/underflow if x is not close to one */ if ix < 0x3fefffff { return if hy < 0 { s * HUGE * HUGE } else { s * TINY * TINY }; } if ix > 0x3ff00000 { return if hy > 0 { s * HUGE * HUGE } else { s * TINY * TINY }; } /* now |1-x| is TINY <= 2**-20, suffice to compute log(x) by x-x^2/2+x^3/3-x^4/4 */ let t: f64 = ax - 1.0; /* t has 20 trailing zeros */ let w: f64 = (t * t) * (0.5 - t * (0.3333333333333333333333 - t * 0.25)); let u: f64 = IVLN2_H * t; /* ivln2_h has 21 sig. bits */ let v: f64 = t * IVLN2_L - w * IVLN2; t1 = with_set_low_word(u + v, 0); t2 = v - (t1 - u); } else { // double ss,s2,s_h,s_l,t_h,t_l; let mut n: i32 = 0; if ix < 0x00100000 { /* take care subnormal number */ ax *= TWO53; n -= 53; ix = get_high_word(ax) as i32; } n += (ix >> 20) - 0x3ff; j = ix & 0x000fffff; /* determine interval */ let k: i32; ix = j | 0x3ff00000; /* normalize ix */ if j <= 0x3988E { /* |x|<sqrt(3/2) */ k = 0; } else if j < 0xBB67A { /* |x|<sqrt(3) */ k = 1; } else { k = 0; n += 1; ix -= 0x00100000; } ax = with_set_high_word(ax, ix as u32); /* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ let u: f64 = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */ let v: f64 = 1.0 / (ax + i!(BP, k as usize)); let ss: f64 = u * v; let s_h = with_set_low_word(ss, 0); /* t_h=ax+bp[k] High */ let t_h: f64 = with_set_high_word( 0.0, ((ix as u32 >> 1) | 0x20000000) + 0x00080000 + ((k as u32) << 18), ); let t_l: f64 = ax - (t_h - i!(BP, k as usize)); let s_l: f64 = v * ((u - s_h * t_h) - s_h * t_l); /* compute log(ax) */ let s2: f64 = ss * ss; let mut r: f64 = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); r += s_l * (s_h + ss); let s2: f64 = s_h * s_h; let t_h: f64 = with_set_low_word(3.0 + s2 + r, 0); let t_l: f64 = r - ((t_h - 3.0) - s2); /* u+v = ss*(1+...) */ let u: f64 = s_h * t_h; let v: f64 = s_l * t_h + t_l * ss; /* 2/(3log2)*(ss+...)
*/ let p_h: f64 = with_set_low_word(u + v, 0); let p_l = v - (p_h - u); let z_h: f64 = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ let z_l: f64 = CP_L * p_h + p_l * CP + i!(DP_L, k as usize); /* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */ let t: f64 = n as f64; t1 = with_set_low_word(((z_h + z_l) + i!(DP_H, k as usize)) + t, 0); t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h); } /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ let y1: f64 = with_set_low_word(y, 0); let p_l: f64 = (y - y1) * t1 + y * t2; let mut p_h: f64 = y1 * t1; let z: f64 = p_l + p_h; let mut j: i32 = (z.to_bits() >> 32) as i32; let i: i32 = z.to_bits() as i32; // let (j, i): (i32, i32) = ((z.to_bits() >> 32) as i32, z.to_bits() as i32); if j >= 0x40900000 { /* z >= 1024 */ if (j - 0x40900000) | i != 0 { /* if z > 1024 */ return s * HUGE * HUGE; /* overflow */ } if p_l + OVT > z - p_h { return s * HUGE * HUGE; /* overflow */ } } else if (j & 0x7fffffff) >= 0x4090cc00 { /* z <= -1075 */ // FIXME: instead of abs(j) use unsigned j if (((j as u32) - 0xc090cc00) | (i as u32)) != 0 { /* z < -1075 */ return s * TINY * TINY; /* underflow */ } if p_l <= z - p_h { return s * TINY * TINY; /* underflow */ } } /* compute 2**(p_h+p_l) */ let i: i32 = j & (0x7fffffff as i32); k = (i >> 20) - 0x3ff; let mut n: i32 = 0; if i > 0x3fe00000 { /* if |z| > 0.5, set n = [z+0.5] */ n = j + (0x00100000 >> (k + 1)); k = ((n & 0x7fffffff) >> 20) - 0x3ff; /* new k for n */ let t: f64 = with_set_high_word(0.0, (n & !(0x000fffff >> k)) as u32); n = ((n & 0x000fffff) | 0x00100000) >> (20 - k); if j < 0 { n = -n; } p_h -= t; } let t: f64 = with_set_low_word(p_l + p_h, 0); let u: f64 = t * LG2_H; let v: f64 = (p_l - (t - p_h)) * LG2 + t * LG2_L; let mut z: f64 = u + v; let w: f64 = v - (z - u); let t: f64 = z * z; let t1: f64 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); let r: f64 = (z * t1) / (t1 - 2.0) - (w + z * w); z = 1.0 - (r - z); j = get_high_word(z) as i32; j += n << 20; if (j >> 20) <= 0 { /* subnormal output */ z = scalbn(z, n); } else { z = with_set_high_word(z, j as u32); } s * z } #[cfg(test)] mod tests { extern crate core; use self::core::f64::consts::{E, PI}; use self::core::f64::{EPSILON, INFINITY, MAX, MIN, MIN_POSITIVE, NAN, NEG_INFINITY}; use super::pow; const POS_ZERO: &[f64] = &[0.0]; const NEG_ZERO: &[f64] = &[-0.0]; const POS_ONE: &[f64] = &[1.0]; const NEG_ONE: &[f64] = &[-1.0]; const POS_FLOATS: &[f64] = &[99.0 / 70.0, E, PI]; const NEG_FLOATS: &[f64] = &[-99.0 / 70.0, -E, -PI]; const POS_SMALL_FLOATS: &[f64] = &[(1.0 / 2.0), MIN_POSITIVE, EPSILON]; const NEG_SMALL_FLOATS: &[f64] = &[-(1.0 / 2.0), -MIN_POSITIVE, -EPSILON]; const POS_EVENS: &[f64] = &[2.0, 6.0, 8.0, 10.0, 22.0, 100.0, MAX]; const NEG_EVENS: &[f64] = &[MIN, -100.0, -22.0, -10.0, -8.0, -6.0, -2.0]; const POS_ODDS: &[f64] = &[3.0, 7.0]; const NEG_ODDS: &[f64] = &[-7.0, -3.0]; const NANS: &[f64] = &[NAN]; const POS_INF: &[f64] = &[INFINITY]; const NEG_INF: &[f64] = &[NEG_INFINITY]; const ALL: &[&[f64]] = &[ POS_ZERO, NEG_ZERO, NANS, NEG_SMALL_FLOATS, POS_SMALL_FLOATS, NEG_FLOATS, POS_FLOATS, NEG_EVENS, POS_EVENS, NEG_ODDS, POS_ODDS, NEG_INF, POS_INF, NEG_ONE, POS_ONE, ]; const POS: &[&[f64]] = &[POS_ZERO, POS_ODDS, POS_ONE, POS_FLOATS, POS_EVENS, POS_INF]; const NEG: &[&[f64]] = &[NEG_ZERO, NEG_ODDS, NEG_ONE, NEG_FLOATS, NEG_EVENS, NEG_INF]; fn pow_test(base: f64, exponent: f64, expected: f64) { let res = pow(base, exponent); assert!( if expected.is_nan() { res.is_nan() } else { pow(base, exponent) == expected }, "{} ** {} was {} 
instead of {}", base, exponent, res, expected ); } fn test_sets_as_base(sets: &[&[f64]], exponent: f64, expected: f64) { sets.iter() .for_each(|s| s.iter().for_each(|val| pow_test(*val, exponent, expected))); } fn test_sets_as_exponent(base: f64, sets: &[&[f64]], expected: f64) { sets.iter() .for_each(|s| s.iter().for_each(|val| pow_test(base, *val, expected))); } fn test_sets(sets: &[&[f64]], computed: &dyn Fn(f64) -> f64, expected: &dyn Fn(f64) -> f64) { sets.iter().for_each(|s| { s.iter().for_each(|val| { let exp = expected(*val); let res = computed(*val); #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] let exp = force_eval!(exp); #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] let res = force_eval!(res); assert!( if exp.is_nan() { res.is_nan() } else { exp == res }, "test for {} was {} instead of {}", val, res, exp ); }) }); } #[test] fn zero_as_exponent() { test_sets_as_base(ALL, 0.0, 1.0); test_sets_as_base(ALL, -0.0, 1.0); } #[test] fn one_as_base() { test_sets_as_exponent(1.0, ALL, 1.0); } #[test] fn nan_inputs() { // NAN as the base: // (NAN ^ anything *but 0* should be NAN) test_sets_as_exponent(NAN, &ALL[2..], NAN); // NAN as the exponent: // (anything *but 1* ^ NAN should be NAN) test_sets_as_base(&ALL[..(ALL.len() - 2)], NAN, NAN); } #[test] fn infinity_as_base() { // Positive Infinity as the base: // (+Infinity ^ positive anything but 0 and NAN should be +Infinity) test_sets_as_exponent(INFINITY, &POS[1..], INFINITY); // (+Infinity ^ negative anything except 0 and NAN should be 0.0) test_sets_as_exponent(INFINITY, &NEG[1..], 0.0); // Negative Infinity as the base: // (-Infinity ^ positive odd ints should be -Infinity) test_sets_as_exponent(NEG_INFINITY, &[POS_ODDS], NEG_INFINITY); // (-Infinity ^ anything but odd ints should be == -0 ^ (-anything)) // We can lump in pos/neg odd ints here because they don't seem to // cause panics (div by zero) in release mode (I think). 
test_sets(ALL, &|v: f64| pow(NEG_INFINITY, v), &|v: f64| pow(-0.0, -v)); } #[test] fn infinity_as_exponent() { // Positive/Negative base greater than 1: // (pos/neg > 1 ^ Infinity should be Infinity - note this excludes NAN as the base) test_sets_as_base(&ALL[5..(ALL.len() - 2)], INFINITY, INFINITY); // (pos/neg > 1 ^ -Infinity should be 0.0) test_sets_as_base(&ALL[5..ALL.len() - 2], NEG_INFINITY, 0.0); // Positive/Negative base less than 1: let base_below_one = &[POS_ZERO, NEG_ZERO, NEG_SMALL_FLOATS, POS_SMALL_FLOATS]; // (pos/neg < 1 ^ Infinity should be 0.0 - this also excludes NAN as the base) test_sets_as_base(base_below_one, INFINITY, 0.0); // (pos/neg < 1 ^ -Infinity should be Infinity) test_sets_as_base(base_below_one, NEG_INFINITY, INFINITY); // Positive/Negative 1 as the base: // (pos/neg 1 ^ Infinity should be 1) test_sets_as_base(&[NEG_ONE, POS_ONE], INFINITY, 1.0); // (pos/neg 1 ^ -Infinity should be 1) test_sets_as_base(&[NEG_ONE, POS_ONE], NEG_INFINITY, 1.0); } #[test] fn zero_as_base() { // Positive Zero as the base: // (+0 ^ anything positive but 0 and NAN should be +0) test_sets_as_exponent(0.0, &POS[1..], 0.0); // (+0 ^ anything negative but 0 and NAN should be Infinity) // (this should panic because we're dividing by zero) test_sets_as_exponent(0.0, &NEG[1..], INFINITY); // Negative Zero as the base: // (-0 ^ anything positive but 0, NAN, and odd ints should be +0) test_sets_as_exponent(-0.0, &POS[3..], 0.0); // (-0 ^ anything negative but 0, NAN, and odd ints should be Infinity) // (should panic because of divide by zero) test_sets_as_exponent(-0.0, &NEG[3..], INFINITY); // (-0 ^ positive odd ints should be -0) test_sets_as_exponent(-0.0, &[POS_ODDS], -0.0); // (-0 ^ negative odd ints should be -Infinity) // (should panic because of divide by zero) test_sets_as_exponent(-0.0, &[NEG_ODDS], NEG_INFINITY); } #[test] fn special_cases() { // One as the exponent: // (anything ^ 1 should be anything - i.e. the base) test_sets(ALL, &|v: f64| pow(v, 1.0), &|v: f64| v); // Negative One as the exponent: // (anything ^ -1 should be 1/anything) test_sets(ALL, &|v: f64| pow(v, -1.0), &|v: f64| 1.0 / v); // Factoring -1 out: // (negative anything ^ integer should be (-1 ^ integer) * (positive anything ^ integer)) (&[POS_ZERO, NEG_ZERO, POS_ONE, NEG_ONE, POS_EVENS, NEG_EVENS]) .iter() .for_each(|int_set| { int_set.iter().for_each(|int| { test_sets(ALL, &|v: f64| pow(-v, *int), &|v: f64| { pow(-1.0, *int) * pow(v, *int) }); }) }); // Negative base (imaginary results): // (-anything except 0 and Infinity ^ non-integer should be NAN) (&NEG[1..(NEG.len() - 1)]).iter().for_each(|set| { set.iter().for_each(|val| { test_sets(&ALL[3..7], &|v: f64| pow(*val, v), &|_| NAN); }) }); } #[test] fn normal_cases() { assert_eq!(pow(2.0, 20.0), (1 << 20) as f64); assert_eq!(pow(-1.0, 9.0), -1.0); assert!(pow(-1.0, 2.2).is_nan()); assert!(pow(-1.0, -1.14).is_nan()); } } compiler_builtins-0.1.101/libm/src/math/powf.rs000064400000000000000000000234671046102023000175070ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_powf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ use super::{fabsf, scalbnf, sqrtf}; const BP: [f32; 2] = [1.0, 1.5]; const DP_H: [f32; 2] = [0.0, 5.84960938e-01]; /* 0x3f15c000 */ const DP_L: [f32; 2] = [0.0, 1.56322085e-06]; /* 0x35d1cfdc */ const TWO24: f32 = 16777216.0; /* 0x4b800000 */ const HUGE: f32 = 1.0e30; const TINY: f32 = 1.0e-30; const L1: f32 = 6.0000002384e-01; /* 0x3f19999a */ const L2: f32 = 4.2857143283e-01; /* 0x3edb6db7 */ const L3: f32 = 3.3333334327e-01; /* 0x3eaaaaab */ const L4: f32 = 2.7272811532e-01; /* 0x3e8ba305 */ const L5: f32 = 2.3066075146e-01; /* 0x3e6c3255 */ const L6: f32 = 2.0697501302e-01; /* 0x3e53f142 */ const P1: f32 = 1.6666667163e-01; /* 0x3e2aaaab */ const P2: f32 = -2.7777778450e-03; /* 0xbb360b61 */ const P3: f32 = 6.6137559770e-05; /* 0x388ab355 */ const P4: f32 = -1.6533901999e-06; /* 0xb5ddea0e */ const P5: f32 = 4.1381369442e-08; /* 0x3331bb4c */ const LG2: f32 = 6.9314718246e-01; /* 0x3f317218 */ const LG2_H: f32 = 6.93145752e-01; /* 0x3f317200 */ const LG2_L: f32 = 1.42860654e-06; /* 0x35bfbe8c */ const OVT: f32 = 4.2995665694e-08; /* -(128-log2(ovfl+.5ulp)) */ const CP: f32 = 9.6179670095e-01; /* 0x3f76384f =2/(3ln2) */ const CP_H: f32 = 9.6191406250e-01; /* 0x3f764000 =12b cp */ const CP_L: f32 = -1.1736857402e-04; /* 0xb8f623c6 =tail of cp_h */ const IVLN2: f32 = 1.4426950216e+00; const IVLN2_H: f32 = 1.4426879883e+00; const IVLN2_L: f32 = 7.0526075433e-06; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn powf(x: f32, y: f32) -> f32 { let mut z: f32; let mut ax: f32; let z_h: f32; let z_l: f32; let mut p_h: f32; let mut p_l: f32; let y1: f32; let mut t1: f32; let t2: f32; let mut r: f32; let s: f32; let mut sn: f32; let mut t: f32; let mut u: f32; let mut v: f32; let mut w: f32; let i: i32; let mut j: i32; let mut k: i32; let mut yisint: i32; let mut n: i32; let hx: i32; let hy: i32; let mut ix: i32; let iy: i32; let mut is: i32; hx = x.to_bits() as i32; hy = y.to_bits() as i32; ix = hx & 0x7fffffff; iy = hy & 0x7fffffff; /* x**0 = 1, even if x is NaN */ if iy == 0 { return 1.0; } /* 1**y = 1, even if y is NaN */ if hx == 0x3f800000 { return 1.0; } /* NaN if either arg is NaN */ if ix > 0x7f800000 || iy > 0x7f800000 { return x + y; } /* determine if y is an odd int when x < 0 * yisint = 0 ... y is not an integer * yisint = 1 ... y is an odd int * yisint = 2 ... 
y is an even int */ yisint = 0; if hx < 0 { if iy >= 0x4b800000 { yisint = 2; /* even integer y */ } else if iy >= 0x3f800000 { k = (iy >> 23) - 0x7f; /* exponent */ j = iy >> (23 - k); if (j << (23 - k)) == iy { yisint = 2 - (j & 1); } } } /* special value of y */ if iy == 0x7f800000 { /* y is +-inf */ if ix == 0x3f800000 { /* (-1)**+-inf is 1 */ return 1.0; } else if ix > 0x3f800000 { /* (|x|>1)**+-inf = inf,0 */ return if hy >= 0 { y } else { 0.0 }; } else { /* (|x|<1)**+-inf = 0,inf */ return if hy >= 0 { 0.0 } else { -y }; } } if iy == 0x3f800000 { /* y is +-1 */ return if hy >= 0 { x } else { 1.0 / x }; } if hy == 0x40000000 { /* y is 2 */ return x * x; } if hy == 0x3f000000 /* y is 0.5 */ && hx >= 0 { /* x >= +0 */ return sqrtf(x); } ax = fabsf(x); /* special value of x */ if ix == 0x7f800000 || ix == 0 || ix == 0x3f800000 { /* x is +-0,+-inf,+-1 */ z = ax; if hy < 0 { /* z = (1/|x|) */ z = 1.0 / z; } if hx < 0 { if ((ix - 0x3f800000) | yisint) == 0 { z = (z - z) / (z - z); /* (-1)**non-int is NaN */ } else if yisint == 1 { z = -z; /* (x<0)**odd = -(|x|**odd) */ } } return z; } sn = 1.0; /* sign of result */ if hx < 0 { if yisint == 0 { /* (x<0)**(non-int) is NaN */ return (x - x) / (x - x); } if yisint == 1 { /* (x<0)**(odd int) */ sn = -1.0; } } /* |y| is HUGE */ if iy > 0x4d000000 { /* if |y| > 2**27 */ /* over/underflow if x is not close to one */ if ix < 0x3f7ffff8 { return if hy < 0 { sn * HUGE * HUGE } else { sn * TINY * TINY }; } if ix > 0x3f800007 { return if hy > 0 { sn * HUGE * HUGE } else { sn * TINY * TINY }; } /* now |1-x| is TINY <= 2**-20, suffice to compute log(x) by x-x^2/2+x^3/3-x^4/4 */ t = ax - 1.; /* t has 20 trailing zeros */ w = (t * t) * (0.5 - t * (0.333333333333 - t * 0.25)); u = IVLN2_H * t; /* IVLN2_H has 16 sig. bits */ v = t * IVLN2_L - w * IVLN2; t1 = u + v; is = t1.to_bits() as i32; t1 = f32::from_bits(is as u32 & 0xfffff000); t2 = v - (t1 - u); } else { let mut s2: f32; let mut s_h: f32; let s_l: f32; let mut t_h: f32; let mut t_l: f32; n = 0; /* take care subnormal number */ if ix < 0x00800000 { ax *= TWO24; n -= 24; ix = ax.to_bits() as i32; } n += ((ix) >> 23) - 0x7f; j = ix & 0x007fffff; /* determine interval */ ix = j | 0x3f800000; /* normalize ix */ if j <= 0x1cc471 { /* |x|<sqrt(3/2) */ k = 0; } else if j < 0x5db3d7 { /* |x|<sqrt(3) */ k = 1; } else { k = 0; n += 1; ix -= 0x00800000; } ax = f32::from_bits(ix as u32); /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ u = ax - i!(BP, k as usize); /* bp[0]=1.0, bp[1]=1.5 */ v = 1.0 / (ax + i!(BP, k as usize)); s = u * v; s_h = s; is = s_h.to_bits() as i32; s_h = f32::from_bits(is as u32 & 0xfffff000); /* t_h=ax+bp[k] High */ is = (((ix as u32 >> 1) & 0xfffff000) | 0x20000000) as i32; t_h = f32::from_bits(is as u32 + 0x00400000 + ((k as u32) << 21)); t_l = ax - (t_h - i!(BP, k as usize)); s_l = v * ((u - s_h * t_h) - s_h * t_l); /* compute log(ax) */ s2 = s * s; r = s2 * s2 * (L1 + s2 * (L2 + s2 * (L3 + s2 * (L4 + s2 * (L5 + s2 * L6))))); r += s_l * (s_h + s); s2 = s_h * s_h; t_h = 3.0 + s2 + r; is = t_h.to_bits() as i32; t_h = f32::from_bits(is as u32 & 0xfffff000); t_l = r - ((t_h - 3.0) - s2); /* u+v = s*(1+...) */ u = s_h * t_h; v = s_l * t_h + t_l * s; /* 2/(3log2)*(s+...)
*/ p_h = u + v; is = p_h.to_bits() as i32; p_h = f32::from_bits(is as u32 & 0xfffff000); p_l = v - (p_h - u); z_h = CP_H * p_h; /* cp_h+cp_l = 2/(3*log2) */ z_l = CP_L * p_h + p_l * CP + i!(DP_L, k as usize); /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ t = n as f32; t1 = ((z_h + z_l) + i!(DP_H, k as usize)) + t; is = t1.to_bits() as i32; t1 = f32::from_bits(is as u32 & 0xfffff000); t2 = z_l - (((t1 - t) - i!(DP_H, k as usize)) - z_h); }; /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ is = y.to_bits() as i32; y1 = f32::from_bits(is as u32 & 0xfffff000); p_l = (y - y1) * t1 + y * t2; p_h = y1 * t1; z = p_l + p_h; j = z.to_bits() as i32; if j > 0x43000000 { /* if z > 128 */ return sn * HUGE * HUGE; /* overflow */ } else if j == 0x43000000 { /* if z == 128 */ if p_l + OVT > z - p_h { return sn * HUGE * HUGE; /* overflow */ } } else if (j & 0x7fffffff) > 0x43160000 { /* z < -150 */ // FIXME: check should be (uint32_t)j > 0xc3160000 return sn * TINY * TINY; /* underflow */ } else if j as u32 == 0xc3160000 /* z == -150 */ && p_l <= z - p_h { return sn * TINY * TINY; /* underflow */ } /* * compute 2**(p_h+p_l) */ i = j & 0x7fffffff; k = (i >> 23) - 0x7f; n = 0; if i > 0x3f000000 { /* if |z| > 0.5, set n = [z+0.5] */ n = j + (0x00800000 >> (k + 1)); k = ((n & 0x7fffffff) >> 23) - 0x7f; /* new k for n */ t = f32::from_bits(n as u32 & !(0x007fffff >> k)); n = ((n & 0x007fffff) | 0x00800000) >> (23 - k); if j < 0 { n = -n; } p_h -= t; } t = p_l + p_h; is = t.to_bits() as i32; t = f32::from_bits(is as u32 & 0xffff8000); u = t * LG2_H; v = (p_l - (t - p_h)) * LG2 + t * LG2_L; z = u + v; w = v - (z - u); t = z * z; t1 = z - t * (P1 + t * (P2 + t * (P3 + t * (P4 + t * P5)))); r = (z * t1) / (t1 - 2.0) - (w + z * w); z = 1.0 - (r - z); j = z.to_bits() as i32; j += n << 23; if (j >> 23) <= 0 { /* subnormal output */ z = scalbnf(z, n); } else { z = f32::from_bits(j as u32); } sn * z } compiler_builtins-0.1.101/libm/src/math/rem_pio2.rs000064400000000000000000000165151046102023000202440ustar 00000000000000// origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c // // ==================================================== // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. // // Developed at SunPro, a Sun Microsystems, Inc. business. // Permission to use, copy, modify, and distribute this // software is freely granted, provided that this notice // is preserved. // ==================================================== // // Optimized by Bruce D. Evans. */ use super::rem_pio2_large; // #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1 // #define EPS DBL_EPSILON const EPS: f64 = 2.2204460492503131e-16; // #elif FLT_EVAL_METHOD==2 // #define EPS LDBL_EPSILON // #endif // TODO: Support FLT_EVAL_METHOD? 
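// Editorial note (added for clarity, not in the upstream sources): TO_INT below
// is the classic "add then subtract a large constant" rounding trick. 1.5 / EPS
// equals 1.5 * 2^52, so for values much smaller than 2^51 in magnitude,
// (v + TO_INT) - TO_INT yields v rounded to the nearest integer under the
// default round-to-nearest mode; medium() relies on this to compute
// rint(x * INV_PIO2) without calling a rounding function. A minimal sketch:
//
//     let v = 2.7_f64;
//     let to_int = 1.5 / f64::EPSILON;
//     assert_eq!((v + to_int) - to_int, 3.0);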
const TO_INT: f64 = 1.5 / EPS; /// 53 bits of 2/pi const INV_PIO2: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ /// first 33 bits of pi/2 const PIO2_1: f64 = 1.57079632673412561417e+00; /* 0x3FF921FB, 0x54400000 */ /// pi/2 - PIO2_1 const PIO2_1T: f64 = 6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */ /// second 33 bits of pi/2 const PIO2_2: f64 = 6.07710050630396597660e-11; /* 0x3DD0B461, 0x1A600000 */ /// pi/2 - (PIO2_1+PIO2_2) const PIO2_2T: f64 = 2.02226624879595063154e-21; /* 0x3BA3198A, 0x2E037073 */ /// third 33 bits of pi/2 const PIO2_3: f64 = 2.02226624871116645580e-21; /* 0x3BA3198A, 0x2E000000 */ /// pi/2 - (PIO2_1+PIO2_2+PIO2_3) const PIO2_3T: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ // return the remainder of x rem pi/2 in y[0]+y[1] // use rem_pio2_large() for large x // // caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) { let x1p24 = f64::from_bits(0x4170000000000000); let sign = (f64::to_bits(x) >> 63) as i32; let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; fn medium(x: f64, ix: u32) -> (i32, f64, f64) { /* rint(x/(pi/2)), Assume round-to-nearest. */ let tmp = x as f64 * INV_PIO2 + TO_INT; // force rounding of tmp to it's storage format on x87 to avoid // excess precision issues. #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] let tmp = force_eval!(tmp); let f_n = tmp - TO_INT; let n = f_n as i32; let mut r = x - f_n * PIO2_1; let mut w = f_n * PIO2_1T; /* 1st round, good to 85 bits */ let mut y0 = r - w; let ui = f64::to_bits(y0); let ey = (ui >> 52) as i32 & 0x7ff; let ex = (ix >> 20) as i32; if ex - ey > 16 { /* 2nd round, good to 118 bits */ let t = r; w = f_n * PIO2_2; r = t - w; w = f_n * PIO2_2T - ((t - r) - w); y0 = r - w; let ey = (f64::to_bits(y0) >> 52) as i32 & 0x7ff; if ex - ey > 49 { /* 3rd round, good to 151 bits, covers all cases */ let t = r; w = f_n * PIO2_3; r = t - w; w = f_n * PIO2_3T - ((t - r) - w); y0 = r - w; } } let y1 = (r - y0) - w; (n, y0, y1) } if ix <= 0x400f6a7a { /* |x| ~<= 5pi/4 */ if (ix & 0xfffff) == 0x921fb { /* |x| ~= pi/2 or 2pi/2 */ return medium(x, ix); /* cancellation -- use medium case */ } if ix <= 0x4002d97c { /* |x| ~<= 3pi/4 */ if sign == 0 { let z = x - PIO2_1; /* one round good to 85 bits */ let y0 = z - PIO2_1T; let y1 = (z - y0) - PIO2_1T; return (1, y0, y1); } else { let z = x + PIO2_1; let y0 = z + PIO2_1T; let y1 = (z - y0) + PIO2_1T; return (-1, y0, y1); } } else if sign == 0 { let z = x - 2.0 * PIO2_1; let y0 = z - 2.0 * PIO2_1T; let y1 = (z - y0) - 2.0 * PIO2_1T; return (2, y0, y1); } else { let z = x + 2.0 * PIO2_1; let y0 = z + 2.0 * PIO2_1T; let y1 = (z - y0) + 2.0 * PIO2_1T; return (-2, y0, y1); } } if ix <= 0x401c463b { /* |x| ~<= 9pi/4 */ if ix <= 0x4015fdbc { /* |x| ~<= 7pi/4 */ if ix == 0x4012d97c { /* |x| ~= 3pi/2 */ return medium(x, ix); } if sign == 0 { let z = x - 3.0 * PIO2_1; let y0 = z - 3.0 * PIO2_1T; let y1 = (z - y0) - 3.0 * PIO2_1T; return (3, y0, y1); } else { let z = x + 3.0 * PIO2_1; let y0 = z + 3.0 * PIO2_1T; let y1 = (z - y0) + 3.0 * PIO2_1T; return (-3, y0, y1); } } else { if ix == 0x401921fb { /* |x| ~= 4pi/2 */ return medium(x, ix); } if sign == 0 { let z = x - 4.0 * PIO2_1; let y0 = z - 4.0 * PIO2_1T; let y1 = (z - y0) - 4.0 * PIO2_1T; return (4, y0, y1); } else { let z = x + 4.0 * PIO2_1; let y0 = z + 4.0 * PIO2_1T; let y1 = (z - y0) + 4.0 * PIO2_1T; return (-4, y0, y1); } } } if ix < 
0x413921fb { /* |x| ~< 2^20*(pi/2), medium size */ return medium(x, ix); } /* * all other (large) arguments */ if ix >= 0x7ff00000 { /* x is inf or NaN */ let y0 = x - x; let y1 = y0; return (0, y0, y1); } /* set z = scalbn(|x|,-ilogb(x)+23) */ let mut ui = f64::to_bits(x); ui &= (!1) >> 12; ui |= (0x3ff + 23) << 52; let mut z = f64::from_bits(ui); let mut tx = [0.0; 3]; for i in 0..2 { i!(tx,i, =, z as i32 as f64); z = (z - i!(tx, i)) * x1p24; } i!(tx,2, =, z); /* skip zero terms, first term is non-zero */ let mut i = 2; while i != 0 && i!(tx, i) == 0.0 { i -= 1; } let mut ty = [0.0; 3]; let n = rem_pio2_large(&tx[..=i], &mut ty, ((ix as i32) >> 20) - (0x3ff + 23), 1); if sign != 0 { return (-n, -i!(ty, 0), -i!(ty, 1)); } (n, i!(ty, 0), i!(ty, 1)) } #[cfg(test)] mod tests { use super::rem_pio2; #[test] fn test_near_pi() { let arg = 3.141592025756836; let arg = force_eval!(arg); assert_eq!( rem_pio2(arg), (2, -6.278329573009626e-7, -2.1125998133974653e-23) ); let arg = 3.141592033207416; let arg = force_eval!(arg); assert_eq!( rem_pio2(arg), (2, -6.20382377148128e-7, -2.1125998133974653e-23) ); let arg = 3.141592144966125; let arg = force_eval!(arg); assert_eq!( rem_pio2(arg), (2, -5.086236681942706e-7, -2.1125998133974653e-23) ); let arg = 3.141592979431152; let arg = force_eval!(arg); assert_eq!( rem_pio2(arg), (2, 3.2584135866119817e-7, -2.1125998133974653e-23) ); } #[test] fn test_overflow_b9b847() { let _ = rem_pio2(-3054214.5490637687); } #[test] fn test_overflow_4747b9() { let _ = rem_pio2(917340800458.2274); } } compiler_builtins-0.1.101/libm/src/math/rem_pio2_large.rs000064400000000000000000000476021046102023000214170ustar 00000000000000#![allow(unused_unsafe)] /* origin: FreeBSD /usr/src/lib/msun/src/k_rem_pio2.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use super::floor; use super::scalbn; // initial value for jk const INIT_JK: [usize; 4] = [3, 4, 4, 6]; // Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi // // integer array, contains the (24*i)-th to (24*i+23)-th // bit of 2/pi after binary point. The corresponding // floating value is // // ipio2[i] * 2^(-24(i+1)). // // NB: This table must have at least (e0-3)/24 + jk terms. // For quad precision (e0 <= 16360, jk = 6), this is 686. 
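// Editorial note (added for clarity, not part of the upstream table comment):
// each entry supplies the next 24 bits of 2/pi, so the first entry alone already
// matches 2/pi to roughly 24 bits:
//
//     let first = 0xA2F983 as f64 / (1u64 << 24) as f64; // 0.636619746685...
//     let exact = core::f64::consts::FRAC_2_PI;          // 0.636619772367...
//     assert!((first - exact).abs() < 1.0 / (1u64 << 24) as f64);
//
// and each further entry (0x6E4E44 * 2^-48, ...) refines the approximation by
// another 24 bits.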
#[cfg(any(target_pointer_width = "32", target_pointer_width = "16"))] const IPIO2: [i32; 66] = [ 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, ]; #[cfg(target_pointer_width = "64")] const IPIO2: [i32; 690] = [ 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, 0x47C419, 0xC367CD, 0xDCE809, 0x2A8359, 0xC4768B, 0x961CA6, 0xDDAF44, 0xD15719, 0x053EA5, 0xFF0705, 0x3F7E33, 0xE832C2, 0xDE4F98, 0x327DBB, 0xC33D26, 0xEF6B1E, 0x5EF89F, 0x3A1F35, 0xCAF27F, 0x1D87F1, 0x21907C, 0x7C246A, 0xFA6ED5, 0x772D30, 0x433B15, 0xC614B5, 0x9D19C3, 0xC2C4AD, 0x414D2C, 0x5D000C, 0x467D86, 0x2D71E3, 0x9AC69B, 0x006233, 0x7CD2B4, 0x97A7B4, 0xD55537, 0xF63ED7, 0x1810A3, 0xFC764D, 0x2A9D64, 0xABD770, 0xF87C63, 0x57B07A, 0xE71517, 0x5649C0, 0xD9D63B, 0x3884A7, 0xCB2324, 0x778AD6, 0x23545A, 0xB91F00, 0x1B0AF1, 0xDFCE19, 0xFF319F, 0x6A1E66, 0x615799, 0x47FBAC, 0xD87F7E, 0xB76522, 0x89E832, 0x60BFE6, 0xCDC4EF, 0x09366C, 0xD43F5D, 0xD7DE16, 0xDE3B58, 0x929BDE, 0x2822D2, 0xE88628, 0x4D58E2, 0x32CAC6, 0x16E308, 0xCB7DE0, 0x50C017, 0xA71DF3, 0x5BE018, 0x34132E, 0x621283, 0x014883, 0x5B8EF5, 0x7FB0AD, 0xF2E91E, 0x434A48, 0xD36710, 0xD8DDAA, 0x425FAE, 0xCE616A, 0xA4280A, 0xB499D3, 0xF2A606, 0x7F775C, 0x83C2A3, 0x883C61, 0x78738A, 0x5A8CAF, 0xBDD76F, 0x63A62D, 0xCBBFF4, 0xEF818D, 0x67C126, 0x45CA55, 0x36D9CA, 0xD2A828, 0x8D61C2, 0x77C912, 0x142604, 0x9B4612, 0xC459C4, 0x44C5C8, 0x91B24D, 0xF31700, 0xAD43D4, 0xE54929, 0x10D5FD, 0xFCBE00, 0xCC941E, 0xEECE70, 0xF53E13, 0x80F1EC, 0xC3E7B3, 0x28F8C7, 0x940593, 0x3E71C1, 0xB3092E, 0xF3450B, 0x9C1288, 0x7B20AB, 0x9FB52E, 0xC29247, 0x2F327B, 0x6D550C, 0x90A772, 0x1FE76B, 0x96CB31, 0x4A1679, 0xE27941, 0x89DFF4, 0x9794E8, 0x84E6E2, 0x973199, 0x6BED88, 0x365F5F, 0x0EFDBB, 0xB49A48, 0x6CA467, 0x427271, 0x325D8D, 0xB8159F, 0x09E5BC, 0x25318D, 0x3974F7, 0x1C0530, 0x010C0D, 0x68084B, 0x58EE2C, 0x90AA47, 0x02E774, 0x24D6BD, 0xA67DF7, 0x72486E, 0xEF169F, 0xA6948E, 0xF691B4, 0x5153D1, 0xF20ACF, 0x339820, 0x7E4BF5, 0x6863B2, 0x5F3EDD, 0x035D40, 0x7F8985, 0x295255, 0xC06437, 0x10D86D, 0x324832, 0x754C5B, 0xD4714E, 0x6E5445, 0xC1090B, 0x69F52A, 0xD56614, 0x9D0727, 0x50045D, 0xDB3BB4, 0xC576EA, 0x17F987, 0x7D6B49, 0xBA271D, 0x296996, 0xACCCC6, 0x5414AD, 0x6AE290, 0x89D988, 0x50722C, 0xBEA404, 0x940777, 0x7030F3, 0x27FC00, 0xA871EA, 0x49C266, 0x3DE064, 0x83DD97, 0x973FA3, 0xFD9443, 0x8C860D, 0xDE4131, 
0x9D3992, 0x8C70DD, 0xE7B717, 0x3BDF08, 0x2B3715, 0xA0805C, 0x93805A, 0x921110, 0xD8E80F, 0xAF806C, 0x4BFFDB, 0x0F9038, 0x761859, 0x15A562, 0xBBCB61, 0xB989C7, 0xBD4010, 0x04F2D2, 0x277549, 0xF6B6EB, 0xBB22DB, 0xAA140A, 0x2F2689, 0x768364, 0x333B09, 0x1A940E, 0xAA3A51, 0xC2A31D, 0xAEEDAF, 0x12265C, 0x4DC26D, 0x9C7A2D, 0x9756C0, 0x833F03, 0xF6F009, 0x8C402B, 0x99316D, 0x07B439, 0x15200C, 0x5BC3D8, 0xC492F5, 0x4BADC6, 0xA5CA4E, 0xCD37A7, 0x36A9E6, 0x9492AB, 0x6842DD, 0xDE6319, 0xEF8C76, 0x528B68, 0x37DBFC, 0xABA1AE, 0x3115DF, 0xA1AE00, 0xDAFB0C, 0x664D64, 0xB705ED, 0x306529, 0xBF5657, 0x3AFF47, 0xB9F96A, 0xF3BE75, 0xDF9328, 0x3080AB, 0xF68C66, 0x15CB04, 0x0622FA, 0x1DE4D9, 0xA4B33D, 0x8F1B57, 0x09CD36, 0xE9424E, 0xA4BE13, 0xB52333, 0x1AAAF0, 0xA8654F, 0xA5C1D2, 0x0F3F0B, 0xCD785B, 0x76F923, 0x048B7B, 0x721789, 0x53A6C6, 0xE26E6F, 0x00EBEF, 0x584A9B, 0xB7DAC4, 0xBA66AA, 0xCFCF76, 0x1D02D1, 0x2DF1B1, 0xC1998C, 0x77ADC3, 0xDA4886, 0xA05DF7, 0xF480C6, 0x2FF0AC, 0x9AECDD, 0xBC5C3F, 0x6DDED0, 0x1FC790, 0xB6DB2A, 0x3A25A3, 0x9AAF00, 0x9353AD, 0x0457B6, 0xB42D29, 0x7E804B, 0xA707DA, 0x0EAA76, 0xA1597B, 0x2A1216, 0x2DB7DC, 0xFDE5FA, 0xFEDB89, 0xFDBE89, 0x6C76E4, 0xFCA906, 0x70803E, 0x156E85, 0xFF87FD, 0x073E28, 0x336761, 0x86182A, 0xEABD4D, 0xAFE7B3, 0x6E6D8F, 0x396795, 0x5BBF31, 0x48D784, 0x16DF30, 0x432DC7, 0x356125, 0xCE70C9, 0xB8CB30, 0xFD6CBF, 0xA200A4, 0xE46C05, 0xA0DD5A, 0x476F21, 0xD21262, 0x845CB9, 0x496170, 0xE0566B, 0x015299, 0x375550, 0xB7D51E, 0xC4F133, 0x5F6E13, 0xE4305D, 0xA92E85, 0xC3B21D, 0x3632A1, 0xA4B708, 0xD4B1EA, 0x21F716, 0xE4698F, 0x77FF27, 0x80030C, 0x2D408D, 0xA0CD4F, 0x99A520, 0xD3A2B3, 0x0A5D2F, 0x42F9B4, 0xCBDA11, 0xD0BE7D, 0xC1DB9B, 0xBD17AB, 0x81A2CA, 0x5C6A08, 0x17552E, 0x550027, 0xF0147F, 0x8607E1, 0x640B14, 0x8D4196, 0xDEBE87, 0x2AFDDA, 0xB6256B, 0x34897B, 0xFEF305, 0x9EBFB9, 0x4F6A68, 0xA82A4A, 0x5AC44F, 0xBCF82D, 0x985AD7, 0x95C7F4, 0x8D4D0D, 0xA63A20, 0x5F57A4, 0xB13F14, 0x953880, 0x0120CC, 0x86DD71, 0xB6DEC9, 0xF560BF, 0x11654D, 0x6B0701, 0xACB08C, 0xD0C0B2, 0x485551, 0x0EFB1E, 0xC37295, 0x3B06A3, 0x3540C0, 0x7BDC06, 0xCC45E0, 0xFA294E, 0xC8CAD6, 0x41F3E8, 0xDE647C, 0xD8649B, 0x31BED9, 0xC397A4, 0xD45877, 0xC5E369, 0x13DAF0, 0x3C3ABA, 0x461846, 0x5F7555, 0xF5BDD2, 0xC6926E, 0x5D2EAC, 0xED440E, 0x423E1C, 0x87C461, 0xE9FD29, 0xF3D6E7, 0xCA7C22, 0x35916F, 0xC5E008, 0x8DD7FF, 0xE26A6E, 0xC6FDB0, 0xC10893, 0x745D7C, 0xB2AD6B, 0x9D6ECD, 0x7B723E, 0x6A11C6, 0xA9CFF7, 0xDF7329, 0xBAC9B5, 0x5100B7, 0x0DB2E2, 0x24BA74, 0x607DE5, 0x8AD874, 0x2C150D, 0x0C1881, 0x94667E, 0x162901, 0x767A9F, 0xBEFDFD, 0xEF4556, 0x367ED9, 0x13D9EC, 0xB9BA8B, 0xFC97C4, 0x27A831, 0xC36EF1, 0x36C594, 0x56A8D8, 0xB5A8B4, 0x0ECCCF, 0x2D8912, 0x34576F, 0x89562C, 0xE3CE99, 0xB920D6, 0xAA5E6B, 0x9C2A3E, 0xCC5F11, 0x4A0BFD, 0xFBF4E1, 0x6D3B8E, 0x2C86E2, 0x84D4E9, 0xA9B4FC, 0xD1EEEF, 0xC9352E, 0x61392F, 0x442138, 0xC8D91B, 0x0AFC81, 0x6A4AFB, 0xD81C2F, 0x84B453, 0x8C994E, 0xCC2254, 0xDC552A, 0xD6C6C0, 0x96190B, 0xB8701A, 0x649569, 0x605A26, 0xEE523F, 0x0F117F, 0x11B5F4, 0xF5CBFC, 0x2DBC34, 0xEEBC34, 0xCC5DE8, 0x605EDD, 0x9B8E67, 0xEF3392, 0xB817C9, 0x9B5861, 0xBC57E1, 0xC68351, 0x103ED8, 0x4871DD, 0xDD1C2D, 0xA118AF, 0x462C21, 0xD7F359, 0x987AD9, 0xC0549E, 0xFA864F, 0xFC0656, 0xAE79E5, 0x362289, 0x22AD38, 0xDC9367, 0xAAE855, 0x382682, 0x9BE7CA, 0xA40D51, 0xB13399, 0x0ED7A9, 0x480569, 0xF0B265, 0xA7887F, 0x974C88, 0x36D1F9, 0xB39221, 0x4A827B, 0x21CF98, 0xDC9F40, 0x5547DC, 0x3A74E1, 0x42EB67, 0xDF9DFE, 0x5FD45E, 0xA4677B, 0x7AACBA, 0xA2F655, 0x23882B, 0x55BA41, 0x086E59, 0x862A21, 0x834739, 0xE6E389, 
0xD49EE5, 0x40FB49, 0xE956FF, 0xCA0F1C, 0x8A59C5, 0x2BFA94, 0xC5C1D3, 0xCFC50F, 0xAE5ADB, 0x86C547, 0x624385, 0x3B8621, 0x94792C, 0x876110, 0x7B4C2A, 0x1A2C80, 0x12BF43, 0x902688, 0x893C78, 0xE4C4A8, 0x7BDBE5, 0xC23AC4, 0xEAF426, 0x8A67F7, 0xBF920D, 0x2BA365, 0xB1933D, 0x0B7CBD, 0xDC51A4, 0x63DD27, 0xDDE169, 0x19949A, 0x9529A8, 0x28CE68, 0xB4ED09, 0x209F44, 0xCA984E, 0x638270, 0x237C7E, 0x32B90F, 0x8EF5A7, 0xE75614, 0x08F121, 0x2A9DB5, 0x4D7E6F, 0x5119A5, 0xABF9B5, 0xD6DF82, 0x61DD96, 0x023616, 0x9F3AC4, 0xA1A283, 0x6DED72, 0x7A8D39, 0xA9B882, 0x5C326B, 0x5B2746, 0xED3400, 0x7700D2, 0x55F4FC, 0x4D5901, 0x8071E0, ]; const PIO2: [f64; 8] = [ 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */ 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */ 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */ 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */ 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ ]; // fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 // // Input parameters: // x[] The input value (must be positive) is broken into nx // pieces of 24-bit integers in double precision format. // x[i] will be the i-th 24 bit of x. The scaled exponent // of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 // match x's up to 24 bits. // // Example of breaking a double positive z into x[0]+x[1]+x[2]: // e0 = ilogb(z)-23 // z = scalbn(z,-e0) // for i = 0,1,2 // x[i] = floor(z) // z = (z-x[i])*2**24 // // y[] ouput result in an array of double precision numbers. // The dimension of y[] is: // 24-bit precision 1 // 53-bit precision 2 // 64-bit precision 2 // 113-bit precision 3 // The actual value is the sum of them. Thus for 113-bit // precison, one may have to do something like: // // long double t,w,r_head, r_tail; // t = (long double)y[2] + (long double)y[1]; // w = (long double)y[0]; // r_head = t+w; // r_tail = w - (r_head - t); // // e0 The exponent of x[0]. Must be <= 16360 or you need to // expand the ipio2 table. // // prec an integer indicating the precision: // 0 24 bits (single) // 1 53 bits (double) // 2 64 bits (extended) // 3 113 bits (quad) // // Here is the description of some local variables: // // jk jk+1 is the initial number of terms of ipio2[] needed // in the computation. The minimum and recommended value // for jk is 3,4,4,6 for single, double, extended, and quad. // jk+1 must be 2 larger than you might expect so that our // recomputation test works. (Up to 24 bits in the integer // part (the 24 bits of it that we compute) and 23 bits in // the fraction part may be lost to cancelation before we // recompute.) // // jz local integer variable indicating the number of // terms of ipio2[] used. // // jx nx - 1 // // jv index for pointing to the suitable ipio2[] for the // computation. In general, we want // ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 // is an integer. Thus // e0-3-24*jv >= 0 or (e0-3)/24 >= jv // Hence jv = max(0,(e0-3)/24). // // jp jp+1 is the number of terms in PIo2[] needed, jp = jk. // // q[] double array with integral value, representing the // 24-bits chunk of the product of x and 2/pi. // // q0 the corresponding exponent of q[0]. Note that the // exponent for q[i] would be q0-24*i. // // PIo2[] double precision array, obtained by cutting pi/2 // into 24 bits chunks. 
// // f[] ipio2[] in floating point // // iq[] integer array by breaking up q[] in 24-bits chunk. // // fq[] final product of x*(2/pi) in fq[0],..,fq[jk] // // ih integer. If >0 it indicates q[] is >= 0.5, hence // it also indicates the *sign* of the result. /// Return the last three digits of N with y = x - N*pi/2 /// so that |y| < pi/2. /// /// The method is to compute the integer (mod 8) and fraction parts of /// (2/pi)*x without doing the full multiplication. In general we /// skip the part of the product that are known to be a huge integer ( /// more accurately, = 0 mod 8 ). Thus the number of operations are /// independent of the exponent of the input. #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) -> i32 { let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24 let x1p_24 = f64::from_bits(0x3e70000000000000); // 0x1p_24 === 2 ^ (-24) #[cfg(all(target_pointer_width = "64", feature = "checked"))] assert!(e0 <= 16360); let nx = x.len(); let mut fw: f64; let mut n: i32; let mut ih: i32; let mut z: f64; let mut f: [f64; 20] = [0.; 20]; let mut fq: [f64; 20] = [0.; 20]; let mut q: [f64; 20] = [0.; 20]; let mut iq: [i32; 20] = [0; 20]; /* initialize jk*/ let jk = i!(INIT_JK, prec); let jp = jk; /* determine jx,jv,q0, note that 3>q0 */ let jx = nx - 1; let mut jv = div!(e0 - 3, 24); if jv < 0 { jv = 0; } let mut q0 = e0 - 24 * (jv + 1); let jv = jv as usize; /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ let mut j = (jv as i32) - (jx as i32); let m = jx + jk; for i in 0..=m { i!(f, i, =, if j < 0 { 0. } else { i!(IPIO2, j as usize) as f64 }); j += 1; } /* compute q[0],q[1],...q[jk] */ for i in 0..=jk { fw = 0f64; for j in 0..=jx { fw += i!(x, j) * i!(f, jx + i - j); } i!(q, i, =, fw); } let mut jz = jk; 'recompute: loop { /* distill q[] into iq[] reversingly */ let mut i = 0i32; z = i!(q, jz); for j in (1..=jz).rev() { fw = (x1p_24 * z) as i32 as f64; i!(iq, i as usize, =, (z - x1p24 * fw) as i32); z = i!(q, j - 1) + fw; i += 1; } /* compute n */ z = scalbn(z, q0); /* actual value of z */ z -= 8.0 * floor(z * 0.125); /* trim off integer >= 8 */ n = z as i32; z -= n as f64; ih = 0; if q0 > 0 { /* need iq[jz-1] to determine n */ i = i!(iq, jz - 1) >> (24 - q0); n += i; i!(iq, jz - 1, -=, i << (24 - q0)); ih = i!(iq, jz - 1) >> (23 - q0); } else if q0 == 0 { ih = i!(iq, jz - 1) >> 23; } else if z >= 0.5 { ih = 2; } if ih > 0 { /* q > 0.5 */ n += 1; let mut carry = 0i32; for i in 0..jz { /* compute 1-q */ let j = i!(iq, i); if carry == 0 { if j != 0 { carry = 1; i!(iq, i, =, 0x1000000 - j); } } else { i!(iq, i, =, 0xffffff - j); } } if q0 > 0 { /* rare case: chance is 1 in 12 */ match q0 { 1 => { i!(iq, jz - 1, &=, 0x7fffff); } 2 => { i!(iq, jz - 1, &=, 0x3fffff); } _ => {} } } if ih == 2 { z = 1. - z; if carry != 0 { z -= scalbn(1., q0); } } } /* check if recomputation is needed */ if z == 0. { let mut j = 0; for i in (jk..=jz - 1).rev() { j |= i!(iq, i); } if j == 0 { /* need recomputation */ let mut k = 1; while i!(iq, jk - k, ==, 0) { k += 1; /* k = no. of terms needed */ } for i in (jz + 1)..=(jz + k) { /* add q[jz+1] to q[jz+k] */ i!(f, jx + i, =, i!(IPIO2, jv + i) as f64); fw = 0f64; for j in 0..=jx { fw += i!(x, j) * i!(f, jx + i - j); } i!(q, i, =, fw); } jz += k; continue 'recompute; } } break; } /* chop off zero terms */ if z == 0. 
{ jz -= 1; q0 -= 24; while i!(iq, jz) == 0 { jz -= 1; q0 -= 24; } } else { /* break z into 24-bit if necessary */ z = scalbn(z, -q0); if z >= x1p24 { fw = (x1p_24 * z) as i32 as f64; i!(iq, jz, =, (z - x1p24 * fw) as i32); jz += 1; q0 += 24; i!(iq, jz, =, fw as i32); } else { i!(iq, jz, =, z as i32); } } /* convert integer "bit" chunk to floating-point value */ fw = scalbn(1., q0); for i in (0..=jz).rev() { i!(q, i, =, fw * (i!(iq, i) as f64)); fw *= x1p_24; } /* compute PIo2[0,...,jp]*q[jz,...,0] */ for i in (0..=jz).rev() { fw = 0f64; let mut k = 0; while (k <= jp) && (k <= jz - i) { fw += i!(PIO2, k) * i!(q, i + k); k += 1; } i!(fq, jz - i, =, fw); } /* compress fq[] into y[] */ match prec { 0 => { fw = 0f64; for i in (0..=jz).rev() { fw += i!(fq, i); } i!(y, 0, =, if ih == 0 { fw } else { -fw }); } 1 | 2 => { fw = 0f64; for i in (0..=jz).rev() { fw += i!(fq, i); } // TODO: drop excess precision here once double_t is used fw = fw as f64; i!(y, 0, =, if ih == 0 { fw } else { -fw }); fw = i!(fq, 0) - fw; for i in 1..=jz { fw += i!(fq, i); } i!(y, 1, =, if ih == 0 { fw } else { -fw }); } 3 => { /* painful */ for i in (1..=jz).rev() { fw = i!(fq, i - 1) + i!(fq, i); i!(fq, i, +=, i!(fq, i - 1) - fw); i!(fq, i - 1, =, fw); } for i in (2..=jz).rev() { fw = i!(fq, i - 1) + i!(fq, i); i!(fq, i, +=, i!(fq, i - 1) - fw); i!(fq, i - 1, =, fw); } fw = 0f64; for i in (2..=jz).rev() { fw += i!(fq, i); } if ih == 0 { i!(y, 0, =, i!(fq, 0)); i!(y, 1, =, i!(fq, 1)); i!(y, 2, =, fw); } else { i!(y, 0, =, -i!(fq, 0)); i!(y, 1, =, -i!(fq, 1)); i!(y, 2, =, -fw); } } #[cfg(debug_assertions)] _ => unreachable!(), #[cfg(not(debug_assertions))] _ => {} } n & 7 } compiler_builtins-0.1.101/libm/src/math/rem_pio2f.rs000064400000000000000000000044231046102023000204050ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2f.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. * Debugged and optimized by Bruce D. Evans. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use super::rem_pio2_large; use core::f64; const TOINT: f64 = 1.5 / f64::EPSILON; /// 53 bits of 2/pi const INV_PIO2: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ /// first 25 bits of pi/2 const PIO2_1: f64 = 1.57079631090164184570e+00; /* 0x3FF921FB, 0x50000000 */ /// pi/2 - pio2_1 const PIO2_1T: f64 = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ /// Return the remainder of x rem pi/2 in *y /// /// use double precision for everything except passing x /// use __rem_pio2_large() for large x #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) { let x64 = x as f64; let mut tx: [f64; 1] = [0.]; let mut ty: [f64; 1] = [0.]; let ix = x.to_bits() & 0x7fffffff; /* 25+53 bit pi is good enough for medium size */ if ix < 0x4dc90fdb { /* |x| ~< 2^28*(pi/2), medium size */ /* Use a specialized rint() to get fn. Assume round-to-nearest. */ let tmp = x64 * INV_PIO2 + TOINT; // force rounding of tmp to it's storage format on x87 to avoid // excess precision issues. 
#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] let tmp = force_eval!(tmp); let f_n = tmp - TOINT; return (f_n as i32, x64 - f_n * PIO2_1 - f_n * PIO2_1T); } if ix >= 0x7f800000 { /* x is inf or NaN */ return (0, x64 - x64); } /* scale x into [2^23, 2^24-1] */ let sign = (x.to_bits() >> 31) != 0; let e0 = ((ix >> 23) - (0x7f + 23)) as i32; /* e0 = ilogb(|x|)-23, positive */ tx[0] = f32::from_bits(ix - (e0 << 23) as u32) as f64; let n = rem_pio2_large(&tx, &mut ty, e0, 0); if sign { return (-n, -ty[0]); } (n, ty[0]) } compiler_builtins-0.1.101/libm/src/math/remainder.rs000064400000000000000000000002361046102023000204670ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn remainder(x: f64, y: f64) -> f64 { let (result, _) = super::remquo(x, y); result } compiler_builtins-0.1.101/libm/src/math/remainderf.rs000064400000000000000000000002401046102023000206300ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn remainderf(x: f32, y: f32) -> f32 { let (result, _) = super::remquof(x, y); result } compiler_builtins-0.1.101/libm/src/math/remquo.rs000064400000000000000000000046361046102023000200410ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn remquo(mut x: f64, mut y: f64) -> (f64, i32) { let ux: u64 = x.to_bits(); let mut uy: u64 = y.to_bits(); let mut ex = ((ux >> 52) & 0x7ff) as i32; let mut ey = ((uy >> 52) & 0x7ff) as i32; let sx = (ux >> 63) != 0; let sy = (uy >> 63) != 0; let mut q: u32; let mut i: u64; let mut uxi: u64 = ux; if (uy << 1) == 0 || y.is_nan() || ex == 0x7ff { return ((x * y) / (x * y), 0); } if (ux << 1) == 0 { return (x, 0); } /* normalize x and y */ if ex == 0 { i = uxi << 12; while (i >> 63) == 0 { ex -= 1; i <<= 1; } uxi <<= -ex + 1; } else { uxi &= (!0) >> 12; uxi |= 1 << 52; } if ey == 0 { i = uy << 12; while (i >> 63) == 0 { ey -= 1; i <<= 1; } uy <<= -ey + 1; } else { uy &= (!0) >> 12; uy |= 1 << 52; } q = 0; if ex + 1 != ey { if ex < ey { return (x, 0); } /* x mod y */ while ex > ey { i = uxi.wrapping_sub(uy); if (i >> 63) == 0 { uxi = i; q += 1; } uxi <<= 1; q <<= 1; ex -= 1; } i = uxi.wrapping_sub(uy); if (i >> 63) == 0 { uxi = i; q += 1; } if uxi == 0 { ex = -60; } else { while (uxi >> 52) == 0 { uxi <<= 1; ex -= 1; } } } /* scale result and decide between |x| and |x|-|y| */ if ex > 0 { uxi -= 1 << 52; uxi |= (ex as u64) << 52; } else { uxi >>= -ex + 1; } x = f64::from_bits(uxi); if sy { y = -y; } if ex == ey || (ex + 1 == ey && (2.0 * x > y || (2.0 * x == y && (q % 2) != 0))) { x -= y; // TODO: this matches musl behavior, but it is incorrect q = q.wrapping_add(1); } q &= 0x7fffffff; let quo = if sx ^ sy { -(q as i32) } else { q as i32 }; if sx { (-x, quo) } else { (x, quo) } } #[cfg(test)] mod tests { use super::remquo; #[test] fn test_q_overflow() { // 0xc000000000000001, 0x04c0000000000004 let _ = remquo(-2.0000000000000004, 8.406091369059082e-286); } } compiler_builtins-0.1.101/libm/src/math/remquof.rs000064400000000000000000000041571046102023000202050ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn remquof(mut x: f32, mut y: f32) -> (f32, i32) { let ux: u32 = x.to_bits(); let mut uy: u32 = y.to_bits(); let mut ex = ((ux >> 23) & 0xff) as i32; let mut ey = ((uy >> 23) & 0xff) as i32; let sx = (ux >> 31) != 0; let sy = (uy >> 31) != 0; let mut q: u32; let mut i: u32; let mut uxi: u32 = ux; if (uy << 1) == 0 || y.is_nan() || ex == 0xff { return ((x * y) / (x * y), 0); } if (ux << 1) == 0 { return (x, 0); } 
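// Editorial note (added for clarity, not in the upstream sources): the block
// below rewrites both operands as an explicit leading-one mantissa plus an
// exponent. Normal inputs just have the hidden bit restored (uxi |= 1 << 23);
// subnormal inputs are shifted left until their leading one reaches bit 23,
// with ex (or ey) decremented to compensate, so the bit-by-bit division loop
// that follows always works on aligned mantissas.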
/* normalize x and y */ if ex == 0 { i = uxi << 9; while (i >> 31) == 0 { ex -= 1; i <<= 1; } uxi <<= -ex + 1; } else { uxi &= (!0) >> 9; uxi |= 1 << 23; } if ey == 0 { i = uy << 9; while (i >> 31) == 0 { ey -= 1; i <<= 1; } uy <<= -ey + 1; } else { uy &= (!0) >> 9; uy |= 1 << 23; } q = 0; if ex + 1 != ey { if ex < ey { return (x, 0); } /* x mod y */ while ex > ey { i = uxi.wrapping_sub(uy); if (i >> 31) == 0 { uxi = i; q += 1; } uxi <<= 1; q <<= 1; ex -= 1; } i = uxi.wrapping_sub(uy); if (i >> 31) == 0 { uxi = i; q += 1; } if uxi == 0 { ex = -30; } else { while (uxi >> 23) == 0 { uxi <<= 1; ex -= 1; } } } /* scale result and decide between |x| and |x|-|y| */ if ex > 0 { uxi -= 1 << 23; uxi |= (ex as u32) << 23; } else { uxi >>= -ex + 1; } x = f32::from_bits(uxi); if sy { y = -y; } if ex == ey || (ex + 1 == ey && (2.0 * x > y || (2.0 * x == y && (q % 2) != 0))) { x -= y; q += 1; } q &= 0x7fffffff; let quo = if sx ^ sy { -(q as i32) } else { q as i32 }; if sx { (-x, quo) } else { (x, quo) } } compiler_builtins-0.1.101/libm/src/math/rint.rs000064400000000000000000000022421046102023000174740ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn rint(x: f64) -> f64 { let one_over_e = 1.0 / f64::EPSILON; let as_u64: u64 = x.to_bits(); let exponent: u64 = as_u64 >> 52 & 0x7ff; let is_positive = (as_u64 >> 63) == 0; if exponent >= 0x3ff + 52 { x } else { let ans = if is_positive { x + one_over_e - one_over_e } else { x - one_over_e + one_over_e }; if ans == 0.0 { if is_positive { 0.0 } else { -0.0 } } else { ans } } } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::rint; #[test] fn negative_zero() { assert_eq!(rint(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); } #[test] fn sanity_check() { assert_eq!(rint(-1.0), -1.0); assert_eq!(rint(2.8), 3.0); assert_eq!(rint(-0.5), -0.0); assert_eq!(rint(0.5), 0.0); assert_eq!(rint(-1.5), -2.0); assert_eq!(rint(1.5), 2.0); } } compiler_builtins-0.1.101/libm/src/math/rintf.rs000064400000000000000000000022511046102023000176420ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn rintf(x: f32) -> f32 { let one_over_e = 1.0 / f32::EPSILON; let as_u32: u32 = x.to_bits(); let exponent: u32 = as_u32 >> 23 & 0xff; let is_positive = (as_u32 >> 31) == 0; if exponent >= 0x7f + 23 { x } else { let ans = if is_positive { x + one_over_e - one_over_e } else { x - one_over_e + one_over_e }; if ans == 0.0 { if is_positive { 0.0 } else { -0.0 } } else { ans } } } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::rintf; #[test] fn negative_zero() { assert_eq!(rintf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); } #[test] fn sanity_check() { assert_eq!(rintf(-1.0), -1.0); assert_eq!(rintf(2.8), 3.0); assert_eq!(rintf(-0.5), -0.0); assert_eq!(rintf(0.5), 0.0); assert_eq!(rintf(-1.5), -2.0); assert_eq!(rintf(1.5), 2.0); } } compiler_builtins-0.1.101/libm/src/math/round.rs000064400000000000000000000011731046102023000176510ustar 00000000000000use super::copysign; use super::trunc; use core::f64; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn round(x: f64) -> f64 { trunc(x + copysign(0.5 - 0.25 * f64::EPSILON, x)) } #[cfg(test)] mod tests { use super::round; #[test] fn negative_zero() { assert_eq!(round(-0.0_f64).to_bits(), (-0.0_f64).to_bits()); } #[test] fn sanity_check() { 
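// --- Illustrative aside (not part of the original source) ---------------------------------
// The `rint`/`rintf` implementations above rely on a classic trick: for |x| < 2^52, adding and
// then subtracting 2^52 (exactly 1.0 / f64::EPSILON) forces the intermediate sum to be rounded
// to an integer, because at that magnitude adjacent f64 values are exactly 1.0 apart. Under the
// default round-to-nearest-even mode this gives ties-to-even, which is why the sanity checks
// expect rint(0.5) == 0.0 and rint(1.5) == 2.0. A minimal sketch of the idea (positive inputs
// only; the real code also handles sign, large exponents and signed zero):
fn round_to_int_via_addsub(x: f64) -> f64 {
    debug_assert!(x >= 0.0 && x < 1.0 / f64::EPSILON);
    let toint = 1.0 / f64::EPSILON; // 2^52
    x + toint - toint
}
// e.g. round_to_int_via_addsub(2.5) == 2.0 and round_to_int_via_addsub(3.5) == 4.0.
// A variant of the same trick appears as `TOINT` in the `rem_pio2f` argument reduction earlier.
// -------------------------------------------------------------------------------------------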
assert_eq!(round(-1.0), -1.0); assert_eq!(round(2.8), 3.0); assert_eq!(round(-0.5), -1.0); assert_eq!(round(0.5), 1.0); assert_eq!(round(-1.5), -2.0); assert_eq!(round(1.5), 2.0); } } compiler_builtins-0.1.101/libm/src/math/roundf.rs000064400000000000000000000014071046102023000200170ustar 00000000000000use super::copysignf; use super::truncf; use core::f32; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn roundf(x: f32) -> f32 { truncf(x + copysignf(0.5 - 0.25 * f32::EPSILON, x)) } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::roundf; #[test] fn negative_zero() { assert_eq!(roundf(-0.0_f32).to_bits(), (-0.0_f32).to_bits()); } #[test] fn sanity_check() { assert_eq!(roundf(-1.0), -1.0); assert_eq!(roundf(2.8), 3.0); assert_eq!(roundf(-0.5), -1.0); assert_eq!(roundf(0.5), 1.0); assert_eq!(roundf(-1.5), -2.0); assert_eq!(roundf(1.5), 2.0); } } compiler_builtins-0.1.101/libm/src/math/scalbn.rs000064400000000000000000000017041046102023000177640ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn scalbn(x: f64, mut n: i32) -> f64 { let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023 let x1p53 = f64::from_bits(0x4340000000000000); // 0x1p53 === 2 ^ 53 let x1p_1022 = f64::from_bits(0x0010000000000000); // 0x1p-1022 === 2 ^ (-1022) let mut y = x; if n > 1023 { y *= x1p1023; n -= 1023; if n > 1023 { y *= x1p1023; n -= 1023; if n > 1023 { n = 1023; } } } else if n < -1022 { /* make sure final n < -53 to avoid double rounding in the subnormal range */ y *= x1p_1022 * x1p53; n += 1022 - 53; if n < -1022 { y *= x1p_1022 * x1p53; n += 1022 - 53; if n < -1022 { n = -1022; } } } y * f64::from_bits(((0x3ff + n) as u64) << 52) } compiler_builtins-0.1.101/libm/src/math/scalbnf.rs000064400000000000000000000014471046102023000201360ustar 00000000000000#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn scalbnf(mut x: f32, mut n: i32) -> f32 { let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127 let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 let x1p24 = f32::from_bits(0x4b800000); // 0x1p24f === 2 ^ 24 if n > 127 { x *= x1p127; n -= 127; if n > 127 { x *= x1p127; n -= 127; if n > 127 { n = 127; } } } else if n < -126 { x *= x1p_126 * x1p24; n += 126 - 24; if n < -126 { x *= x1p_126 * x1p24; n += 126 - 24; if n < -126 { n = -126; } } } x * f32::from_bits(((0x7f + n) as u32) << 23) } compiler_builtins-0.1.101/libm/src/math/sin.rs000064400000000000000000000053631046102023000173200ustar 00000000000000// origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ // // ==================================================== // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. // // Developed at SunPro, a Sun Microsystems, Inc. business. // Permission to use, copy, modify, and distribute this // software is freely granted, provided that this notice // is preserved. // ==================================================== use super::{k_cos, k_sin, rem_pio2}; // sin(x) // Return sine function of x. // // kernel function: // k_sin ... sine function on [-pi/4,pi/4] // k_cos ... cose function on [-pi/4,pi/4] // rem_pio2 ... argument reduction routine // // Method. // Let S,C and T denote the sin, cos and tan respectively on // [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 // in [-pi/4 , +pi/4], and let n = k mod 4. 
// We have // // n sin(x) cos(x) tan(x) // ---------------------------------------------------------- // 0 S C T // 1 C -S -1/T // 2 -S -C T // 3 -C S -1/T // ---------------------------------------------------------- // // Special cases: // Let trig be any of sin, cos, or tan. // trig(+-INF) is NaN, with signals; // trig(NaN) is that NaN; // // Accuracy: // TRIG(x) returns trig(x) nearly rounded #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sin(x: f64) -> f64 { let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 /* High word of x. */ let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; /* |x| ~< pi/4 */ if ix <= 0x3fe921fb { if ix < 0x3e500000 { /* |x| < 2**-26 */ /* raise inexact if x != 0 and underflow if subnormal*/ if ix < 0x00100000 { force_eval!(x / x1p120); } else { force_eval!(x + x1p120); } return x; } return k_sin(x, 0.0, 0); } /* sin(Inf or NaN) is NaN */ if ix >= 0x7ff00000 { return x - x; } /* argument reduction needed */ let (n, y0, y1) = rem_pio2(x); match n & 3 { 0 => k_sin(y0, y1, 1), 1 => k_cos(y0, y1), 2 => -k_sin(y0, y1, 1), _ => -k_cos(y0, y1), } } #[test] fn test_near_pi() { let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707 let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7 let result = sin(x); #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] let result = force_eval!(result); assert_eq!(result, sx); } compiler_builtins-0.1.101/libm/src/math/sincos.rs000064400000000000000000000067611046102023000200300ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use super::{get_high_word, k_cos, k_sin, rem_pio2}; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sincos(x: f64) -> (f64, f64) { let s: f64; let c: f64; let mut ix: u32; ix = get_high_word(x); ix &= 0x7fffffff; /* |x| ~< pi/4 */ if ix <= 0x3fe921fb { /* if |x| < 2**-27 * sqrt(2) */ if ix < 0x3e46a09e { /* raise inexact if x!=0 and underflow if subnormal */ let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120 == 2^120 if ix < 0x00100000 { force_eval!(x / x1p120); } else { force_eval!(x + x1p120); } return (x, 1.0); } return (k_sin(x, 0.0, 0), k_cos(x, 0.0)); } /* sincos(Inf or NaN) is NaN */ if ix >= 0x7ff00000 { let rv = x - x; return (rv, rv); } /* argument reduction needed */ let (n, y0, y1) = rem_pio2(x); s = k_sin(y0, y1, 1); c = k_cos(y0, y1); match n & 3 { 0 => (s, c), 1 => (c, -s), 2 => (-s, -c), 3 => (-c, s), #[cfg(debug_assertions)] _ => unreachable!(), #[cfg(not(debug_assertions))] _ => (0.0, 1.0), } } // These tests are based on those from sincosf.rs #[cfg(test)] mod tests { use super::sincos; const TOLERANCE: f64 = 1e-6; #[test] fn with_pi() { let (s, c) = sincos(core::f64::consts::PI); assert!( (s - 0.0).abs() < TOLERANCE, "|{} - {}| = {} >= {}", s, 0.0, (s - 0.0).abs(), TOLERANCE ); assert!( (c + 1.0).abs() < TOLERANCE, "|{} + {}| = {} >= {}", c, 1.0, (s + 1.0).abs(), TOLERANCE ); } #[test] fn rotational_symmetry() { use core::f64::consts::PI; const N: usize = 24; for n in 0..N { let theta = 2. * PI * (n as f64) / (N as f64); let (s, c) = sincos(theta); let (s_plus, c_plus) = sincos(theta + 2. 
* PI); let (s_minus, c_minus) = sincos(theta - 2. * PI); assert!( (s - s_plus).abs() < TOLERANCE, "|{} - {}| = {} >= {}", s, s_plus, (s - s_plus).abs(), TOLERANCE ); assert!( (s - s_minus).abs() < TOLERANCE, "|{} - {}| = {} >= {}", s, s_minus, (s - s_minus).abs(), TOLERANCE ); assert!( (c - c_plus).abs() < TOLERANCE, "|{} - {}| = {} >= {}", c, c_plus, (c - c_plus).abs(), TOLERANCE ); assert!( (c - c_minus).abs() < TOLERANCE, "|{} - {}| = {} >= {}", c, c_minus, (c - c_minus).abs(), TOLERANCE ); } } } compiler_builtins-0.1.101/libm/src/math/sincosf.rs000064400000000000000000000122461046102023000201710ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. * Optimized by Bruce D. Evans. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use super::{k_cosf, k_sinf, rem_pio2f}; /* Small multiples of pi/2 rounded to double precision. */ const PI_2: f32 = 0.5 * 3.1415926535897931160E+00; const S1PIO2: f32 = 1.0 * PI_2; /* 0x3FF921FB, 0x54442D18 */ const S2PIO2: f32 = 2.0 * PI_2; /* 0x400921FB, 0x54442D18 */ const S3PIO2: f32 = 3.0 * PI_2; /* 0x4012D97C, 0x7F3321D2 */ const S4PIO2: f32 = 4.0 * PI_2; /* 0x401921FB, 0x54442D18 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sincosf(x: f32) -> (f32, f32) { let s: f32; let c: f32; let mut ix: u32; let sign: bool; ix = x.to_bits(); sign = (ix >> 31) != 0; ix &= 0x7fffffff; /* |x| ~<= pi/4 */ if ix <= 0x3f490fda { /* |x| < 2**-12 */ if ix < 0x39800000 { /* raise inexact if x!=0 and underflow if subnormal */ let x1p120 = f32::from_bits(0x7b800000); // 0x1p120 == 2^120 if ix < 0x00100000 { force_eval!(x / x1p120); } else { force_eval!(x + x1p120); } return (x, 1.0); } return (k_sinf(x as f64), k_cosf(x as f64)); } /* |x| ~<= 5*pi/4 */ if ix <= 0x407b53d1 { if ix <= 0x4016cbe3 { /* |x| ~<= 3pi/4 */ if sign { s = -k_cosf((x + S1PIO2) as f64); c = k_sinf((x + S1PIO2) as f64); } else { s = k_cosf((S1PIO2 - x) as f64); c = k_sinf((S1PIO2 - x) as f64); } } /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */ else { if sign { s = -k_sinf((x + S2PIO2) as f64); c = -k_cosf((x + S2PIO2) as f64); } else { s = -k_sinf((x - S2PIO2) as f64); c = -k_cosf((x - S2PIO2) as f64); } } return (s, c); } /* |x| ~<= 9*pi/4 */ if ix <= 0x40e231d5 { if ix <= 0x40afeddf { /* |x| ~<= 7*pi/4 */ if sign { s = k_cosf((x + S3PIO2) as f64); c = -k_sinf((x + S3PIO2) as f64); } else { s = -k_cosf((x - S3PIO2) as f64); c = k_sinf((x - S3PIO2) as f64); } } else { if sign { s = k_sinf((x + S4PIO2) as f64); c = k_cosf((x + S4PIO2) as f64); } else { s = k_sinf((x - S4PIO2) as f64); c = k_cosf((x - S4PIO2) as f64); } } return (s, c); } /* sin(Inf or NaN) is NaN */ if ix >= 0x7f800000 { let rv = x - x; return (rv, rv); } /* general argument reduction needed */ let (n, y) = rem_pio2f(x); s = k_sinf(y); c = k_cosf(y); match n & 3 { 0 => (s, c), 1 => (c, -s), 2 => (-s, -c), 3 => (-c, s), #[cfg(debug_assertions)] _ => unreachable!(), #[cfg(not(debug_assertions))] _ => (0.0, 1.0), } } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::sincosf; use 
crate::_eqf; #[test] fn with_pi() { let (s, c) = sincosf(core::f32::consts::PI); _eqf(s.abs(), 0.0).unwrap(); _eqf(c, -1.0).unwrap(); } #[test] fn rotational_symmetry() { use core::f32::consts::PI; const N: usize = 24; for n in 0..N { let theta = 2. * PI * (n as f32) / (N as f32); let (s, c) = sincosf(theta); let (s_plus, c_plus) = sincosf(theta + 2. * PI); let (s_minus, c_minus) = sincosf(theta - 2. * PI); const TOLERANCE: f32 = 1e-6; assert!( (s - s_plus).abs() < TOLERANCE, "|{} - {}| = {} >= {}", s, s_plus, (s - s_plus).abs(), TOLERANCE ); assert!( (s - s_minus).abs() < TOLERANCE, "|{} - {}| = {} >= {}", s, s_minus, (s - s_minus).abs(), TOLERANCE ); assert!( (c - c_plus).abs() < TOLERANCE, "|{} - {}| = {} >= {}", c, c_plus, (c - c_plus).abs(), TOLERANCE ); assert!( (c - c_minus).abs() < TOLERANCE, "|{} - {}| = {} >= {}", c, c_minus, (c - c_minus).abs(), TOLERANCE ); } } } compiler_builtins-0.1.101/libm/src/math/sinf.rs000064400000000000000000000051271046102023000174640ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. * Optimized by Bruce D. Evans. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use super::{k_cosf, k_sinf, rem_pio2f}; use core::f64::consts::FRAC_PI_2; /* Small multiples of pi/2 rounded to double precision. */ const S1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ const S2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ const S3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ const S4_PIO2: f64 = 4. 
* FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sinf(x: f32) -> f32 { let x64 = x as f64; let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 let mut ix = x.to_bits(); let sign = (ix >> 31) != 0; ix &= 0x7fffffff; if ix <= 0x3f490fda { /* |x| ~<= pi/4 */ if ix < 0x39800000 { /* |x| < 2**-12 */ /* raise inexact if x!=0 and underflow if subnormal */ force_eval!(if ix < 0x00800000 { x / x1p120 } else { x + x1p120 }); return x; } return k_sinf(x64); } if ix <= 0x407b53d1 { /* |x| ~<= 5*pi/4 */ if ix <= 0x4016cbe3 { /* |x| ~<= 3pi/4 */ if sign { return -k_cosf(x64 + S1_PIO2); } else { return k_cosf(x64 - S1_PIO2); } } return k_sinf(if sign { -(x64 + S2_PIO2) } else { -(x64 - S2_PIO2) }); } if ix <= 0x40e231d5 { /* |x| ~<= 9*pi/4 */ if ix <= 0x40afeddf { /* |x| ~<= 7*pi/4 */ if sign { return k_cosf(x64 + S3_PIO2); } else { return -k_cosf(x64 - S3_PIO2); } } return k_sinf(if sign { x64 + S4_PIO2 } else { x64 - S4_PIO2 }); } /* sin(Inf or NaN) is NaN */ if ix >= 0x7f800000 { return x - x; } /* general argument reduction needed */ let (n, y) = rem_pio2f(x); match n & 3 { 0 => k_sinf(y), 1 => k_cosf(y), 2 => k_sinf(-y), _ => -k_cosf(y), } } compiler_builtins-0.1.101/libm/src/math/sinh.rs000064400000000000000000000023551046102023000174660ustar 00000000000000use super::{expm1, expo2}; // sinh(x) = (exp(x) - 1/exp(x))/2 // = (exp(x)-1 + (exp(x)-1)/exp(x))/2 // = x + x^3/6 + o(x^5) // #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sinh(x: f64) -> f64 { // union {double f; uint64_t i;} u = {.f = x}; // uint32_t w; // double t, h, absx; let mut uf: f64 = x; let mut ui: u64 = f64::to_bits(uf); let w: u32; let t: f64; let mut h: f64; let absx: f64; h = 0.5; if ui >> 63 != 0 { h = -h; } /* |x| */ ui &= !1 / 2; uf = f64::from_bits(ui); absx = uf; w = (ui >> 32) as u32; /* |x| < log(DBL_MAX) */ if w < 0x40862e42 { t = expm1(absx); if w < 0x3ff00000 { if w < 0x3ff00000 - (26 << 20) { /* note: inexact and underflow are raised by expm1 */ /* note: this branch avoids spurious underflow */ return x; } return h * (2.0 * t - t * t / (t + 1.0)); } /* note: |x|>log(0x1p26)+eps could be just h*exp(x) */ return h * (t + t / (t + 1.0)); } /* |x| > log(DBL_MAX) or nan */ /* note: the result is stored to handle overflow */ t = 2.0 * h * expo2(absx); t } compiler_builtins-0.1.101/libm/src/math/sinhf.rs000064400000000000000000000012651046102023000176330ustar 00000000000000use super::expm1f; use super::k_expo2f; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sinhf(x: f32) -> f32 { let mut h = 0.5f32; let mut ix = x.to_bits(); if (ix >> 31) != 0 { h = -h; } /* |x| */ ix &= 0x7fffffff; let absx = f32::from_bits(ix); let w = ix; /* |x| < log(FLT_MAX) */ if w < 0x42b17217 { let t = expm1f(absx); if w < 0x3f800000 { if w < (0x3f800000 - (12 << 23)) { return x; } return h * (2. * t - t * t / (t + 1.)); } return h * (t + t / (t + 1.)); } /* |x| > logf(FLT_MAX) or nan */ 2. * h * k_expo2f(absx) } compiler_builtins-0.1.101/libm/src/math/sqrt.rs000064400000000000000000000221171046102023000175140ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrt.c */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunSoft, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. 
* ==================================================== */ /* sqrt(x) * Return correctly rounded sqrt. * ------------------------------------------ * | Use the hardware sqrt if you have one | * ------------------------------------------ * Method: * Bit by bit method using integer arithmetic. (Slow, but portable) * 1. Normalization * Scale x to y in [1,4) with even powers of 2: * find an integer k such that 1 <= (y=x*2^(2k)) < 4, then * sqrt(x) = 2^k * sqrt(y) * 2. Bit by bit computation * Let q = sqrt(y) truncated to i bit after binary point (q = 1), * i 0 * i+1 2 * s = 2*q , and y = 2 * ( y - q ). (1) * i i i i * * To compute q from q , one checks whether * i+1 i * * -(i+1) 2 * (q + 2 ) <= y. (2) * i * -(i+1) * If (2) is false, then q = q ; otherwise q = q + 2 . * i+1 i i+1 i * * With some algebraic manipulation, it is not difficult to see * that (2) is equivalent to * -(i+1) * s + 2 <= y (3) * i i * * The advantage of (3) is that s and y can be computed by * i i * the following recurrence formula: * if (3) is false * * s = s , y = y ; (4) * i+1 i i+1 i * * otherwise, * -i -(i+1) * s = s + 2 , y = y - s - 2 (5) * i+1 i i+1 i i * * One may easily use induction to prove (4) and (5). * Note. Since the left hand side of (3) contain only i+2 bits, * it does not necessary to do a full (53-bit) comparison * in (3). * 3. Final rounding * After generating the 53 bits result, we compute one more bit. * Together with the remainder, we can decide whether the * result is exact, bigger than 1/2ulp, or less than 1/2ulp * (it will never equal to 1/2ulp). * The rounding mode can be detected by checking whether * huge + tiny is equal to huge, and whether huge - tiny is * equal to huge for some floating point number "huge" and "tiny". * * Special cases: * sqrt(+-0) = +-0 ... exact * sqrt(inf) = inf * sqrt(-ve) = NaN ... with invalid signal * sqrt(NaN) = NaN ... with invalid signal for signaling NaN */ use core::f64; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrt(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f64.sqrt` native instruction, so we can leverage this for both code size // and speed. llvm_intrinsically_optimized! { #[cfg(target_arch = "wasm32")] { return if x < 0.0 { f64::NAN } else { unsafe { ::core::intrinsics::sqrtf64(x) } } } } #[cfg(target_feature = "sse2")] { // Note: This path is unlikely since LLVM will usually have already // optimized sqrt calls into hardware instructions if sse2 is available, // but if someone does end up here they'll apprected the speed increase. 
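// --- Illustrative aside (not part of the original source) ---------------------------------
// The long comment above describes the bit-by-bit square-root recurrence: keep a partial
// result q_i and a scaled remainder y_i, and at each step test whether setting the next bit
// still keeps q_i^2 <= y. The same shift-and-test idea is easiest to see on plain integers;
// this sketch computes floor(sqrt(x)) for a u64 with one trial bit per iteration:
fn isqrt_bit_by_bit(x: u64) -> u64 {
    let mut rem = x; // remainder still to be accounted for (the y_i of the comment)
    let mut root = 0u64; // partial result, kept pre-shifted (the s_i of the comment)
    let mut bit = 1u64 << 62; // highest even power of two
    while bit > x {
        bit >>= 2;
    }
    while bit != 0 {
        if rem >= root + bit {
            rem -= root + bit;
            root = (root >> 1) + bit; // accept the bit
        } else {
            root >>= 1; // reject the bit, only rescale
        }
        bit >>= 2;
    }
    root
}
// e.g. isqrt_bit_by_bit(100) == 10 and isqrt_bit_by_bit(99) == 9; the float routine below does
// the analogous work on the 53-bit significand and then patches the exponent back in.
// -------------------------------------------------------------------------------------------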
#[cfg(target_arch = "x86")] use core::arch::x86::*; #[cfg(target_arch = "x86_64")] use core::arch::x86_64::*; unsafe { let m = _mm_set_sd(x); let m_sqrt = _mm_sqrt_pd(m); _mm_cvtsd_f64(m_sqrt) } } #[cfg(not(target_feature = "sse2"))] { use core::num::Wrapping; const TINY: f64 = 1.0e-300; let mut z: f64; let sign: Wrapping = Wrapping(0x80000000); let mut ix0: i32; let mut s0: i32; let mut q: i32; let mut m: i32; let mut t: i32; let mut i: i32; let mut r: Wrapping; let mut t1: Wrapping; let mut s1: Wrapping; let mut ix1: Wrapping; let mut q1: Wrapping; ix0 = (x.to_bits() >> 32) as i32; ix1 = Wrapping(x.to_bits() as u32); /* take care of Inf and NaN */ if (ix0 & 0x7ff00000) == 0x7ff00000 { return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ } /* take care of zero */ if ix0 <= 0 { if ((ix0 & !(sign.0 as i32)) | ix1.0 as i32) == 0 { return x; /* sqrt(+-0) = +-0 */ } if ix0 < 0 { return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ } } /* normalize x */ m = ix0 >> 20; if m == 0 { /* subnormal x */ while ix0 == 0 { m -= 21; ix0 |= (ix1 >> 11).0 as i32; ix1 <<= 21; } i = 0; while (ix0 & 0x00100000) == 0 { i += 1; ix0 <<= 1; } m -= i - 1; ix0 |= (ix1 >> (32 - i) as usize).0 as i32; ix1 = ix1 << i as usize; } m -= 1023; /* unbias exponent */ ix0 = (ix0 & 0x000fffff) | 0x00100000; if (m & 1) == 1 { /* odd m, double x to make it even */ ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; } m >>= 1; /* m = [m/2] */ /* generate sqrt(x) bit by bit */ ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; q = 0; /* [q,q1] = sqrt(x) */ q1 = Wrapping(0); s0 = 0; s1 = Wrapping(0); r = Wrapping(0x00200000); /* r = moving bit from right to left */ while r != Wrapping(0) { t = s0 + r.0 as i32; if t <= ix0 { s0 = t + r.0 as i32; ix0 -= t; q += r.0 as i32; } ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; r >>= 1; } r = sign; while r != Wrapping(0) { t1 = s1 + r; t = s0; if t < ix0 || (t == ix0 && t1 <= ix1) { s1 = t1 + r; if (t1 & sign) == sign && (s1 & sign) == Wrapping(0) { s0 += 1; } ix0 -= t; if ix1 < t1 { ix0 -= 1; } ix1 -= t1; q1 += r; } ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32; ix1 += ix1; r >>= 1; } /* use floating add to find out rounding direction */ if (ix0 as u32 | ix1.0) != 0 { z = 1.0 - TINY; /* raise inexact flag */ if z >= 1.0 { z = 1.0 + TINY; if q1.0 == 0xffffffff { q1 = Wrapping(0); q += 1; } else if z > 1.0 { if q1.0 == 0xfffffffe { q += 1; } q1 += Wrapping(2); } else { q1 += q1 & Wrapping(1); } } } ix0 = (q >> 1) + 0x3fe00000; ix1 = q1 >> 1; if (q & 1) == 1 { ix1 |= sign; } ix0 += m << 20; f64::from_bits((ix0 as u64) << 32 | ix1.0 as u64) } } #[cfg(test)] mod tests { use super::*; use core::f64::*; #[test] fn sanity_check() { assert_eq!(sqrt(100.0), 10.0); assert_eq!(sqrt(4.0), 2.0); } /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt #[test] fn spec_tests() { // Not Asserted: FE_INVALID exception is raised if argument is negative. 
assert!(sqrt(-1.0).is_nan()); assert!(sqrt(NAN).is_nan()); for f in [0.0, -0.0, INFINITY].iter().copied() { assert_eq!(sqrt(f), f); } } #[test] fn conformance_tests() { let values = [3.14159265359, 10000.0, f64::from_bits(0x0000000f), INFINITY]; let results = [ 4610661241675116657u64, 4636737291354636288u64, 2197470602079456986u64, 9218868437227405312u64, ]; for i in 0..values.len() { let bits = f64::to_bits(sqrt(values[i])); assert_eq!(results[i], bits); } } } compiler_builtins-0.1.101/libm/src/math/sqrtf.rs000064400000000000000000000113521046102023000176610ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn sqrtf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f32.sqrt` native instruction, so we can leverage this for both code size // and speed. llvm_intrinsically_optimized! { #[cfg(target_arch = "wasm32")] { return if x < 0.0 { ::core::f32::NAN } else { unsafe { ::core::intrinsics::sqrtf32(x) } } } } #[cfg(target_feature = "sse")] { // Note: This path is unlikely since LLVM will usually have already // optimized sqrt calls into hardware instructions if sse is available, // but if someone does end up here they'll apprected the speed increase. #[cfg(target_arch = "x86")] use core::arch::x86::*; #[cfg(target_arch = "x86_64")] use core::arch::x86_64::*; unsafe { let m = _mm_set_ss(x); let m_sqrt = _mm_sqrt_ss(m); _mm_cvtss_f32(m_sqrt) } } #[cfg(not(target_feature = "sse"))] { const TINY: f32 = 1.0e-30; let mut z: f32; let sign: i32 = 0x80000000u32 as i32; let mut ix: i32; let mut s: i32; let mut q: i32; let mut m: i32; let mut t: i32; let mut i: i32; let mut r: u32; ix = x.to_bits() as i32; /* take care of Inf and NaN */ if (ix as u32 & 0x7f800000) == 0x7f800000 { return x * x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */ } /* take care of zero */ if ix <= 0 { if (ix & !sign) == 0 { return x; /* sqrt(+-0) = +-0 */ } if ix < 0 { return (x - x) / (x - x); /* sqrt(-ve) = sNaN */ } } /* normalize x */ m = ix >> 23; if m == 0 { /* subnormal x */ i = 0; while ix & 0x00800000 == 0 { ix <<= 1; i = i + 1; } m -= i - 1; } m -= 127; /* unbias exponent */ ix = (ix & 0x007fffff) | 0x00800000; if m & 1 == 1 { /* odd m, double x to make it even */ ix += ix; } m >>= 1; /* m = [m/2] */ /* generate sqrt(x) bit by bit */ ix += ix; q = 0; s = 0; r = 0x01000000; /* r = moving bit from right to left */ while r != 0 { t = s + r as i32; if t <= ix { s = t + r as i32; ix -= t; q += r as i32; } ix += ix; r >>= 1; } /* use floating add to find out rounding direction */ if ix != 0 { z = 1.0 - TINY; /* raise inexact flag */ if z >= 1.0 { z = 1.0 + TINY; if z > 1.0 { q += 2; } else { q += q & 1; } } } ix = (q >> 1) + 0x3f000000; ix += m << 23; f32::from_bits(ix as u32) } } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { use super::*; use core::f32::*; #[test] fn sanity_check() { assert_eq!(sqrtf(100.0), 10.0); 
assert_eq!(sqrtf(4.0), 2.0); } /// The spec: https://en.cppreference.com/w/cpp/numeric/math/sqrt #[test] fn spec_tests() { // Not Asserted: FE_INVALID exception is raised if argument is negative. assert!(sqrtf(-1.0).is_nan()); assert!(sqrtf(NAN).is_nan()); for f in [0.0, -0.0, INFINITY].iter().copied() { assert_eq!(sqrtf(f), f); } } #[test] fn conformance_tests() { let values = [ 3.14159265359f32, 10000.0f32, f32::from_bits(0x0000000f), INFINITY, ]; let results = [1071833029u32, 1120403456u32, 456082799u32, 2139095040u32]; for i in 0..values.len() { let bits = f32::to_bits(sqrtf(values[i])); assert_eq!(results[i], bits); } } } compiler_builtins-0.1.101/libm/src/math/tan.rs000064400000000000000000000043101046102023000173000ustar 00000000000000// origin: FreeBSD /usr/src/lib/msun/src/s_tan.c */ // // ==================================================== // Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. // // Developed at SunPro, a Sun Microsystems, Inc. business. // Permission to use, copy, modify, and distribute this // software is freely granted, provided that this notice // is preserved. // ==================================================== use super::{k_tan, rem_pio2}; // tan(x) // Return tangent function of x. // // kernel function: // k_tan ... tangent function on [-pi/4,pi/4] // rem_pio2 ... argument reduction routine // // Method. // Let S,C and T denote the sin, cos and tan respectively on // [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 // in [-pi/4 , +pi/4], and let n = k mod 4. // We have // // n sin(x) cos(x) tan(x) // ---------------------------------------------------------- // 0 S C T // 1 C -S -1/T // 2 -S -C T // 3 -C S -1/T // ---------------------------------------------------------- // // Special cases: // Let trig be any of sin, cos, or tan. // trig(+-INF) is NaN, with signals; // trig(NaN) is that NaN; // // Accuracy: // TRIG(x) returns trig(x) nearly rounded #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tan(x: f64) -> f64 { let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff; /* |x| ~< pi/4 */ if ix <= 0x3fe921fb { if ix < 0x3e400000 { /* |x| < 2**-27 */ /* raise inexact if x!=0 and underflow if subnormal */ force_eval!(if ix < 0x00100000 { x / x1p120 as f64 } else { x + x1p120 as f64 }); return x; } return k_tan(x, 0.0, 0); } /* tan(Inf or NaN) is NaN */ if ix >= 0x7ff00000 { return x - x; } /* argument reduction */ let (n, y0, y1) = rem_pio2(x); k_tan(y0, y1, n & 1) } compiler_builtins-0.1.101/libm/src/math/tanf.rs000064400000000000000000000045531046102023000174570ustar 00000000000000/* origin: FreeBSD /usr/src/lib/msun/src/s_tanf.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. * Optimized by Bruce D. Evans. */ /* * ==================================================== * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. * * Developed at SunPro, a Sun Microsystems, Inc. business. * Permission to use, copy, modify, and distribute this * software is freely granted, provided that this notice * is preserved. * ==================================================== */ use super::{k_tanf, rem_pio2f}; use core::f64::consts::FRAC_PI_2; /* Small multiples of pi/2 rounded to double precision. */ const T1_PIO2: f64 = 1. * FRAC_PI_2; /* 0x3FF921FB, 0x54442D18 */ const T2_PIO2: f64 = 2. * FRAC_PI_2; /* 0x400921FB, 0x54442D18 */ const T3_PIO2: f64 = 3. * FRAC_PI_2; /* 0x4012D97C, 0x7F3321D2 */ const T4_PIO2: f64 = 4. 
* FRAC_PI_2; /* 0x401921FB, 0x54442D18 */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tanf(x: f32) -> f32 { let x64 = x as f64; let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 let mut ix = x.to_bits(); let sign = (ix >> 31) != 0; ix &= 0x7fffffff; if ix <= 0x3f490fda { /* |x| ~<= pi/4 */ if ix < 0x39800000 { /* |x| < 2**-12 */ /* raise inexact if x!=0 and underflow if subnormal */ force_eval!(if ix < 0x00800000 { x / x1p120 } else { x + x1p120 }); return x; } return k_tanf(x64, false); } if ix <= 0x407b53d1 { /* |x| ~<= 5*pi/4 */ if ix <= 0x4016cbe3 { /* |x| ~<= 3pi/4 */ return k_tanf(if sign { x64 + T1_PIO2 } else { x64 - T1_PIO2 }, true); } else { return k_tanf(if sign { x64 + T2_PIO2 } else { x64 - T2_PIO2 }, false); } } if ix <= 0x40e231d5 { /* |x| ~<= 9*pi/4 */ if ix <= 0x40afeddf { /* |x| ~<= 7*pi/4 */ return k_tanf(if sign { x64 + T3_PIO2 } else { x64 - T3_PIO2 }, true); } else { return k_tanf(if sign { x64 + T4_PIO2 } else { x64 - T4_PIO2 }, false); } } /* tan(Inf or NaN) is NaN */ if ix >= 0x7f800000 { return x - x; } /* argument reduction */ let (n, y) = rem_pio2f(x); k_tanf(y, n & 1 != 0) } compiler_builtins-0.1.101/libm/src/math/tanh.rs000064400000000000000000000025201046102023000174510ustar 00000000000000use super::expm1; /* tanh(x) = (exp(x) - exp(-x))/(exp(x) + exp(-x)) * = (exp(2*x) - 1)/(exp(2*x) - 1 + 2) * = (1 - exp(-2*x))/(exp(-2*x) - 1 + 2) */ #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tanh(mut x: f64) -> f64 { let mut uf: f64 = x; let mut ui: u64 = f64::to_bits(uf); let w: u32; let sign: bool; let mut t: f64; /* x = |x| */ sign = ui >> 63 != 0; ui &= !1 / 2; uf = f64::from_bits(ui); x = uf; w = (ui >> 32) as u32; if w > 0x3fe193ea { /* |x| > log(3)/2 ~= 0.5493 or nan */ if w > 0x40340000 { /* |x| > 20 or nan */ /* note: this branch avoids raising overflow */ t = 1.0 - 0.0 / x; } else { t = expm1(2.0 * x); t = 1.0 - 2.0 / (t + 2.0); } } else if w > 0x3fd058ae { /* |x| > log(5/3)/2 ~= 0.2554 */ t = expm1(2.0 * x); t = t / (t + 2.0); } else if w >= 0x00100000 { /* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */ t = expm1(-2.0 * x); t = -t / (t + 2.0); } else { /* |x| is subnormal */ /* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */ force_eval!(x as f32); t = x; } if sign { -t } else { t } } compiler_builtins-0.1.101/libm/src/math/tanhf.rs000064400000000000000000000016061046102023000176230ustar 00000000000000use super::expm1f; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tanhf(mut x: f32) -> f32 { /* x = |x| */ let mut ix = x.to_bits(); let sign = (ix >> 31) != 0; ix &= 0x7fffffff; x = f32::from_bits(ix); let w = ix; let tt = if w > 0x3f0c9f54 { /* |x| > log(3)/2 ~= 0.5493 or nan */ if w > 0x41200000 { /* |x| > 10 */ 1. + 0. / x } else { let t = expm1f(2. * x); 1. - 2. / (t + 2.) } } else if w > 0x3e82c578 { /* |x| > log(5/3)/2 ~= 0.2554 */ let t = expm1f(2. * x); t / (t + 2.) } else if w >= 0x00800000 { /* |x| >= 0x1p-126 */ let t = expm1f(-2. * x); -t / (t + 2.) 
} else { /* |x| is subnormal */ force_eval!(x * x); x }; if sign { -tt } else { tt } } compiler_builtins-0.1.101/libm/src/math/tgamma.rs000064400000000000000000000126161046102023000177740ustar 00000000000000/* "A Precision Approximation of the Gamma Function" - Cornelius Lanczos (1964) "Lanczos Implementation of the Gamma Function" - Paul Godfrey (2001) "An Analysis of the Lanczos Gamma Approximation" - Glendon Ralph Pugh (2004) approximation method: (x - 0.5) S(x) Gamma(x) = (x + g - 0.5) * ---------------- exp(x + g - 0.5) with a1 a2 a3 aN S(x) ~= [ a0 + ----- + ----- + ----- + ... + ----- ] x + 1 x + 2 x + 3 x + N with a0, a1, a2, a3,.. aN constants which depend on g. for x < 0 the following reflection formula is used: Gamma(x)*Gamma(-x) = -pi/(x sin(pi x)) most ideas and constants are from boost and python */ extern crate core; use super::{exp, floor, k_cos, k_sin, pow}; const PI: f64 = 3.141592653589793238462643383279502884; /* sin(pi x) with x > 0x1p-100, if sin(pi*x)==0 the sign is arbitrary */ fn sinpi(mut x: f64) -> f64 { let mut n: isize; /* argument reduction: x = |x| mod 2 */ /* spurious inexact when x is odd int */ x = x * 0.5; x = 2.0 * (x - floor(x)); /* reduce x into [-.25,.25] */ n = (4.0 * x) as isize; n = div!(n + 1, 2); x -= (n as f64) * 0.5; x *= PI; match n { 1 => k_cos(x, 0.0), 2 => k_sin(-x, 0.0, 0), 3 => -k_cos(x, 0.0), 0 | _ => k_sin(x, 0.0, 0), } } const N: usize = 12; //static const double g = 6.024680040776729583740234375; const GMHALF: f64 = 5.524680040776729583740234375; const SNUM: [f64; N + 1] = [ 23531376880.410759688572007674451636754734846804940, 42919803642.649098768957899047001988850926355848959, 35711959237.355668049440185451547166705960488635843, 17921034426.037209699919755754458931112671403265390, 6039542586.3520280050642916443072979210699388420708, 1439720407.3117216736632230727949123939715485786772, 248874557.86205415651146038641322942321632125127801, 31426415.585400194380614231628318205362874684987640, 2876370.6289353724412254090516208496135991145378768, 186056.26539522349504029498971604569928220784236328, 8071.6720023658162106380029022722506138218516325024, 210.82427775157934587250973392071336271166969580291, 2.5066282746310002701649081771338373386264310793408, ]; const SDEN: [f64; N + 1] = [ 0.0, 39916800.0, 120543840.0, 150917976.0, 105258076.0, 45995730.0, 13339535.0, 2637558.0, 357423.0, 32670.0, 1925.0, 66.0, 1.0, ]; /* n! 
for small integer n */ const FACT: [f64; 23] = [ 1.0, 1.0, 2.0, 6.0, 24.0, 120.0, 720.0, 5040.0, 40320.0, 362880.0, 3628800.0, 39916800.0, 479001600.0, 6227020800.0, 87178291200.0, 1307674368000.0, 20922789888000.0, 355687428096000.0, 6402373705728000.0, 121645100408832000.0, 2432902008176640000.0, 51090942171709440000.0, 1124000727777607680000.0, ]; /* S(x) rational function for positive x */ fn s(x: f64) -> f64 { let mut num: f64 = 0.0; let mut den: f64 = 0.0; /* to avoid overflow handle large x differently */ if x < 8.0 { for i in (0..=N).rev() { num = num * x + i!(SNUM, i); den = den * x + i!(SDEN, i); } } else { for i in 0..=N { num = num / x + i!(SNUM, i); den = den / x + i!(SDEN, i); } } return num / den; } #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tgamma(mut x: f64) -> f64 { let u: u64 = x.to_bits(); let absx: f64; let mut y: f64; let mut dy: f64; let mut z: f64; let mut r: f64; let ix: u32 = ((u >> 32) as u32) & 0x7fffffff; let sign: bool = (u >> 63) != 0; /* special cases */ if ix >= 0x7ff00000 { /* tgamma(nan)=nan, tgamma(inf)=inf, tgamma(-inf)=nan with invalid */ return x + core::f64::INFINITY; } if ix < ((0x3ff - 54) << 20) { /* |x| < 2^-54: tgamma(x) ~ 1/x, +-0 raises div-by-zero */ return 1.0 / x; } /* integer arguments */ /* raise inexact when non-integer */ if x == floor(x) { if sign { return 0.0 / 0.0; } if x <= FACT.len() as f64 { return i!(FACT, (x as usize) - 1); } } /* x >= 172: tgamma(x)=inf with overflow */ /* x =< -184: tgamma(x)=+-0 with underflow */ if ix >= 0x40670000 { /* |x| >= 184 */ if sign { let x1p_126 = f64::from_bits(0x3810000000000000); // 0x1p-126 == 2^-126 force_eval!((x1p_126 / x) as f32); if floor(x) * 0.5 == floor(x * 0.5) { return 0.0; } else { return -0.0; } } let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 == 2^1023 x *= x1p1023; return x; } absx = if sign { -x } else { x }; /* handle the error of x + g - 0.5 */ y = absx + GMHALF; if absx > GMHALF { dy = y - absx; dy -= GMHALF; } else { dy = y - GMHALF; dy -= absx; } z = absx - 0.5; r = s(absx) * exp(-y); if x < 0.0 { /* reflection formula for negative x */ /* sinpi(absx) is not 0, integers are already handled */ r = -PI / (sinpi(absx) * absx * r); dy = -dy; z = -z; } r += dy * (GMHALF + 0.5) * r / y; z = pow(y, 0.5 * z); y = r * z * z; return y; } compiler_builtins-0.1.101/libm/src/math/tgammaf.rs000064400000000000000000000002161046102023000201330ustar 00000000000000use super::tgamma; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn tgammaf(x: f32) -> f32 { tgamma(x as f64) as f32 } compiler_builtins-0.1.101/libm/src/math/trunc.rs000064400000000000000000000017021046102023000176530ustar 00000000000000use core::f64; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn trunc(x: f64) -> f64 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f64.trunc` native instruction, so we can leverage this for both code size // and speed. llvm_intrinsically_optimized! 
{ #[cfg(target_arch = "wasm32")] { return unsafe { ::core::intrinsics::truncf64(x) } } } let x1p120 = f64::from_bits(0x4770000000000000); // 0x1p120f === 2 ^ 120 let mut i: u64 = x.to_bits(); let mut e: i64 = (i >> 52 & 0x7ff) as i64 - 0x3ff + 12; let m: u64; if e >= 52 + 12 { return x; } if e < 12 { e = 1; } m = -1i64 as u64 >> e; if (i & m) == 0 { return x; } force_eval!(x + x1p120); i &= !m; f64::from_bits(i) } #[cfg(test)] mod tests { #[test] fn sanity_check() { assert_eq!(super::trunc(1.1), 1.0); } } compiler_builtins-0.1.101/libm/src/math/truncf.rs000064400000000000000000000020661046102023000200250ustar 00000000000000use core::f32; #[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)] pub fn truncf(x: f32) -> f32 { // On wasm32 we know that LLVM's intrinsic will compile to an optimized // `f32.trunc` native instruction, so we can leverage this for both code size // and speed. llvm_intrinsically_optimized! { #[cfg(target_arch = "wasm32")] { return unsafe { ::core::intrinsics::truncf32(x) } } } let x1p120 = f32::from_bits(0x7b800000); // 0x1p120f === 2 ^ 120 let mut i: u32 = x.to_bits(); let mut e: i32 = (i >> 23 & 0xff) as i32 - 0x7f + 9; let m: u32; if e >= 23 + 9 { return x; } if e < 9 { e = 1; } m = -1i32 as u32 >> e; if (i & m) == 0 { return x; } force_eval!(x + x1p120); i &= !m; f32::from_bits(i) } // PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520 #[cfg(not(target_arch = "powerpc64"))] #[cfg(test)] mod tests { #[test] fn sanity_check() { assert_eq!(super::truncf(1.1), 1.0); } } compiler_builtins-0.1.101/src/aarch64.rs000064400000000000000000000010031046102023000160660ustar 00000000000000#![allow(unused_imports)] use core::intrinsics; intrinsics! { #[naked] #[cfg(all(target_os = "uefi", not(feature = "no-asm")))] pub unsafe extern "C" fn __chkstk() { core::arch::asm!( ".p2align 2", "lsl x16, x15, #4", "mov x17, sp", "1:", "sub x17, x17, 4096", "subs x16, x16, 4096", "ldr xzr, [x17]", "b.gt 1b", "ret", options(noreturn) ); } } compiler_builtins-0.1.101/src/aarch64_linux.rs000064400000000000000000000240071046102023000173160ustar 00000000000000//! Aarch64 targets have two possible implementations for atomics: //! 1. Load-Locked, Store-Conditional (LL/SC), older and slower. //! 2. Large System Extensions (LSE), newer and faster. //! To avoid breaking backwards compat, C toolchains introduced a concept of "outlined atomics", //! where atomic operations call into the compiler runtime to dispatch between two depending on //! which is supported on the current CPU. //! See https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics for more discussion. //! //! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection. //! Use the `compiler-rt` intrinsics if you want LSE support. //! //! Ported from `aarch64/lse.S` in LLVM's compiler-rt. //! //! Generate functions for each of the following symbols: //! __aarch64_casM_ORDER //! __aarch64_swpN_ORDER //! __aarch64_ldaddN_ORDER //! __aarch64_ldclrN_ORDER //! __aarch64_ldeorN_ORDER //! __aarch64_ldsetN_ORDER //! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8, 16}, ORDER = { relax, acq, rel, acq_rel } //! //! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants. //! We do something similar, but with macro arguments. 
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule // We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor. /// Translate a byte size to a Rust type. #[rustfmt::skip] macro_rules! int_ty { (1) => { i8 }; (2) => { i16 }; (4) => { i32 }; (8) => { i64 }; (16) => { i128 }; } /// Given a byte size and a register number, return a register of the appropriate size. /// /// See . #[rustfmt::skip] macro_rules! reg { (1, $num:literal) => { concat!("w", $num) }; (2, $num:literal) => { concat!("w", $num) }; (4, $num:literal) => { concat!("w", $num) }; (8, $num:literal) => { concat!("x", $num) }; } /// Given an atomic ordering, translate it to the acquire suffix for the lxdr aarch64 ASM instruction. #[rustfmt::skip] macro_rules! acquire { (Relaxed) => { "" }; (Acquire) => { "a" }; (Release) => { "" }; (AcqRel) => { "a" }; } /// Given an atomic ordering, translate it to the release suffix for the stxr aarch64 ASM instruction. #[rustfmt::skip] macro_rules! release { (Relaxed) => { "" }; (Acquire) => { "" }; (Release) => { "l" }; (AcqRel) => { "l" }; } /// Given a size in bytes, translate it to the byte suffix for an aarch64 ASM instruction. #[rustfmt::skip] macro_rules! size { (1) => { "b" }; (2) => { "h" }; (4) => { "" }; (8) => { "" }; (16) => { "" }; } /// Given a byte size, translate it to an Unsigned eXTend instruction /// with the correct semantics. /// /// See #[rustfmt::skip] macro_rules! uxt { (1) => { "uxtb" }; (2) => { "uxth" }; ($_:tt) => { "mov" }; } /// Given an atomic ordering and byte size, translate it to a LoaD eXclusive Register instruction /// with the correct semantics. /// /// See . macro_rules! ldxr { ($ordering:ident, $bytes:tt) => { concat!("ld", acquire!($ordering), "xr", size!($bytes)) }; } /// Given an atomic ordering and byte size, translate it to a STore eXclusive Register instruction /// with the correct semantics. /// /// See . macro_rules! stxr { ($ordering:ident, $bytes:tt) => { concat!("st", release!($ordering), "xr", size!($bytes)) }; } /// Given an atomic ordering and byte size, translate it to a LoaD eXclusive Pair of registers instruction /// with the correct semantics. /// /// See macro_rules! ldxp { ($ordering:ident) => { concat!("ld", acquire!($ordering), "xp") }; } /// Given an atomic ordering and byte size, translate it to a STore eXclusive Pair of registers instruction /// with the correct semantics. /// /// See . macro_rules! stxp { ($ordering:ident) => { concat!("st", release!($ordering), "xp") }; } /// See . macro_rules! compare_and_swap { ($ordering:ident, $bytes:tt, $name:ident) => { intrinsics! { #[maybe_use_optimized_c_shim] #[naked] pub unsafe extern "C" fn $name ( expected: int_ty!($bytes), desired: int_ty!($bytes), ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { // We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap. unsafe { core::arch::asm! { // UXT s(tmp0), s(0) concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", // LDXR s(0), [x2] concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x2]"), // cmp s(0), s(tmp0) concat!("cmp ", reg!($bytes, 0), ", ", reg!($bytes, 16)), "bne 1f", // STXR w(tmp1), s(1), [x2] concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 1), ", [x2]"), "cbnz w17, 0b", "1:", "ret", options(noreturn) } } } } }; } // i128 uses a completely different impl, so it has its own macro. macro_rules! compare_and_swap_i128 { ($ordering:ident, $name:ident) => { intrinsics! 
{ #[maybe_use_optimized_c_shim] #[naked] pub unsafe extern "C" fn $name ( expected: i128, desired: i128, ptr: *mut i128 ) -> i128 { unsafe { core::arch::asm! { "mov x16, x0", "mov x17, x1", "0:", // LDXP x0, x1, [x4] concat!(ldxp!($ordering), " x0, x1, [x4]"), "cmp x0, x16", "ccmp x1, x17, #0, eq", "bne 1f", // STXP w(tmp2), x2, x3, [x4] concat!(stxp!($ordering), " w15, x2, x3, [x4]"), "cbnz w15, 0b", "1:", "ret", options(noreturn) } } } } }; } /// See . macro_rules! swap { ($ordering:ident, $bytes:tt, $name:ident) => { intrinsics! { #[maybe_use_optimized_c_shim] #[naked] pub unsafe extern "C" fn $name ( left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { unsafe { core::arch::asm! { // mov s(tmp0), s(0) concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", // LDXR s(0), [x1] concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x1]"), // STXR w(tmp1), s(tmp0), [x1] concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"), "cbnz w17, 0b", "ret", options(noreturn) } } } } }; } /// See (e.g.) . macro_rules! fetch_op { ($ordering:ident, $bytes:tt, $name:ident, $op:literal) => { intrinsics! { #[maybe_use_optimized_c_shim] #[naked] pub unsafe extern "C" fn $name ( val: int_ty!($bytes), ptr: *mut int_ty!($bytes) ) -> int_ty!($bytes) { unsafe { core::arch::asm! { // mov s(tmp0), s(0) concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)), "0:", // LDXR s(0), [x1] concat!(ldxr!($ordering, $bytes), " ", reg!($bytes, 0), ", [x1]"), // OP s(tmp1), s(0), s(tmp0) concat!($op, " ", reg!($bytes, 17), ", ", reg!($bytes, 0), ", ", reg!($bytes, 16)), // STXR w(tmp2), s(tmp1), [x1] concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"), "cbnz w15, 0b", "ret", options(noreturn) } } } } } } // We need a single macro to pass to `foreach_ldadd`. macro_rules! add { ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "add" } }; } macro_rules! and { ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "bic" } }; } macro_rules! xor { ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "eor" } }; } macro_rules! or { ($ordering:ident, $bytes:tt, $name:ident) => { fetch_op! { $ordering, $bytes, $name, "orr" } }; } // See `generate_aarch64_outlined_atomics` in build.rs. include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs")); foreach_cas!(compare_and_swap); foreach_cas16!(compare_and_swap_i128); foreach_swp!(swap); foreach_ldadd!(add); foreach_ldclr!(and); foreach_ldeor!(xor); foreach_ldset!(or); compiler_builtins-0.1.101/src/arm.rs000064400000000000000000000125571046102023000154350ustar 00000000000000#![cfg(not(feature = "no-asm"))] #![allow(unused_imports)] use core::intrinsics; // iOS symbols have a leading underscore. #[cfg(target_os = "ios")] macro_rules! bl { ($func:literal) => { concat!("bl _", $func) }; } #[cfg(not(target_os = "ios"))] macro_rules! bl { ($func:literal) => { concat!("bl ", $func) }; } intrinsics! { // NOTE This function and the ones below are implemented using assembly because they are using a // custom calling convention which can't be implemented using a normal Rust function. 
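// --- Illustrative aside (not part of the original source) ---------------------------------
// The `__aeabi_*divmod` helpers below return *two* values at once: the quotient in r0 and the
// remainder in r1 (the 64-bit variants use r0-r3 for the pair), which is why they must be
// written in assembly rather than as normal Rust functions. The arithmetic contract itself is
// just a combined divide/modulo; in ordinary Rust it would be this tuple-returning sketch
// (den != 0 assumed):
fn udivmod_contract(num: u32, den: u32) -> (u32, u32) {
    (num / den, num % den) // (quotient, remainder), e.g. (7, 3) -> (2, 1)
}
// -------------------------------------------------------------------------------------------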
#[cfg_attr(all(not(windows), not(target_vendor="apple")), weak)] #[naked] #[cfg(not(target_env = "msvc"))] pub unsafe extern "C" fn __aeabi_uidivmod() { core::arch::asm!( "push {{lr}}", "sub sp, sp, #4", "mov r2, sp", bl!("__udivmodsi4"), "ldr r1, [sp]", "add sp, sp, #4", "pop {{pc}}", options(noreturn) ); } #[cfg_attr(all(not(windows), not(target_vendor="apple")), weak)] #[naked] pub unsafe extern "C" fn __aeabi_uldivmod() { core::arch::asm!( "push {{r4, lr}}", "sub sp, sp, #16", "add r4, sp, #8", "str r4, [sp]", bl!("__udivmoddi4"), "ldr r2, [sp, #8]", "ldr r3, [sp, #12]", "add sp, sp, #16", "pop {{r4, pc}}", options(noreturn) ); } #[cfg_attr(all(not(windows), not(target_vendor="apple")), weak)] #[naked] pub unsafe extern "C" fn __aeabi_idivmod() { core::arch::asm!( "push {{r0, r1, r4, lr}}", bl!("__aeabi_idiv"), "pop {{r1, r2}}", "muls r2, r2, r0", "subs r1, r1, r2", "pop {{r4, pc}}", options(noreturn) ); } #[cfg_attr(all(not(windows), not(target_vendor="apple")), weak)] #[naked] pub unsafe extern "C" fn __aeabi_ldivmod() { core::arch::asm!( "push {{r4, lr}}", "sub sp, sp, #16", "add r4, sp, #8", "str r4, [sp]", bl!("__divmoddi4"), "ldr r2, [sp, #8]", "ldr r3, [sp, #12]", "add sp, sp, #16", "pop {{r4, pc}}", options(noreturn) ); } // The following functions use weak linkage to allow users to override // with custom implementation. // FIXME: The `*4` and `*8` variants should be defined as aliases. #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) { crate::mem::memcpy(dest, src, n); } #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) { // We are guaranteed 4-alignment, so accessing at u32 is okay. let mut dest = dest as *mut u32; let mut src = src as *mut u32; let mut n = n; while n >= 4 { *dest = *src; dest = dest.offset(1); src = src.offset(1); n -= 4; } __aeabi_memcpy(dest as *mut u8, src as *const u8, n); } #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memcpy4(dest, src, n); } #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) { crate::mem::memmove(dest, src, n); } #[weak] #[cfg(not(any(target_os = "ios", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memmove4(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memmove(dest, src, n); } #[weak] #[cfg(not(any(target_os = "ios", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memmove8(dest: *mut u8, src: *const u8, n: usize) { __aeabi_memmove(dest, src, n); } #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memset(dest: *mut u8, n: usize, c: i32) { // Note the different argument order crate::mem::memset(dest, c, n); } #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memset4(dest: *mut u8, n: usize, c: i32) { let mut dest = dest as *mut u32; let mut n = n; let byte = (c as u32) & 0xff; let c = (byte << 24) | (byte << 16) | (byte << 8) | byte; while n >= 4 { *dest = c; dest = dest.offset(1); n -= 4; } __aeabi_memset(dest as *mut u8, n, byte as i32); } #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memset8(dest: *mut u8, n: usize, c: i32) { __aeabi_memset4(dest, n, c); } #[weak] #[cfg(not(target_os = "ios"))] pub unsafe extern "aapcs" fn __aeabi_memclr(dest: *mut u8, n: usize) { __aeabi_memset(dest, n, 0); } #[weak] 
#[cfg(not(any(target_os = "ios", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memclr4(dest: *mut u8, n: usize) { __aeabi_memset4(dest, n, 0); } #[weak] #[cfg(not(any(target_os = "ios", target_env = "msvc")))] pub unsafe extern "aapcs" fn __aeabi_memclr8(dest: *mut u8, n: usize) { __aeabi_memset4(dest, n, 0); } } compiler_builtins-0.1.101/src/arm_linux.rs000064400000000000000000000201371046102023000166450ustar 00000000000000use core::intrinsics; use core::mem; // Kernel-provided user-mode helper functions: // https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt unsafe fn __kuser_cmpxchg(oldval: u32, newval: u32, ptr: *mut u32) -> bool { let f: extern "C" fn(u32, u32, *mut u32) -> u32 = mem::transmute(0xffff0fc0usize as *const ()); f(oldval, newval, ptr) == 0 } unsafe fn __kuser_memory_barrier() { let f: extern "C" fn() = mem::transmute(0xffff0fa0usize as *const ()); f(); } // Word-align a pointer fn align_ptr(ptr: *mut T) -> *mut u32 { // This gives us a mask of 0 when T == u32 since the pointer is already // supposed to be aligned, which avoids any masking in that case. let ptr_mask = 3 & (4 - mem::size_of::()); (ptr as usize & !ptr_mask) as *mut u32 } // Calculate the shift and mask of a value inside an aligned word fn get_shift_mask(ptr: *mut T) -> (u32, u32) { // Mask to get the low byte/halfword/word let mask = match mem::size_of::() { 1 => 0xff, 2 => 0xffff, 4 => 0xffffffff, _ => unreachable!(), }; // If we are on big-endian then we need to adjust the shift accordingly let endian_adjust = if cfg!(target_endian = "little") { 0 } else { 4 - mem::size_of::() as u32 }; // Shift to get the desired element in the word let ptr_mask = 3 & (4 - mem::size_of::()); let shift = ((ptr as usize & ptr_mask) as u32 ^ endian_adjust) * 8; (shift, mask) } // Extract a value from an aligned word fn extract_aligned(aligned: u32, shift: u32, mask: u32) -> u32 { (aligned >> shift) & mask } // Insert a value into an aligned word fn insert_aligned(aligned: u32, val: u32, shift: u32, mask: u32) -> u32 { (aligned & !(mask << shift)) | ((val & mask) << shift) } // Generic atomic read-modify-write operation unsafe fn atomic_rmw u32, G: Fn(u32, u32) -> u32>(ptr: *mut T, f: F, g: G) -> u32 { let aligned_ptr = align_ptr(ptr); let (shift, mask) = get_shift_mask(ptr); loop { let curval_aligned = intrinsics::atomic_load_unordered(aligned_ptr); let curval = extract_aligned(curval_aligned, shift, mask); let newval = f(curval); let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask); if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) { return g(curval, newval); } } } // Generic atomic compare-exchange operation unsafe fn atomic_cmpxchg(ptr: *mut T, oldval: u32, newval: u32) -> u32 { let aligned_ptr = align_ptr(ptr); let (shift, mask) = get_shift_mask(ptr); loop { let curval_aligned = intrinsics::atomic_load_unordered(aligned_ptr); let curval = extract_aligned(curval_aligned, shift, mask); if curval != oldval { return curval; } let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask); if __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) { return oldval; } } } macro_rules! atomic_rmw { ($name:ident, $ty:ty, $op:expr, $fetch:expr) => { intrinsics! 
{ pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty { atomic_rmw(ptr, |x| $op(x as $ty, val) as u32, |old, new| $fetch(old, new)) as $ty } } }; (@old $name:ident, $ty:ty, $op:expr) => { atomic_rmw!($name, $ty, $op, |old, _| old); }; (@new $name:ident, $ty:ty, $op:expr) => { atomic_rmw!($name, $ty, $op, |_, new| new); }; } macro_rules! atomic_cmpxchg { ($name:ident, $ty:ty) => { intrinsics! { pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty { atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty } } }; } atomic_rmw!(@old __sync_fetch_and_add_1, u8, |a: u8, b: u8| a.wrapping_add(b)); atomic_rmw!(@old __sync_fetch_and_add_2, u16, |a: u16, b: u16| a .wrapping_add(b)); atomic_rmw!(@old __sync_fetch_and_add_4, u32, |a: u32, b: u32| a .wrapping_add(b)); atomic_rmw!(@new __sync_add_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_add(b)); atomic_rmw!(@new __sync_add_and_fetch_2, u16, |a: u16, b: u16| a .wrapping_add(b)); atomic_rmw!(@new __sync_add_and_fetch_4, u32, |a: u32, b: u32| a .wrapping_add(b)); atomic_rmw!(@old __sync_fetch_and_sub_1, u8, |a: u8, b: u8| a.wrapping_sub(b)); atomic_rmw!(@old __sync_fetch_and_sub_2, u16, |a: u16, b: u16| a .wrapping_sub(b)); atomic_rmw!(@old __sync_fetch_and_sub_4, u32, |a: u32, b: u32| a .wrapping_sub(b)); atomic_rmw!(@new __sync_sub_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_sub(b)); atomic_rmw!(@new __sync_sub_and_fetch_2, u16, |a: u16, b: u16| a .wrapping_sub(b)); atomic_rmw!(@new __sync_sub_and_fetch_4, u32, |a: u32, b: u32| a .wrapping_sub(b)); atomic_rmw!(@old __sync_fetch_and_and_1, u8, |a: u8, b: u8| a & b); atomic_rmw!(@old __sync_fetch_and_and_2, u16, |a: u16, b: u16| a & b); atomic_rmw!(@old __sync_fetch_and_and_4, u32, |a: u32, b: u32| a & b); atomic_rmw!(@new __sync_and_and_fetch_1, u8, |a: u8, b: u8| a & b); atomic_rmw!(@new __sync_and_and_fetch_2, u16, |a: u16, b: u16| a & b); atomic_rmw!(@new __sync_and_and_fetch_4, u32, |a: u32, b: u32| a & b); atomic_rmw!(@old __sync_fetch_and_or_1, u8, |a: u8, b: u8| a | b); atomic_rmw!(@old __sync_fetch_and_or_2, u16, |a: u16, b: u16| a | b); atomic_rmw!(@old __sync_fetch_and_or_4, u32, |a: u32, b: u32| a | b); atomic_rmw!(@new __sync_or_and_fetch_1, u8, |a: u8, b: u8| a | b); atomic_rmw!(@new __sync_or_and_fetch_2, u16, |a: u16, b: u16| a | b); atomic_rmw!(@new __sync_or_and_fetch_4, u32, |a: u32, b: u32| a | b); atomic_rmw!(@old __sync_fetch_and_xor_1, u8, |a: u8, b: u8| a ^ b); atomic_rmw!(@old __sync_fetch_and_xor_2, u16, |a: u16, b: u16| a ^ b); atomic_rmw!(@old __sync_fetch_and_xor_4, u32, |a: u32, b: u32| a ^ b); atomic_rmw!(@new __sync_xor_and_fetch_1, u8, |a: u8, b: u8| a ^ b); atomic_rmw!(@new __sync_xor_and_fetch_2, u16, |a: u16, b: u16| a ^ b); atomic_rmw!(@new __sync_xor_and_fetch_4, u32, |a: u32, b: u32| a ^ b); atomic_rmw!(@old __sync_fetch_and_nand_1, u8, |a: u8, b: u8| !(a & b)); atomic_rmw!(@old __sync_fetch_and_nand_2, u16, |a: u16, b: u16| !(a & b)); atomic_rmw!(@old __sync_fetch_and_nand_4, u32, |a: u32, b: u32| !(a & b)); atomic_rmw!(@new __sync_nand_and_fetch_1, u8, |a: u8, b: u8| !(a & b)); atomic_rmw!(@new __sync_nand_and_fetch_2, u16, |a: u16, b: u16| !(a & b)); atomic_rmw!(@new __sync_nand_and_fetch_4, u32, |a: u32, b: u32| !(a & b)); atomic_rmw!(@old __sync_fetch_and_max_1, i8, |a: i8, b: i8| if a > b { a } else { b }); atomic_rmw!(@old __sync_fetch_and_max_2, i16, |a: i16, b: i16| if a > b { a } else { b }); atomic_rmw!(@old __sync_fetch_and_max_4, i32, |a: i32, b: i32| if a > b { a } else { b }); atomic_rmw!(@old 
__sync_fetch_and_umax_1, u8, |a: u8, b: u8| if a > b { a } else { b }); atomic_rmw!(@old __sync_fetch_and_umax_2, u16, |a: u16, b: u16| if a > b { a } else { b }); atomic_rmw!(@old __sync_fetch_and_umax_4, u32, |a: u32, b: u32| if a > b { a } else { b }); atomic_rmw!(@old __sync_fetch_and_min_1, i8, |a: i8, b: i8| if a < b { a } else { b }); atomic_rmw!(@old __sync_fetch_and_min_2, i16, |a: i16, b: i16| if a < b { a } else { b }); atomic_rmw!(@old __sync_fetch_and_min_4, i32, |a: i32, b: i32| if a < b { a } else { b }); atomic_rmw!(@old __sync_fetch_and_umin_1, u8, |a: u8, b: u8| if a < b { a } else { b }); atomic_rmw!(@old __sync_fetch_and_umin_2, u16, |a: u16, b: u16| if a < b { a } else { b }); atomic_rmw!(@old __sync_fetch_and_umin_4, u32, |a: u32, b: u32| if a < b { a } else { b }); atomic_rmw!(@old __sync_lock_test_and_set_1, u8, |_: u8, b: u8| b); atomic_rmw!(@old __sync_lock_test_and_set_2, u16, |_: u16, b: u16| b); atomic_rmw!(@old __sync_lock_test_and_set_4, u32, |_: u32, b: u32| b); atomic_cmpxchg!(__sync_val_compare_and_swap_1, u8); atomic_cmpxchg!(__sync_val_compare_and_swap_2, u16); atomic_cmpxchg!(__sync_val_compare_and_swap_4, u32); intrinsics! { pub unsafe extern "C" fn __sync_synchronize() { __kuser_memory_barrier(); } } compiler_builtins-0.1.101/src/float/add.rs000064400000000000000000000156361046102023000165140ustar 00000000000000use crate::float::Float; use crate::int::{CastInto, Int}; /// Returns `a + b` fn add(a: F, b: F) -> F where u32: CastInto, F::Int: CastInto, i32: CastInto, F::Int: CastInto, { let one = F::Int::ONE; let zero = F::Int::ZERO; let bits = F::BITS.cast(); let significand_bits = F::SIGNIFICAND_BITS; let max_exponent = F::EXPONENT_MAX; let implicit_bit = F::IMPLICIT_BIT; let significand_mask = F::SIGNIFICAND_MASK; let sign_bit = F::SIGN_MASK as F::Int; let abs_mask = sign_bit - one; let exponent_mask = F::EXPONENT_MASK; let inf_rep = exponent_mask; let quiet_bit = implicit_bit >> 1; let qnan_rep = exponent_mask | quiet_bit; let mut a_rep = a.repr(); let mut b_rep = b.repr(); let a_abs = a_rep & abs_mask; let b_abs = b_rep & abs_mask; // Detect if a or b is zero, infinity, or NaN. if a_abs.wrapping_sub(one) >= inf_rep - one || b_abs.wrapping_sub(one) >= inf_rep - one { // NaN + anything = qNaN if a_abs > inf_rep { return F::from_repr(a_abs | quiet_bit); } // anything + NaN = qNaN if b_abs > inf_rep { return F::from_repr(b_abs | quiet_bit); } if a_abs == inf_rep { // +/-infinity + -/+infinity = qNaN if (a.repr() ^ b.repr()) == sign_bit { return F::from_repr(qnan_rep); } else { // +/-infinity + anything remaining = +/- infinity return a; } } // anything remaining + +/-infinity = +/-infinity if b_abs == inf_rep { return b; } // zero + anything = anything if a_abs == Int::ZERO { // but we need to get the sign right for zero + zero if b_abs == Int::ZERO { return F::from_repr(a.repr() & b.repr()); } else { return b; } } // anything + zero = anything if b_abs == Int::ZERO { return a; } } // Swap a and b if necessary so that a has the larger absolute value. if b_abs > a_abs { // Don't use mem::swap because it may generate references to memcpy in unoptimized code. let tmp = a_rep; a_rep = b_rep; b_rep = tmp; } // Extract the exponent and significand from the (possibly swapped) a and b. 
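// Rough worked illustration of the field extraction below (hand-checked for
// the f32 instantiation, where the exponent field is bits 23..=30 and the
// significand field is the low 23 bits): a = 3.0_f32 has repr 0x4040_0000,
// so its biased exponent is (0x4040_0000 & 0x7F80_0000) >> 23 = 128 and its
// significand field is 0x0040_0000; ORing in the implicit bit and shifting
// left by 3 further down makes room for the round, guard and sticky bits.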
let mut a_exponent: i32 = ((a_rep & exponent_mask) >> significand_bits).cast(); let mut b_exponent: i32 = ((b_rep & exponent_mask) >> significand_bits).cast(); let mut a_significand = a_rep & significand_mask; let mut b_significand = b_rep & significand_mask; // normalize any denormals, and adjust the exponent accordingly. if a_exponent == 0 { let (exponent, significand) = F::normalize(a_significand); a_exponent = exponent; a_significand = significand; } if b_exponent == 0 { let (exponent, significand) = F::normalize(b_significand); b_exponent = exponent; b_significand = significand; } // The sign of the result is the sign of the larger operand, a. If they // have opposite signs, we are performing a subtraction; otherwise addition. let result_sign = a_rep & sign_bit; let subtraction = ((a_rep ^ b_rep) & sign_bit) != zero; // Shift the significands to give us round, guard and sticky, and or in the // implicit significand bit. (If we fell through from the denormal path it // was already set by normalize(), but setting it twice won't hurt // anything.) a_significand = (a_significand | implicit_bit) << 3; b_significand = (b_significand | implicit_bit) << 3; // Shift the significand of b by the difference in exponents, with a sticky // bottom bit to get rounding correct. let align = a_exponent.wrapping_sub(b_exponent).cast(); if align != Int::ZERO { if align < bits { let sticky = F::Int::from_bool(b_significand << bits.wrapping_sub(align).cast() != Int::ZERO); b_significand = (b_significand >> align.cast()) | sticky; } else { b_significand = one; // sticky; b is known to be non-zero. } } if subtraction { a_significand = a_significand.wrapping_sub(b_significand); // If a == -b, return +zero. if a_significand == Int::ZERO { return F::from_repr(Int::ZERO); } // If partial cancellation occured, we need to left-shift the result // and adjust the exponent: if a_significand < implicit_bit << 3 { let shift = a_significand.leading_zeros() as i32 - (implicit_bit << 3).leading_zeros() as i32; a_significand <<= shift; a_exponent -= shift; } } else { // addition a_significand += b_significand; // If the addition carried up, we need to right-shift the result and // adjust the exponent: if a_significand & implicit_bit << 4 != Int::ZERO { let sticky = F::Int::from_bool(a_significand & one != Int::ZERO); a_significand = a_significand >> 1 | sticky; a_exponent += 1; } } // If we have overflowed the type, return +/- infinity: if a_exponent >= max_exponent as i32 { return F::from_repr(inf_rep | result_sign); } if a_exponent <= 0 { // Result is denormal before rounding; the exponent is zero and we // need to shift the significand. let shift = (1 - a_exponent).cast(); let sticky = F::Int::from_bool((a_significand << bits.wrapping_sub(shift).cast()) != Int::ZERO); a_significand = a_significand >> shift.cast() | sticky; a_exponent = 0; } // Low three bits are round, guard, and sticky. let a_significand_i32: i32 = a_significand.cast(); let round_guard_sticky: i32 = a_significand_i32 & 0x7; // Shift the significand into place, and mask off the implicit bit. let mut result = a_significand >> 3 & significand_mask; // Insert the exponent and sign. result |= a_exponent.cast() << significand_bits; result |= result_sign; // Final rounding. The result may overflow to infinity, but that is the // correct result in that case. if round_guard_sticky > 0x4 { result += one; } if round_guard_sticky == 0x4 { result += result & one; } F::from_repr(result) } intrinsics! 
{ #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_fadd] pub extern "C" fn __addsf3(a: f32, b: f32) -> f32 { add(a, b) } #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_dadd] pub extern "C" fn __adddf3(a: f64, b: f64) -> f64 { add(a, b) } #[cfg(target_arch = "arm")] pub extern "C" fn __addsf3vfp(a: f32, b: f32) -> f32 { a + b } #[cfg(target_arch = "arm")] pub extern "C" fn __adddf3vfp(a: f64, b: f64) -> f64 { a + b } } compiler_builtins-0.1.101/src/float/cmp.rs000064400000000000000000000144231046102023000165340ustar 00000000000000#![allow(unreachable_code)] use crate::float::Float; use crate::int::Int; #[derive(Clone, Copy)] enum Result { Less, Equal, Greater, Unordered, } impl Result { fn to_le_abi(self) -> i32 { match self { Result::Less => -1, Result::Equal => 0, Result::Greater => 1, Result::Unordered => 1, } } fn to_ge_abi(self) -> i32 { match self { Result::Less => -1, Result::Equal => 0, Result::Greater => 1, Result::Unordered => -1, } } } fn cmp(a: F, b: F) -> Result { let one = F::Int::ONE; let zero = F::Int::ZERO; let szero = F::SignedInt::ZERO; let sign_bit = F::SIGN_MASK as F::Int; let abs_mask = sign_bit - one; let exponent_mask = F::EXPONENT_MASK; let inf_rep = exponent_mask; let a_rep = a.repr(); let b_rep = b.repr(); let a_abs = a_rep & abs_mask; let b_abs = b_rep & abs_mask; // If either a or b is NaN, they are unordered. if a_abs > inf_rep || b_abs > inf_rep { return Result::Unordered; } // If a and b are both zeros, they are equal. if a_abs | b_abs == zero { return Result::Equal; } let a_srep = a.signed_repr(); let b_srep = b.signed_repr(); // If at least one of a and b is positive, we get the same result comparing // a and b as signed integers as we would with a fp_ting-point compare. if a_srep & b_srep >= szero { if a_srep < b_srep { Result::Less } else if a_srep == b_srep { Result::Equal } else { Result::Greater } // Otherwise, both are negative, so we need to flip the sense of the // comparison to get the correct result. (This assumes a twos- or ones- // complement integer representation; if integers are represented in a // sign-magnitude representation, then this flip is incorrect). } else if a_srep > b_srep { Result::Less } else if a_srep == b_srep { Result::Equal } else { Result::Greater } } fn unord(a: F, b: F) -> bool { let one = F::Int::ONE; let sign_bit = F::SIGN_MASK as F::Int; let abs_mask = sign_bit - one; let exponent_mask = F::EXPONENT_MASK; let inf_rep = exponent_mask; let a_rep = a.repr(); let b_rep = b.repr(); let a_abs = a_rep & abs_mask; let b_abs = b_rep & abs_mask; a_abs > inf_rep || b_abs > inf_rep } intrinsics! 
{ #[avr_skip] pub extern "C" fn __lesf2(a: f32, b: f32) -> i32 { cmp(a, b).to_le_abi() } #[avr_skip] pub extern "C" fn __gesf2(a: f32, b: f32) -> i32 { cmp(a, b).to_ge_abi() } #[avr_skip] #[arm_aeabi_alias = __aeabi_fcmpun] pub extern "C" fn __unordsf2(a: f32, b: f32) -> i32 { unord(a, b) as i32 } #[avr_skip] pub extern "C" fn __eqsf2(a: f32, b: f32) -> i32 { cmp(a, b).to_le_abi() } #[avr_skip] pub extern "C" fn __ltsf2(a: f32, b: f32) -> i32 { cmp(a, b).to_le_abi() } #[avr_skip] pub extern "C" fn __nesf2(a: f32, b: f32) -> i32 { cmp(a, b).to_le_abi() } #[avr_skip] pub extern "C" fn __gtsf2(a: f32, b: f32) -> i32 { cmp(a, b).to_ge_abi() } #[avr_skip] pub extern "C" fn __ledf2(a: f64, b: f64) -> i32 { cmp(a, b).to_le_abi() } #[avr_skip] pub extern "C" fn __gedf2(a: f64, b: f64) -> i32 { cmp(a, b).to_ge_abi() } #[avr_skip] #[arm_aeabi_alias = __aeabi_dcmpun] pub extern "C" fn __unorddf2(a: f64, b: f64) -> i32 { unord(a, b) as i32 } #[avr_skip] pub extern "C" fn __eqdf2(a: f64, b: f64) -> i32 { cmp(a, b).to_le_abi() } #[avr_skip] pub extern "C" fn __ltdf2(a: f64, b: f64) -> i32 { cmp(a, b).to_le_abi() } #[avr_skip] pub extern "C" fn __nedf2(a: f64, b: f64) -> i32 { cmp(a, b).to_le_abi() } #[avr_skip] pub extern "C" fn __gtdf2(a: f64, b: f64) -> i32 { cmp(a, b).to_ge_abi() } } #[cfg(target_arch = "arm")] intrinsics! { pub extern "aapcs" fn __aeabi_fcmple(a: f32, b: f32) -> i32 { (__lesf2(a, b) <= 0) as i32 } pub extern "aapcs" fn __aeabi_fcmpge(a: f32, b: f32) -> i32 { (__gesf2(a, b) >= 0) as i32 } pub extern "aapcs" fn __aeabi_fcmpeq(a: f32, b: f32) -> i32 { (__eqsf2(a, b) == 0) as i32 } pub extern "aapcs" fn __aeabi_fcmplt(a: f32, b: f32) -> i32 { (__ltsf2(a, b) < 0) as i32 } pub extern "aapcs" fn __aeabi_fcmpgt(a: f32, b: f32) -> i32 { (__gtsf2(a, b) > 0) as i32 } pub extern "aapcs" fn __aeabi_dcmple(a: f64, b: f64) -> i32 { (__ledf2(a, b) <= 0) as i32 } pub extern "aapcs" fn __aeabi_dcmpge(a: f64, b: f64) -> i32 { (__gedf2(a, b) >= 0) as i32 } pub extern "aapcs" fn __aeabi_dcmpeq(a: f64, b: f64) -> i32 { (__eqdf2(a, b) == 0) as i32 } pub extern "aapcs" fn __aeabi_dcmplt(a: f64, b: f64) -> i32 { (__ltdf2(a, b) < 0) as i32 } pub extern "aapcs" fn __aeabi_dcmpgt(a: f64, b: f64) -> i32 { (__gtdf2(a, b) > 0) as i32 } // On hard-float targets LLVM will use native instructions // for all VFP intrinsics below pub extern "C" fn __gesf2vfp(a: f32, b: f32) -> i32 { (a >= b) as i32 } pub extern "C" fn __gedf2vfp(a: f64, b: f64) -> i32 { (a >= b) as i32 } pub extern "C" fn __gtsf2vfp(a: f32, b: f32) -> i32 { (a > b) as i32 } pub extern "C" fn __gtdf2vfp(a: f64, b: f64) -> i32 { (a > b) as i32 } pub extern "C" fn __ltsf2vfp(a: f32, b: f32) -> i32 { (a < b) as i32 } pub extern "C" fn __ltdf2vfp(a: f64, b: f64) -> i32 { (a < b) as i32 } pub extern "C" fn __lesf2vfp(a: f32, b: f32) -> i32 { (a <= b) as i32 } pub extern "C" fn __ledf2vfp(a: f64, b: f64) -> i32 { (a <= b) as i32 } pub extern "C" fn __nesf2vfp(a: f32, b: f32) -> i32 { (a != b) as i32 } pub extern "C" fn __nedf2vfp(a: f64, b: f64) -> i32 { (a != b) as i32 } pub extern "C" fn __eqsf2vfp(a: f32, b: f32) -> i32 { (a == b) as i32 } pub extern "C" fn __eqdf2vfp(a: f64, b: f64) -> i32 { (a == b) as i32 } } compiler_builtins-0.1.101/src/float/conv.rs000064400000000000000000000331071046102023000167220ustar 00000000000000/// Conversions from integers to floats. /// /// These are hand-optimized bit twiddling code, /// which unfortunately isn't the easiest kind of code to read. 
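///
/// As a rough hand-worked illustration (for the `u32 -> f32` case): `i = 3`
/// has `n = 30` leading zeros, so `a = 0x00C0_0000` (the leading 1 lands on
/// bit 23), `b = 0` (nothing to round away), `m = a`, and
/// `e = 157 - 30 = 127`; then `(e << 23) + m = 0x3F80_0000 + 0x00C0_0000 =
/// 0x4040_0000`, the bit pattern of `3.0_f32`. The leading 1 of `m` carrying
/// into the exponent field is why `+` rather than `|` is used.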
/// /// The algorithm is explained here: mod int_to_float { pub fn u32_to_f32_bits(i: u32) -> u32 { if i == 0 { return 0; } let n = i.leading_zeros(); let a = (i << n) >> 8; // Significant bits, with bit 24 still in tact. let b = (i << n) << 24; // Insignificant bits, only relevant for rounding. let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. let e = 157 - n; // Exponent plus 127, minus one. (e << 23) + m // + not |, so the mantissa can overflow into the exponent. } pub fn u32_to_f64_bits(i: u32) -> u64 { if i == 0 { return 0; } let n = i.leading_zeros(); let m = (i as u64) << (21 + n); // Significant bits, with bit 53 still in tact. let e = 1053 - n as u64; // Exponent plus 1023, minus one. (e << 52) + m // Bit 53 of m will overflow into e. } pub fn u64_to_f32_bits(i: u64) -> u32 { let n = i.leading_zeros(); let y = i.wrapping_shl(n); let a = (y >> 40) as u32; // Significant bits, with bit 24 still in tact. let b = (y >> 8 | y & 0xFFFF) as u32; // Insignificant bits, only relevant for rounding. let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. let e = if i == 0 { 0 } else { 189 - n }; // Exponent plus 127, minus one, except for zero. (e << 23) + m // + not |, so the mantissa can overflow into the exponent. } pub fn u64_to_f64_bits(i: u64) -> u64 { if i == 0 { return 0; } let n = i.leading_zeros(); let a = (i << n) >> 11; // Significant bits, with bit 53 still in tact. let b = (i << n) << 53; // Insignificant bits, only relevant for rounding. let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even. let e = 1085 - n as u64; // Exponent plus 1023, minus one. (e << 52) + m // + not |, so the mantissa can overflow into the exponent. } pub fn u128_to_f32_bits(i: u128) -> u32 { let n = i.leading_zeros(); let y = i.wrapping_shl(n); let a = (y >> 104) as u32; // Significant bits, with bit 24 still in tact. let b = (y >> 72) as u32 | ((y << 32) >> 32 != 0) as u32; // Insignificant bits, only relevant for rounding. let m = a + ((b - (b >> 31 & !a)) >> 31); // Add one when we need to round up. Break ties to even. let e = if i == 0 { 0 } else { 253 - n }; // Exponent plus 127, minus one, except for zero. (e << 23) + m // + not |, so the mantissa can overflow into the exponent. } pub fn u128_to_f64_bits(i: u128) -> u64 { let n = i.leading_zeros(); let y = i.wrapping_shl(n); let a = (y >> 75) as u64; // Significant bits, with bit 53 still in tact. let b = (y >> 11 | y & 0xFFFF_FFFF) as u64; // Insignificant bits, only relevant for rounding. let m = a + ((b - (b >> 63 & !a)) >> 63); // Add one when we need to round up. Break ties to even. let e = if i == 0 { 0 } else { 1149 - n as u64 }; // Exponent plus 1023, minus one, except for zero. (e << 52) + m // + not |, so the mantissa can overflow into the exponent. } } // Conversions from unsigned integers to floats. intrinsics! 
{ #[arm_aeabi_alias = __aeabi_ui2f] pub extern "C" fn __floatunsisf(i: u32) -> f32 { f32::from_bits(int_to_float::u32_to_f32_bits(i)) } #[arm_aeabi_alias = __aeabi_ui2d] pub extern "C" fn __floatunsidf(i: u32) -> f64 { f64::from_bits(int_to_float::u32_to_f64_bits(i)) } #[arm_aeabi_alias = __aeabi_ul2f] pub extern "C" fn __floatundisf(i: u64) -> f32 { f32::from_bits(int_to_float::u64_to_f32_bits(i)) } #[arm_aeabi_alias = __aeabi_ul2d] pub extern "C" fn __floatundidf(i: u64) -> f64 { f64::from_bits(int_to_float::u64_to_f64_bits(i)) } #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] pub extern "C" fn __floatuntisf(i: u128) -> f32 { f32::from_bits(int_to_float::u128_to_f32_bits(i)) } #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] pub extern "C" fn __floatuntidf(i: u128) -> f64 { f64::from_bits(int_to_float::u128_to_f64_bits(i)) } } // Conversions from signed integers to floats. intrinsics! { #[arm_aeabi_alias = __aeabi_i2f] pub extern "C" fn __floatsisf(i: i32) -> f32 { let sign_bit = ((i >> 31) as u32) << 31; f32::from_bits(int_to_float::u32_to_f32_bits(i.unsigned_abs()) | sign_bit) } #[arm_aeabi_alias = __aeabi_i2d] pub extern "C" fn __floatsidf(i: i32) -> f64 { let sign_bit = ((i >> 31) as u64) << 63; f64::from_bits(int_to_float::u32_to_f64_bits(i.unsigned_abs()) | sign_bit) } #[arm_aeabi_alias = __aeabi_l2f] pub extern "C" fn __floatdisf(i: i64) -> f32 { let sign_bit = ((i >> 63) as u32) << 31; f32::from_bits(int_to_float::u64_to_f32_bits(i.unsigned_abs()) | sign_bit) } #[arm_aeabi_alias = __aeabi_l2d] pub extern "C" fn __floatdidf(i: i64) -> f64 { let sign_bit = ((i >> 63) as u64) << 63; f64::from_bits(int_to_float::u64_to_f64_bits(i.unsigned_abs()) | sign_bit) } #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] pub extern "C" fn __floattisf(i: i128) -> f32 { let sign_bit = ((i >> 127) as u32) << 31; f32::from_bits(int_to_float::u128_to_f32_bits(i.unsigned_abs()) | sign_bit) } #[cfg_attr(target_os = "uefi", unadjusted_on_win64)] pub extern "C" fn __floattidf(i: i128) -> f64 { let sign_bit = ((i >> 127) as u64) << 63; f64::from_bits(int_to_float::u128_to_f64_bits(i.unsigned_abs()) | sign_bit) } } // Conversions from floats to unsigned integers. intrinsics! { #[arm_aeabi_alias = __aeabi_f2uiz] pub extern "C" fn __fixunssfsi(f: f32) -> u32 { let fbits = f.to_bits(); if fbits < 127 << 23 { // >= 0, < 1 0 } else if fbits < 159 << 23 { // >= 1, < max let m = 1 << 31 | fbits << 8; // Mantissa and the implicit 1-bit. let s = 158 - (fbits >> 23); // Shift based on the exponent and bias. m >> s } else if fbits <= 255 << 23 { // >= max (incl. inf) u32::MAX } else { // Negative or NaN 0 } } #[arm_aeabi_alias = __aeabi_f2ulz] pub extern "C" fn __fixunssfdi(f: f32) -> u64 { let fbits = f.to_bits(); if fbits < 127 << 23 { // >= 0, < 1 0 } else if fbits < 191 << 23 { // >= 1, < max let m = 1 << 63 | (fbits as u64) << 40; // Mantissa and the implicit 1-bit. let s = 190 - (fbits >> 23); // Shift based on the exponent and bias. m >> s } else if fbits <= 255 << 23 { // >= max (incl. inf) u64::MAX } else { // Negative or NaN 0 } } #[win64_128bit_abi_hack] pub extern "C" fn __fixunssfti(f: f32) -> u128 { let fbits = f.to_bits(); if fbits < 127 << 23 { // >= 0, < 1 0 } else if fbits < 255 << 23 { // >= 1, < inf let m = 1 << 127 | (fbits as u128) << 104; // Mantissa and the implicit 1-bit. let s = 254 - (fbits >> 23); // Shift based on the exponent and bias. 
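// Rough hand-worked example of this shift scheme (illustrative only): for
// f = 5.5_f32 the bits are 0x40B0_0000, so m has the implicit 1 at bit 127
// plus the two mantissa bits at 125 and 124, and s = 254 - 129 = 125;
// m >> s then yields 0b101 = 5, with the fractional .5 simply truncated.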
m >> s } else if fbits == 255 << 23 { // == inf u128::MAX } else { // Negative or NaN 0 } } #[arm_aeabi_alias = __aeabi_d2uiz] pub extern "C" fn __fixunsdfsi(f: f64) -> u32 { let fbits = f.to_bits(); if fbits < 1023 << 52 { // >= 0, < 1 0 } else if fbits < 1055 << 52 { // >= 1, < max let m = 1 << 31 | (fbits >> 21) as u32; // Mantissa and the implicit 1-bit. let s = 1054 - (fbits >> 52); // Shift based on the exponent and bias. m >> s } else if fbits <= 2047 << 52 { // >= max (incl. inf) u32::MAX } else { // Negative or NaN 0 } } #[arm_aeabi_alias = __aeabi_d2ulz] pub extern "C" fn __fixunsdfdi(f: f64) -> u64 { let fbits = f.to_bits(); if fbits < 1023 << 52 { // >= 0, < 1 0 } else if fbits < 1087 << 52 { // >= 1, < max let m = 1 << 63 | fbits << 11; // Mantissa and the implicit 1-bit. let s = 1086 - (fbits >> 52); // Shift based on the exponent and bias. m >> s } else if fbits <= 2047 << 52 { // >= max (incl. inf) u64::MAX } else { // Negative or NaN 0 } } #[win64_128bit_abi_hack] pub extern "C" fn __fixunsdfti(f: f64) -> u128 { let fbits = f.to_bits(); if fbits < 1023 << 52 { // >= 0, < 1 0 } else if fbits < 1151 << 52 { // >= 1, < max let m = 1 << 127 | (fbits as u128) << 75; // Mantissa and the implicit 1-bit. let s = 1150 - (fbits >> 52); // Shift based on the exponent and bias. m >> s } else if fbits <= 2047 << 52 { // >= max (incl. inf) u128::MAX } else { // Negative or NaN 0 } } } // Conversions from floats to signed integers. intrinsics! { #[arm_aeabi_alias = __aeabi_f2iz] pub extern "C" fn __fixsfsi(f: f32) -> i32 { let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. if fbits < 127 << 23 { // >= 0, < 1 0 } else if fbits < 158 << 23 { // >= 1, < max let m = 1 << 31 | fbits << 8; // Mantissa and the implicit 1-bit. let s = 158 - (fbits >> 23); // Shift based on the exponent and bias. let u = (m >> s) as i32; // Unsigned result. if f.is_sign_negative() { -u } else { u } } else if fbits <= 255 << 23 { // >= max (incl. inf) if f.is_sign_negative() { i32::MIN } else { i32::MAX } } else { // NaN 0 } } #[arm_aeabi_alias = __aeabi_f2lz] pub extern "C" fn __fixsfdi(f: f32) -> i64 { let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. if fbits < 127 << 23 { // >= 0, < 1 0 } else if fbits < 190 << 23 { // >= 1, < max let m = 1 << 63 | (fbits as u64) << 40; // Mantissa and the implicit 1-bit. let s = 190 - (fbits >> 23); // Shift based on the exponent and bias. let u = (m >> s) as i64; // Unsigned result. if f.is_sign_negative() { -u } else { u } } else if fbits <= 255 << 23 { // >= max (incl. inf) if f.is_sign_negative() { i64::MIN } else { i64::MAX } } else { // NaN 0 } } #[win64_128bit_abi_hack] pub extern "C" fn __fixsfti(f: f32) -> i128 { let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. if fbits < 127 << 23 { // >= 0, < 1 0 } else if fbits < 254 << 23 { // >= 1, < max let m = 1 << 127 | (fbits as u128) << 104; // Mantissa and the implicit 1-bit. let s = 254 - (fbits >> 23); // Shift based on the exponent and bias. let u = (m >> s) as i128; // Unsigned result. if f.is_sign_negative() { -u } else { u } } else if fbits <= 255 << 23 { // >= max (incl. inf) if f.is_sign_negative() { i128::MIN } else { i128::MAX } } else { // NaN 0 } } #[arm_aeabi_alias = __aeabi_d2iz] pub extern "C" fn __fixdfsi(f: f64) -> i32 { let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. if fbits < 1023 << 52 { // >= 0, < 1 0 } else if fbits < 1054 << 52 { // >= 1, < max let m = 1 << 31 | (fbits >> 21) as u32; // Mantissa and the implicit 1-bit. 
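// Hand-worked example (rough, for illustration): f = -2.5_f64 has absolute
// bits (1024 << 52) | (1 << 50), so m = 0x8000_0000 | 0x2000_0000 =
// 0xA000_0000 and, with the shift computed just below, s = 1054 - 1024 = 30,
// giving u = m >> s = 2; the sign is applied at the end, so the result is -2
// (truncation toward zero).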
let s = 1054 - (fbits >> 52); // Shift based on the exponent and bias. let u = (m >> s) as i32; // Unsigned result. if f.is_sign_negative() { -u } else { u } } else if fbits <= 2047 << 52 { // >= max (incl. inf) if f.is_sign_negative() { i32::MIN } else { i32::MAX } } else { // NaN 0 } } #[arm_aeabi_alias = __aeabi_d2lz] pub extern "C" fn __fixdfdi(f: f64) -> i64 { let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. if fbits < 1023 << 52 { // >= 0, < 1 0 } else if fbits < 1086 << 52 { // >= 1, < max let m = 1 << 63 | fbits << 11; // Mantissa and the implicit 1-bit. let s = 1086 - (fbits >> 52); // Shift based on the exponent and bias. let u = (m >> s) as i64; // Unsigned result. if f.is_sign_negative() { -u } else { u } } else if fbits <= 2047 << 52 { // >= max (incl. inf) if f.is_sign_negative() { i64::MIN } else { i64::MAX } } else { // NaN 0 } } #[win64_128bit_abi_hack] pub extern "C" fn __fixdfti(f: f64) -> i128 { let fbits = f.to_bits() & !0 >> 1; // Remove sign bit. if fbits < 1023 << 52 { // >= 0, < 1 0 } else if fbits < 1150 << 52 { // >= 1, < max let m = 1 << 127 | (fbits as u128) << 75; // Mantissa and the implicit 1-bit. let s = 1150 - (fbits >> 52); // Shift based on the exponent and bias. let u = (m >> s) as i128; // Unsigned result. if f.is_sign_negative() { -u } else { u } } else if fbits <= 2047 << 52 { // >= max (incl. inf) if f.is_sign_negative() { i128::MIN } else { i128::MAX } } else { // NaN 0 } } } compiler_builtins-0.1.101/src/float/div.rs000064400000000000000000001200611046102023000165330ustar 00000000000000// The functions are complex with many branches, and explicit // `return`s makes it clear where function exit points are #![allow(clippy::needless_return)] use crate::float::Float; use crate::int::{CastInto, DInt, HInt, Int}; fn div32(a: F, b: F) -> F where u32: CastInto, F::Int: CastInto, i32: CastInto, F::Int: CastInto, F::Int: HInt, ::Int: core::ops::Mul, { const NUMBER_OF_HALF_ITERATIONS: usize = 0; const NUMBER_OF_FULL_ITERATIONS: usize = 3; const USE_NATIVE_FULL_ITERATIONS: bool = true; let one = F::Int::ONE; let zero = F::Int::ZERO; let hw = F::BITS / 2; let lo_mask = u32::MAX >> hw; let significand_bits = F::SIGNIFICAND_BITS; let max_exponent = F::EXPONENT_MAX; let exponent_bias = F::EXPONENT_BIAS; let implicit_bit = F::IMPLICIT_BIT; let significand_mask = F::SIGNIFICAND_MASK; let sign_bit = F::SIGN_MASK as F::Int; let abs_mask = sign_bit - one; let exponent_mask = F::EXPONENT_MASK; let inf_rep = exponent_mask; let quiet_bit = implicit_bit >> 1; let qnan_rep = exponent_mask | quiet_bit; #[inline(always)] fn negate_u32(a: u32) -> u32 { (::wrapping_neg(a as i32)) as u32 } let a_rep = a.repr(); let b_rep = b.repr(); let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); let quotient_sign = (a_rep ^ b_rep) & sign_bit; let mut a_significand = a_rep & significand_mask; let mut b_significand = b_rep & significand_mask; let mut scale = 0; // Detect if a or b is zero, denormal, infinity, or NaN. 
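// For the f32 instantiation these masks work out to the usual IEEE-754
// binary32 constants (listed here only as a reading aid): sign_bit =
// 0x8000_0000, exponent_mask = inf_rep = 0x7F80_0000, implicit_bit =
// 0x0080_0000, quiet_bit = 0x0040_0000 and qnan_rep = 0x7FC0_0000.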
if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() { let a_abs = a_rep & abs_mask; let b_abs = b_rep & abs_mask; // NaN / anything = qNaN if a_abs > inf_rep { return F::from_repr(a_rep | quiet_bit); } // anything / NaN = qNaN if b_abs > inf_rep { return F::from_repr(b_rep | quiet_bit); } if a_abs == inf_rep { if b_abs == inf_rep { // infinity / infinity = NaN return F::from_repr(qnan_rep); } else { // infinity / anything else = +/- infinity return F::from_repr(a_abs | quotient_sign); } } // anything else / infinity = +/- 0 if b_abs == inf_rep { return F::from_repr(quotient_sign); } if a_abs == zero { if b_abs == zero { // zero / zero = NaN return F::from_repr(qnan_rep); } else { // zero / anything else = +/- zero return F::from_repr(quotient_sign); } } // anything else / zero = +/- infinity if b_abs == zero { return F::from_repr(inf_rep | quotient_sign); } // one or both of a or b is denormal, the other (if applicable) is a // normal number. Renormalize one or both of a and b, and set scale to // include the necessary exponent adjustment. if a_abs < implicit_bit { let (exponent, significand) = F::normalize(a_significand); scale += exponent; a_significand = significand; } if b_abs < implicit_bit { let (exponent, significand) = F::normalize(b_significand); scale -= exponent; b_significand = significand; } } // Set the implicit significand bit. If we fell through from the // denormal path it was already set by normalize( ), but setting it twice // won't hurt anything. a_significand |= implicit_bit; b_significand |= implicit_bit; let written_exponent: i32 = CastInto::::cast( a_exponent .wrapping_sub(b_exponent) .wrapping_add(scale.cast()), ) .wrapping_add(exponent_bias) as i32; let b_uq1 = b_significand << (F::BITS - significand_bits - 1); // Align the significand of b as a UQ1.(n-1) fixed-point number in the range // [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax // polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2. // The max error for this approximation is achieved at endpoints, so // abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289..., // which is about 4.5 bits. // The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571... // Then, refine the reciprocal estimate using a quadratically converging // Newton-Raphson iteration: // x_{n+1} = x_n * (2 - x_n * b) // // Let b be the original divisor considered "in infinite precision" and // obtained from IEEE754 representation of function argument (with the // implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in // UQ1.(W-1). // // Let b_hw be an infinitely precise number obtained from the highest (HW-1) // bits of divisor significand (with the implicit bit set). Corresponds to // half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated** // version of b_UQ1. // // Let e_n := x_n - 1/b_hw // E_n := x_n - 1/b // abs(E_n) <= abs(e_n) + (1/b_hw - 1/b) // = abs(e_n) + (b - b_hw) / (b*b_hw) // <= abs(e_n) + 2 * 2^-HW // rep_t-sized iterations may be slower than the corresponding half-width // variant depending on the handware and whether single/double/quad precision // is selected. // NB: Using half-width iterations increases computation errors due to // rounding, so error estimations have to be computed taking the selected // mode into account! 
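// A rough numeric sketch of the iteration (hand-computed, ignoring the
// fixed-point encoding): for b = 1.5 the seed is
//   x0 = 3/4 + 1/sqrt(2) - b/2 ~= 0.7071        (true 1/b ~= 0.6667)
// and one Newton-Raphson step x1 = x0 * (2 - b*x0) gives ~0.6642, the next
// ~0.66666 -- the error roughly squares at every step, which is why only a
// handful of iterations reach full f32/f64 precision.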
#[allow(clippy::absurd_extreme_comparisons)] let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 { // Starting with (n-1) half-width iterations let b_uq1_hw: u16 = (CastInto::::cast(b_significand) >> (significand_bits + 1 - hw)) as u16; // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW // with W0 being either 16 or 32 and W0 <= HW. // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which // b/2 is subtracted to obtain x0) wrapped to [0, 1) range. // HW is at least 32. Shifting into the highest bits if needed. let c_hw = (0x7504_u32 as u16).wrapping_shl(hw.wrapping_sub(32)); // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572, // so x0 fits to UQ0.HW without wrapping. let x_uq0_hw: u16 = { let mut x_uq0_hw: u16 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */); // An e_0 error is comprised of errors due to // * x0 being an inherently imprecise first approximation of 1/b_hw // * C_hw being some (irrational) number **truncated** to W0 bits // Please note that e_0 is calculated against the infinitely precise // reciprocal of b_hw (that is, **truncated** version of b). // // e_0 <= 3/4 - 1/sqrt(2) + 2^-W0 // By construction, 1 <= b < 2 // f(x) = x * (2 - b*x) = 2*x - b*x^2 // f'(x) = 2 * (1 - b*x) // // On the [0, 1] interval, f(0) = 0, // then it increses until f(1/b) = 1 / b, maximum on (0, 1), // then it decreses to f(1) = 2 - b // // Let g(x) = x - f(x) = b*x^2 - x. // On (0, 1/b), g(x) < 0 <=> f(x) > x // On (1/b, 1], g(x) > 0 <=> f(x) < x // // For half-width iterations, b_hw is used instead of b. #[allow(clippy::reversed_empty_ranges)] for _ in 0..NUMBER_OF_HALF_ITERATIONS { // corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp // of corr_UQ1_hw. // "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1). // On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is // expected to be strictly positive because b_UQ1_hw has its highest bit set // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). let corr_uq1_hw: u16 = 0.wrapping_sub((x_uq0_hw as u32).wrapping_mul(b_uq1_hw.cast()) >> hw) as u16; // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally // obtaining an UQ1.(HW-1) number and proving its highest bit could be // considered to be 0 to be able to represent it in UQ0.HW. // From the above analysis of f(x), if corr_UQ1_hw would be represented // without any intermediate loss of precision (that is, in twice_rep_t) // x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly // less otherwise. On the other hand, to obtain [1.]000..., one have to pass // 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due // to 1.0 being not representable as UQ0.HW). // The fact corr_UQ1_hw was virtually round up (due to result of // multiplication being **first** truncated, then negated - to improve // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. x_uq0_hw = ((x_uq0_hw as u32).wrapping_mul(corr_uq1_hw as u32) >> (hw - 1)) as u16; // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after // any number of iterations, so just subtract 2 from the reciprocal // approximation after last iteration. 
// In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW: // corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1 // = 1 - e_n * b_hw + 2*eps1 // x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2 // = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2 // = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2 // e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2 // = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw // \------ >0 -------/ \-- >0 ---/ // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U) } // For initial half-width iterations, U = 2^-HW // Let abs(e_n) <= u_n * U, // then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U) // u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2) // Account for possible overflow (see above). For an overflow to occur for the // first time, for "ideal" corr_UQ1_hw (that is, without intermediate // truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum // value representable in UQ0.HW or less by 1. This means that 1/b_hw have to // be not below that value (see g(x) above), so it is safe to decrement just // once after the final iteration. On the other hand, an effective value of // divisor changes after this point (from b_hw to b), so adjust here. x_uq0_hw.wrapping_sub(1_u16) }; // Error estimations for full-precision iterations are calculated just // as above, but with U := 2^-W and taking extra decrementing into account. // We need at least one such iteration. // Simulating operations on a twice_rep_t to perform a single final full-width // iteration. Using ad-hoc multiplication implementations to take advantage // of particular structure of operands. let blo: u32 = (CastInto::::cast(b_uq1)) & lo_mask; // x_UQ0 = x_UQ0_hw * 2^HW - 1 // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1 // // <--- higher half ---><--- lower half ---> // [x_UQ0_hw * b_UQ1_hw] // + [ x_UQ0_hw * blo ] // - [ b_UQ1 ] // = [ result ][.... discarded ...] let corr_uq1 = negate_u32( (x_uq0_hw as u32) * (b_uq1_hw as u32) + (((x_uq0_hw as u32) * (blo)) >> hw) - 1, ); // account for *possible* carry let lo_corr = corr_uq1 & lo_mask; let hi_corr = corr_uq1 >> hw; // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1 let mut x_uq0: ::Int = ((((x_uq0_hw as u32) * hi_corr) << 1) .wrapping_add(((x_uq0_hw as u32) * lo_corr) >> (hw - 1)) .wrapping_sub(2)) .cast(); // 1 to account for the highest bit of corr_UQ1 can be 1 // 1 to account for possible carry // Just like the case of half-width iterations but with possibility // of overflowing by one extra Ulp of x_UQ0. x_uq0 -= one; // ... 
and then traditional fixup by 2 should work // On error estimation: // abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW // + (2^-HW + 2^-W)) // abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW // Then like for the half-width iterations: // With 0 <= eps1, eps2 < 2^-W // E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b // abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ] // abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ] x_uq0 } else { // C is (3/4 + 1/sqrt(2)) - 1 truncated to 32 fractional bits as UQ0.n let c: ::Int = (0x7504F333 << (F::BITS - 32)).cast(); let x_uq0: ::Int = c.wrapping_sub(b_uq1); // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-32 x_uq0 }; let mut x_uq0 = if USE_NATIVE_FULL_ITERATIONS { for _ in 0..NUMBER_OF_FULL_ITERATIONS { let corr_uq1: u32 = 0.wrapping_sub( ((CastInto::::cast(x_uq0) as u64) * (CastInto::::cast(b_uq1) as u64)) >> F::BITS, ) as u32; x_uq0 = ((((CastInto::::cast(x_uq0) as u64) * (corr_uq1 as u64)) >> (F::BITS - 1)) as u32) .cast(); } x_uq0 } else { // not using native full iterations x_uq0 }; // Finally, account for possible overflow, as explained above. x_uq0 = x_uq0.wrapping_sub(2.cast()); // u_n for different precisions (with N-1 half-width iterations): // W0 is the precision of C // u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW // Estimated with bc: // define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; } // define half2(un) { return 2.0 * un / 2.0^hw + 2.0; } // define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; } // define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; } // | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1) // u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797 // u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440 // u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317 // u_3 | < 7.31 | | < 7.31 | < 27054456580 // u_4 | | | | < 80.4 // Final (U_N) | same as u_3 | < 72 | < 218 | < 13920 // Add 2 to U_N due to final decrement. let reciprocal_precision: ::Int = 10.cast(); // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W let x_uq0 = x_uq0 - reciprocal_precision; // Now 1/b - (2*P) * 2^-W < x < 1/b // FIXME Is x_UQ0 still >= 0.5? let mut quotient: ::Int = x_uq0.widen_mul(a_significand << 1).hi(); // Now, a/b - 4*P * 2^-W < q < a/b for q= in UQ1.(SB+1+W). // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1), // adjust it to be in [1.0, 2.0) as UQ1.SB. let (mut residual, written_exponent) = if quotient < (implicit_bit << 1) { // Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB, // effectively doubling its value as well as its error estimation. let residual_lo = (a_significand << (significand_bits + 1)).wrapping_sub( (CastInto::::cast(quotient).wrapping_mul(CastInto::::cast(b_significand))) .cast(), ); a_significand <<= 1; (residual_lo, written_exponent.wrapping_sub(1)) } else { // Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it // to UQ1.SB by right shifting by 1. Least significant bit is omitted. quotient >>= 1; let residual_lo = (a_significand << significand_bits).wrapping_sub( (CastInto::::cast(quotient).wrapping_mul(CastInto::::cast(b_significand))) .cast(), ); (residual_lo, written_exponent) }; //drop mutability let quotient = quotient; // NB: residualLo is calculated above for the normal result case. // It is re-computed on denormal path that is expected to be not so // performance-sensitive. 
// Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB // Each NextAfter() increments the floating point value by at least 2^-SB // (more, if exponent was incremented). // Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint): // q // | | * | | | | | // <---> 2^t // | | | | | * | | // q // To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB. // (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB // (8*P) * 2^-W < 0.5 * 2^-SB // P < 2^(W-4-SB) // Generally, for at most R NextAfter() to be enough, // P < (2*R - 1) * 2^(W-4-SB) // For f32 (0+3): 10 < 32 (OK) // For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required // For f64: 220 < 256 (OK) // For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required) // If we have overflowed the exponent, return infinity if written_exponent >= max_exponent as i32 { return F::from_repr(inf_rep | quotient_sign); } // Now, quotient <= the correctly-rounded result // and may need taking NextAfter() up to 3 times (see error estimates above) // r = a - b * q let abs_result = if written_exponent > 0 { let mut ret = quotient & significand_mask; ret |= ((written_exponent as u32) << significand_bits).cast(); residual <<= 1; ret } else { if (significand_bits as i32 + written_exponent) < 0 { return F::from_repr(quotient_sign); } let ret = quotient.wrapping_shr(negate_u32(CastInto::::cast(written_exponent)) + 1); residual = (CastInto::::cast( a_significand.wrapping_shl( significand_bits.wrapping_add(CastInto::::cast(written_exponent)), ), ) .wrapping_sub( (CastInto::::cast(ret).wrapping_mul(CastInto::::cast(b_significand))) << 1, )) .cast(); ret }; // Round let abs_result = { residual += abs_result & one; // tie to even // The above line conditionally turns the below LT comparison into LTE if residual > b_significand { abs_result + one } else { abs_result } }; F::from_repr(abs_result | quotient_sign) } fn div64(a: F, b: F) -> F where u32: CastInto, F::Int: CastInto, i32: CastInto, F::Int: CastInto, u64: CastInto, F::Int: CastInto, i64: CastInto, F::Int: CastInto, F::Int: HInt, { const NUMBER_OF_HALF_ITERATIONS: usize = 3; const NUMBER_OF_FULL_ITERATIONS: usize = 1; const USE_NATIVE_FULL_ITERATIONS: bool = false; let one = F::Int::ONE; let zero = F::Int::ZERO; let hw = F::BITS / 2; let lo_mask = u64::MAX >> hw; let significand_bits = F::SIGNIFICAND_BITS; let max_exponent = F::EXPONENT_MAX; let exponent_bias = F::EXPONENT_BIAS; let implicit_bit = F::IMPLICIT_BIT; let significand_mask = F::SIGNIFICAND_MASK; let sign_bit = F::SIGN_MASK as F::Int; let abs_mask = sign_bit - one; let exponent_mask = F::EXPONENT_MASK; let inf_rep = exponent_mask; let quiet_bit = implicit_bit >> 1; let qnan_rep = exponent_mask | quiet_bit; #[inline(always)] fn negate_u64(a: u64) -> u64 { (::wrapping_neg(a as i64)) as u64 } let a_rep = a.repr(); let b_rep = b.repr(); let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); let quotient_sign = (a_rep ^ b_rep) & sign_bit; let mut a_significand = a_rep & significand_mask; let mut b_significand = b_rep & significand_mask; let mut scale = 0; // Detect if a or b is zero, denormal, infinity, or NaN. 
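// For the f64 instantiation these masks are the usual IEEE-754 binary64
// constants (noted only as a reading aid): sign_bit = 0x8000_0000_0000_0000,
// exponent_mask = inf_rep = 0x7FF0_0000_0000_0000, implicit_bit = 1 << 52,
// quiet_bit = 1 << 51, qnan_rep = 0x7FF8_0000_0000_0000, and the exponent
// bias is 1023.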
if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() { let a_abs = a_rep & abs_mask; let b_abs = b_rep & abs_mask; // NaN / anything = qNaN if a_abs > inf_rep { return F::from_repr(a_rep | quiet_bit); } // anything / NaN = qNaN if b_abs > inf_rep { return F::from_repr(b_rep | quiet_bit); } if a_abs == inf_rep { if b_abs == inf_rep { // infinity / infinity = NaN return F::from_repr(qnan_rep); } else { // infinity / anything else = +/- infinity return F::from_repr(a_abs | quotient_sign); } } // anything else / infinity = +/- 0 if b_abs == inf_rep { return F::from_repr(quotient_sign); } if a_abs == zero { if b_abs == zero { // zero / zero = NaN return F::from_repr(qnan_rep); } else { // zero / anything else = +/- zero return F::from_repr(quotient_sign); } } // anything else / zero = +/- infinity if b_abs == zero { return F::from_repr(inf_rep | quotient_sign); } // one or both of a or b is denormal, the other (if applicable) is a // normal number. Renormalize one or both of a and b, and set scale to // include the necessary exponent adjustment. if a_abs < implicit_bit { let (exponent, significand) = F::normalize(a_significand); scale += exponent; a_significand = significand; } if b_abs < implicit_bit { let (exponent, significand) = F::normalize(b_significand); scale -= exponent; b_significand = significand; } } // Set the implicit significand bit. If we fell through from the // denormal path it was already set by normalize( ), but setting it twice // won't hurt anything. a_significand |= implicit_bit; b_significand |= implicit_bit; let written_exponent: i64 = CastInto::::cast( a_exponent .wrapping_sub(b_exponent) .wrapping_add(scale.cast()), ) .wrapping_add(exponent_bias as u64) as i64; let b_uq1 = b_significand << (F::BITS - significand_bits - 1); // Align the significand of b as a UQ1.(n-1) fixed-point number in the range // [1.0, 2.0) and get a UQ0.n approximate reciprocal using a small minimax // polynomial approximation: x0 = 3/4 + 1/sqrt(2) - b/2. // The max error for this approximation is achieved at endpoints, so // abs(x0(b) - 1/b) <= abs(x0(1) - 1/1) = 3/4 - 1/sqrt(2) = 0.04289..., // which is about 4.5 bits. // The initial approximation is between x0(1.0) = 0.9571... and x0(2.0) = 0.4571... // Then, refine the reciprocal estimate using a quadratically converging // Newton-Raphson iteration: // x_{n+1} = x_n * (2 - x_n * b) // // Let b be the original divisor considered "in infinite precision" and // obtained from IEEE754 representation of function argument (with the // implicit bit set). Corresponds to rep_t-sized b_UQ1 represented in // UQ1.(W-1). // // Let b_hw be an infinitely precise number obtained from the highest (HW-1) // bits of divisor significand (with the implicit bit set). Corresponds to // half_rep_t-sized b_UQ1_hw represented in UQ1.(HW-1) that is a **truncated** // version of b_UQ1. // // Let e_n := x_n - 1/b_hw // E_n := x_n - 1/b // abs(E_n) <= abs(e_n) + (1/b_hw - 1/b) // = abs(e_n) + (b - b_hw) / (b*b_hw) // <= abs(e_n) + 2 * 2^-HW // rep_t-sized iterations may be slower than the corresponding half-width // variant depending on the handware and whether single/double/quad precision // is selected. // NB: Using half-width iterations increases computation errors due to // rounding, so error estimations have to be computed taking the selected // mode into account! 
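// As a rough sanity check of the constants used just below: read as a
// UQ0.32 fixed-point value, c_hw = 0x7504_F333 is about 0.4571, i.e.
// (3/4 + 1/sqrt(2)) - 1, and b_uq1_hw (the top 32 significand bits with the
// implicit bit set, read as UQ0.32) is exactly b_hw/2. For b_hw = 1.5,
// b_uq1_hw = 0xC000_0000 and the wrapping subtraction yields 0xB504_F333,
// which is ~0.7071 = 3/4 + 1/sqrt(2) - 0.75, the expected x0.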
let mut x_uq0 = if NUMBER_OF_HALF_ITERATIONS > 0 { // Starting with (n-1) half-width iterations let b_uq1_hw: u32 = (CastInto::::cast(b_significand) >> (significand_bits + 1 - hw)) as u32; // C is (3/4 + 1/sqrt(2)) - 1 truncated to W0 fractional bits as UQ0.HW // with W0 being either 16 or 32 and W0 <= HW. // That is, C is the aforementioned 3/4 + 1/sqrt(2) constant (from which // b/2 is subtracted to obtain x0) wrapped to [0, 1) range. // HW is at least 32. Shifting into the highest bits if needed. let c_hw = (0x7504F333_u64 as u32).wrapping_shl(hw.wrapping_sub(32)); // b >= 1, thus an upper bound for 3/4 + 1/sqrt(2) - b/2 is about 0.9572, // so x0 fits to UQ0.HW without wrapping. let x_uq0_hw: u32 = { let mut x_uq0_hw: u32 = c_hw.wrapping_sub(b_uq1_hw /* exact b_hw/2 as UQ0.HW */); // dbg!(x_uq0_hw); // An e_0 error is comprised of errors due to // * x0 being an inherently imprecise first approximation of 1/b_hw // * C_hw being some (irrational) number **truncated** to W0 bits // Please note that e_0 is calculated against the infinitely precise // reciprocal of b_hw (that is, **truncated** version of b). // // e_0 <= 3/4 - 1/sqrt(2) + 2^-W0 // By construction, 1 <= b < 2 // f(x) = x * (2 - b*x) = 2*x - b*x^2 // f'(x) = 2 * (1 - b*x) // // On the [0, 1] interval, f(0) = 0, // then it increses until f(1/b) = 1 / b, maximum on (0, 1), // then it decreses to f(1) = 2 - b // // Let g(x) = x - f(x) = b*x^2 - x. // On (0, 1/b), g(x) < 0 <=> f(x) > x // On (1/b, 1], g(x) > 0 <=> f(x) < x // // For half-width iterations, b_hw is used instead of b. for _ in 0..NUMBER_OF_HALF_ITERATIONS { // corr_UQ1_hw can be **larger** than 2 - b_hw*x by at most 1*Ulp // of corr_UQ1_hw. // "0.0 - (...)" is equivalent to "2.0 - (...)" in UQ1.(HW-1). // On the other hand, corr_UQ1_hw should not overflow from 2.0 to 0.0 provided // no overflow occurred earlier: ((rep_t)x_UQ0_hw * b_UQ1_hw >> HW) is // expected to be strictly positive because b_UQ1_hw has its highest bit set // and x_UQ0_hw should be rather large (it converges to 1/2 < 1/b_hw <= 1). let corr_uq1_hw: u32 = 0.wrapping_sub(((x_uq0_hw as u64).wrapping_mul(b_uq1_hw as u64)) >> hw) as u32; // dbg!(corr_uq1_hw); // Now, we should multiply UQ0.HW and UQ1.(HW-1) numbers, naturally // obtaining an UQ1.(HW-1) number and proving its highest bit could be // considered to be 0 to be able to represent it in UQ0.HW. // From the above analysis of f(x), if corr_UQ1_hw would be represented // without any intermediate loss of precision (that is, in twice_rep_t) // x_UQ0_hw could be at most [1.]000... if b_hw is exactly 1.0 and strictly // less otherwise. On the other hand, to obtain [1.]000..., one have to pass // 1/b_hw == 1.0 to f(x), so this cannot occur at all without overflow (due // to 1.0 being not representable as UQ0.HW). // The fact corr_UQ1_hw was virtually round up (due to result of // multiplication being **first** truncated, then negated - to improve // error estimations) can increase x_UQ0_hw by up to 2*Ulp of x_UQ0_hw. x_uq0_hw = ((x_uq0_hw as u64).wrapping_mul(corr_uq1_hw as u64) >> (hw - 1)) as u32; // dbg!(x_uq0_hw); // Now, either no overflow occurred or x_UQ0_hw is 0 or 1 in its half_rep_t // representation. In the latter case, x_UQ0_hw will be either 0 or 1 after // any number of iterations, so just subtract 2 from the reciprocal // approximation after last iteration. 
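// Informal side note on the "0.0 - (...)" == "2.0 - (...)" trick used above:
// in the UQ1.31 view, 2.0 corresponds to 2^32, which is congruent to 0
// modulo 2^32, so 0.wrapping_sub(t) really does compute 2.0 - t; e.g. for
// t = 1.0 (raw 0x8000_0000) the wrapping result is again 0x8000_0000 = 1.0.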
// In infinite precision, with 0 <= eps1, eps2 <= U = 2^-HW: // corr_UQ1_hw = 2 - (1/b_hw + e_n) * b_hw + 2*eps1 // = 1 - e_n * b_hw + 2*eps1 // x_UQ0_hw = (1/b_hw + e_n) * (1 - e_n*b_hw + 2*eps1) - eps2 // = 1/b_hw - e_n + 2*eps1/b_hw + e_n - e_n^2*b_hw + 2*e_n*eps1 - eps2 // = 1/b_hw + 2*eps1/b_hw - e_n^2*b_hw + 2*e_n*eps1 - eps2 // e_{n+1} = -e_n^2*b_hw + 2*eps1/b_hw + 2*e_n*eps1 - eps2 // = 2*e_n*eps1 - (e_n^2*b_hw + eps2) + 2*eps1/b_hw // \------ >0 -------/ \-- >0 ---/ // abs(e_{n+1}) <= 2*abs(e_n)*U + max(2*e_n^2 + U, 2 * U) } // For initial half-width iterations, U = 2^-HW // Let abs(e_n) <= u_n * U, // then abs(e_{n+1}) <= 2 * u_n * U^2 + max(2 * u_n^2 * U^2 + U, 2 * U) // u_{n+1} <= 2 * u_n * U + max(2 * u_n^2 * U + 1, 2) // Account for possible overflow (see above). For an overflow to occur for the // first time, for "ideal" corr_UQ1_hw (that is, without intermediate // truncation), the result of x_UQ0_hw * corr_UQ1_hw should be either maximum // value representable in UQ0.HW or less by 1. This means that 1/b_hw have to // be not below that value (see g(x) above), so it is safe to decrement just // once after the final iteration. On the other hand, an effective value of // divisor changes after this point (from b_hw to b), so adjust here. x_uq0_hw.wrapping_sub(1_u32) }; // Error estimations for full-precision iterations are calculated just // as above, but with U := 2^-W and taking extra decrementing into account. // We need at least one such iteration. // Simulating operations on a twice_rep_t to perform a single final full-width // iteration. Using ad-hoc multiplication implementations to take advantage // of particular structure of operands. let blo: u64 = (CastInto::::cast(b_uq1)) & lo_mask; // x_UQ0 = x_UQ0_hw * 2^HW - 1 // x_UQ0 * b_UQ1 = (x_UQ0_hw * 2^HW) * (b_UQ1_hw * 2^HW + blo) - b_UQ1 // // <--- higher half ---><--- lower half ---> // [x_UQ0_hw * b_UQ1_hw] // + [ x_UQ0_hw * blo ] // - [ b_UQ1 ] // = [ result ][.... discarded ...] let corr_uq1 = negate_u64( (x_uq0_hw as u64) * (b_uq1_hw as u64) + (((x_uq0_hw as u64) * (blo)) >> hw) - 1, ); // account for *possible* carry let lo_corr = corr_uq1 & lo_mask; let hi_corr = corr_uq1 >> hw; // x_UQ0 * corr_UQ1 = (x_UQ0_hw * 2^HW) * (hi_corr * 2^HW + lo_corr) - corr_UQ1 let mut x_uq0: ::Int = ((((x_uq0_hw as u64) * hi_corr) << 1) .wrapping_add(((x_uq0_hw as u64) * lo_corr) >> (hw - 1)) .wrapping_sub(2)) .cast(); // 1 to account for the highest bit of corr_UQ1 can be 1 // 1 to account for possible carry // Just like the case of half-width iterations but with possibility // of overflowing by one extra Ulp of x_UQ0. x_uq0 -= one; // ... 
and then traditional fixup by 2 should work // On error estimation: // abs(E_{N-1}) <= (u_{N-1} + 2 /* due to conversion e_n -> E_n */) * 2^-HW // + (2^-HW + 2^-W)) // abs(E_{N-1}) <= (u_{N-1} + 3.01) * 2^-HW // Then like for the half-width iterations: // With 0 <= eps1, eps2 < 2^-W // E_N = 4 * E_{N-1} * eps1 - (E_{N-1}^2 * b + 4 * eps2) + 4 * eps1 / b // abs(E_N) <= 2^-W * [ 4 * abs(E_{N-1}) + max(2 * abs(E_{N-1})^2 * 2^W + 4, 8)) ] // abs(E_N) <= 2^-W * [ 4 * (u_{N-1} + 3.01) * 2^-HW + max(4 + 2 * (u_{N-1} + 3.01)^2, 8) ] x_uq0 } else { // C is (3/4 + 1/sqrt(2)) - 1 truncated to 64 fractional bits as UQ0.n let c: ::Int = (0x7504F333 << (F::BITS - 32)).cast(); let x_uq0: ::Int = c.wrapping_sub(b_uq1); // E_0 <= 3/4 - 1/sqrt(2) + 2 * 2^-64 x_uq0 }; let mut x_uq0 = if USE_NATIVE_FULL_ITERATIONS { for _ in 0..NUMBER_OF_FULL_ITERATIONS { let corr_uq1: u64 = 0.wrapping_sub( (CastInto::::cast(x_uq0) * (CastInto::::cast(b_uq1))) >> F::BITS, ); x_uq0 = ((((CastInto::::cast(x_uq0) as u128) * (corr_uq1 as u128)) >> (F::BITS - 1)) as u64) .cast(); } x_uq0 } else { // not using native full iterations x_uq0 }; // Finally, account for possible overflow, as explained above. x_uq0 = x_uq0.wrapping_sub(2.cast()); // u_n for different precisions (with N-1 half-width iterations): // W0 is the precision of C // u_0 = (3/4 - 1/sqrt(2) + 2^-W0) * 2^HW // Estimated with bc: // define half1(un) { return 2.0 * (un + un^2) / 2.0^hw + 1.0; } // define half2(un) { return 2.0 * un / 2.0^hw + 2.0; } // define full1(un) { return 4.0 * (un + 3.01) / 2.0^hw + 2.0 * (un + 3.01)^2 + 4.0; } // define full2(un) { return 4.0 * (un + 3.01) / 2.0^hw + 8.0; } // | f32 (0 + 3) | f32 (2 + 1) | f64 (3 + 1) | f128 (4 + 1) // u_0 | < 184224974 | < 2812.1 | < 184224974 | < 791240234244348797 // u_1 | < 15804007 | < 242.7 | < 15804007 | < 67877681371350440 // u_2 | < 116308 | < 2.81 | < 116308 | < 499533100252317 // u_3 | < 7.31 | | < 7.31 | < 27054456580 // u_4 | | | | < 80.4 // Final (U_N) | same as u_3 | < 72 | < 218 | < 13920 // Add 2 to U_N due to final decrement. let reciprocal_precision: ::Int = 220.cast(); // Suppose 1/b - P * 2^-W < x < 1/b + P * 2^-W let x_uq0 = x_uq0 - reciprocal_precision; // Now 1/b - (2*P) * 2^-W < x < 1/b // FIXME Is x_UQ0 still >= 0.5? let mut quotient: ::Int = x_uq0.widen_mul(a_significand << 1).hi(); // Now, a/b - 4*P * 2^-W < q < a/b for q= in UQ1.(SB+1+W). // quotient_UQ1 is in [0.5, 2.0) as UQ1.(SB+1), // adjust it to be in [1.0, 2.0) as UQ1.SB. let (mut residual, written_exponent) = if quotient < (implicit_bit << 1) { // Highest bit is 0, so just reinterpret quotient_UQ1 as UQ1.SB, // effectively doubling its value as well as its error estimation. let residual_lo = (a_significand << (significand_bits + 1)).wrapping_sub( (CastInto::::cast(quotient).wrapping_mul(CastInto::::cast(b_significand))) .cast(), ); a_significand <<= 1; (residual_lo, written_exponent.wrapping_sub(1)) } else { // Highest bit is 1 (the UQ1.(SB+1) value is in [1, 2)), convert it // to UQ1.SB by right shifting by 1. Least significant bit is omitted. quotient >>= 1; let residual_lo = (a_significand << significand_bits).wrapping_sub( (CastInto::::cast(quotient).wrapping_mul(CastInto::::cast(b_significand))) .cast(), ); (residual_lo, written_exponent) }; //drop mutability let quotient = quotient; // NB: residualLo is calculated above for the normal result case. // It is re-computed on denormal path that is expected to be not so // performance-sensitive. 
// Now, q cannot be greater than a/b and can differ by at most 8*P * 2^-W + 2^-SB // Each NextAfter() increments the floating point value by at least 2^-SB // (more, if exponent was incremented). // Different cases (<---> is of 2^-SB length, * = a/b that is shown as a midpoint): // q // | | * | | | | | // <---> 2^t // | | | | | * | | // q // To require at most one NextAfter(), an error should be less than 1.5 * 2^-SB. // (8*P) * 2^-W + 2^-SB < 1.5 * 2^-SB // (8*P) * 2^-W < 0.5 * 2^-SB // P < 2^(W-4-SB) // Generally, for at most R NextAfter() to be enough, // P < (2*R - 1) * 2^(W-4-SB) // For f32 (0+3): 10 < 32 (OK) // For f32 (2+1): 32 < 74 < 32 * 3, so two NextAfter() are required // For f64: 220 < 256 (OK) // For f128: 4096 * 3 < 13922 < 4096 * 5 (three NextAfter() are required) // If we have overflowed the exponent, return infinity if written_exponent >= max_exponent as i64 { return F::from_repr(inf_rep | quotient_sign); } // Now, quotient <= the correctly-rounded result // and may need taking NextAfter() up to 3 times (see error estimates above) // r = a - b * q let abs_result = if written_exponent > 0 { let mut ret = quotient & significand_mask; ret |= ((written_exponent as u64) << significand_bits).cast(); residual <<= 1; ret } else { if (significand_bits as i64 + written_exponent) < 0 { return F::from_repr(quotient_sign); } let ret = quotient.wrapping_shr((negate_u64(CastInto::::cast(written_exponent)) + 1) as u32); residual = (CastInto::::cast( a_significand.wrapping_shl( significand_bits.wrapping_add(CastInto::::cast(written_exponent)), ), ) .wrapping_sub( (CastInto::::cast(ret).wrapping_mul(CastInto::::cast(b_significand))) << 1, )) .cast(); ret }; // Round let abs_result = { residual += abs_result & one; // tie to even // conditionally turns the below LT comparison into LTE if residual > b_significand { abs_result + one } else { abs_result } }; F::from_repr(abs_result | quotient_sign) } intrinsics! 
{ #[arm_aeabi_alias = __aeabi_fdiv] pub extern "C" fn __divsf3(a: f32, b: f32) -> f32 { div32(a, b) } #[arm_aeabi_alias = __aeabi_ddiv] pub extern "C" fn __divdf3(a: f64, b: f64) -> f64 { div64(a, b) } #[cfg(target_arch = "arm")] pub extern "C" fn __divsf3vfp(a: f32, b: f32) -> f32 { a / b } #[cfg(target_arch = "arm")] pub extern "C" fn __divdf3vfp(a: f64, b: f64) -> f64 { a / b } } compiler_builtins-0.1.101/src/float/extend.rs000064400000000000000000000063571046102023000172530ustar 00000000000000use crate::float::Float; use crate::int::{CastInto, Int}; /// Generic conversion from a narrower to a wider IEEE-754 floating-point type fn extend(a: F) -> R where F::Int: CastInto, u64: CastInto, u32: CastInto, R::Int: CastInto, R::Int: CastInto, u64: CastInto, F::Int: CastInto, { let src_zero = F::Int::ZERO; let src_one = F::Int::ONE; let src_bits = F::BITS; let src_sign_bits = F::SIGNIFICAND_BITS; let src_exp_bias = F::EXPONENT_BIAS; let src_min_normal = F::IMPLICIT_BIT; let src_infinity = F::EXPONENT_MASK; let src_sign_mask = F::SIGN_MASK as F::Int; let src_abs_mask = src_sign_mask - src_one; let src_qnan = F::SIGNIFICAND_MASK; let src_nan_code = src_qnan - src_one; let dst_bits = R::BITS; let dst_sign_bits = R::SIGNIFICAND_BITS; let dst_inf_exp = R::EXPONENT_MAX; let dst_exp_bias = R::EXPONENT_BIAS; let dst_min_normal = R::IMPLICIT_BIT; let sign_bits_delta = dst_sign_bits - src_sign_bits; let exp_bias_delta = dst_exp_bias - src_exp_bias; let a_abs = a.repr() & src_abs_mask; let mut abs_result = R::Int::ZERO; if a_abs.wrapping_sub(src_min_normal) < src_infinity.wrapping_sub(src_min_normal) { // a is a normal number. // Extend to the destination type by shifting the significand and // exponent into the proper position and rebiasing the exponent. let abs_dst: R::Int = a_abs.cast(); let bias_dst: R::Int = exp_bias_delta.cast(); abs_result = abs_dst.wrapping_shl(sign_bits_delta); abs_result += bias_dst.wrapping_shl(dst_sign_bits); } else if a_abs >= src_infinity { // a is NaN or infinity. // Conjure the result by beginning with infinity, then setting the qNaN // bit (if needed) and right-aligning the rest of the trailing NaN // payload field. let qnan_dst: R::Int = (a_abs & src_qnan).cast(); let nan_code_dst: R::Int = (a_abs & src_nan_code).cast(); let inf_exp_dst: R::Int = dst_inf_exp.cast(); abs_result = inf_exp_dst.wrapping_shl(dst_sign_bits); abs_result |= qnan_dst.wrapping_shl(sign_bits_delta); abs_result |= nan_code_dst.wrapping_shl(sign_bits_delta); } else if a_abs != src_zero { // a is denormal. // Renormalize the significand and clear the leading bit, then insert // the correct adjusted exponent in the destination type. let scale = a_abs.leading_zeros() - src_min_normal.leading_zeros(); let abs_dst: R::Int = a_abs.cast(); let bias_dst: R::Int = (exp_bias_delta - scale + 1).cast(); abs_result = abs_dst.wrapping_shl(sign_bits_delta + scale); abs_result = (abs_result ^ dst_min_normal) | (bias_dst.wrapping_shl(dst_sign_bits)); } let sign_result: R::Int = (a.repr() & src_sign_mask).cast(); R::from_repr(abs_result | (sign_result.wrapping_shl(dst_bits - src_bits))) } intrinsics! 
{ #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_f2d] pub extern "C" fn __extendsfdf2(a: f32) -> f64 { extend(a) } #[cfg(target_arch = "arm")] pub extern "C" fn __extendsfdf2vfp(a: f32) -> f64 { a as f64 // LLVM generate 'fcvtds' } } compiler_builtins-0.1.101/src/float/mod.rs000064400000000000000000000125671046102023000165430ustar 00000000000000use core::ops; use super::int::Int; pub mod add; pub mod cmp; pub mod conv; pub mod div; pub mod extend; pub mod mul; pub mod pow; pub mod sub; pub mod trunc; public_test_dep! { /// Trait for some basic operations on floats pub(crate) trait Float: Copy + core::fmt::Debug + PartialEq + PartialOrd + ops::AddAssign + ops::MulAssign + ops::Add + ops::Sub + ops::Div + ops::Rem { /// A uint of the same width as the float type Int: Int; /// A int of the same width as the float type SignedInt: Int; /// An int capable of containing the exponent bits plus a sign bit. This is signed. type ExpInt: Int; const ZERO: Self; const ONE: Self; /// The bitwidth of the float type const BITS: u32; /// The bitwidth of the significand const SIGNIFICAND_BITS: u32; /// The bitwidth of the exponent const EXPONENT_BITS: u32 = Self::BITS - Self::SIGNIFICAND_BITS - 1; /// The maximum value of the exponent const EXPONENT_MAX: u32 = (1 << Self::EXPONENT_BITS) - 1; /// The exponent bias value const EXPONENT_BIAS: u32 = Self::EXPONENT_MAX >> 1; /// A mask for the sign bit const SIGN_MASK: Self::Int; /// A mask for the significand const SIGNIFICAND_MASK: Self::Int; // The implicit bit of the float format const IMPLICIT_BIT: Self::Int; /// A mask for the exponent const EXPONENT_MASK: Self::Int; /// Returns `self` transmuted to `Self::Int` fn repr(self) -> Self::Int; /// Returns `self` transmuted to `Self::SignedInt` fn signed_repr(self) -> Self::SignedInt; /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be /// represented in multiple different ways. This method returns `true` if two NaNs are /// compared. fn eq_repr(self, rhs: Self) -> bool; /// Returns the sign bit fn sign(self) -> bool; /// Returns the exponent with bias fn exp(self) -> Self::ExpInt; /// Returns the significand with no implicit bit (or the "fractional" part) fn frac(self) -> Self::Int; /// Returns the significand with implicit bit fn imp_frac(self) -> Self::Int; /// Returns a `Self::Int` transmuted back to `Self` fn from_repr(a: Self::Int) -> Self; /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position. fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self; /// Returns (normalized exponent, normalized significand) fn normalize(significand: Self::Int) -> (i32, Self::Int); /// Returns if `self` is subnormal fn is_subnormal(self) -> bool; } } macro_rules! 
float_impl { ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => { impl Float for $ty { type Int = $ity; type SignedInt = $sity; type ExpInt = $expty; const ZERO: Self = 0.0; const ONE: Self = 1.0; const BITS: u32 = $bits; const SIGNIFICAND_BITS: u32 = $significand_bits; const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1); const SIGNIFICAND_MASK: Self::Int = (1 << Self::SIGNIFICAND_BITS) - 1; const IMPLICIT_BIT: Self::Int = 1 << Self::SIGNIFICAND_BITS; const EXPONENT_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIGNIFICAND_MASK); fn repr(self) -> Self::Int { self.to_bits() } fn signed_repr(self) -> Self::SignedInt { self.to_bits() as Self::SignedInt } fn eq_repr(self, rhs: Self) -> bool { if self.is_nan() && rhs.is_nan() { true } else { self.repr() == rhs.repr() } } fn sign(self) -> bool { self.signed_repr() < Self::SignedInt::ZERO } fn exp(self) -> Self::ExpInt { ((self.to_bits() & Self::EXPONENT_MASK) >> Self::SIGNIFICAND_BITS) as Self::ExpInt } fn frac(self) -> Self::Int { self.to_bits() & Self::SIGNIFICAND_MASK } fn imp_frac(self) -> Self::Int { self.frac() | Self::IMPLICIT_BIT } fn from_repr(a: Self::Int) -> Self { Self::from_bits(a) } fn from_parts(sign: bool, exponent: Self::Int, significand: Self::Int) -> Self { Self::from_repr( ((sign as Self::Int) << (Self::BITS - 1)) | ((exponent << Self::SIGNIFICAND_BITS) & Self::EXPONENT_MASK) | (significand & Self::SIGNIFICAND_MASK), ) } fn normalize(significand: Self::Int) -> (i32, Self::Int) { let shift = significand .leading_zeros() .wrapping_sub((Self::Int::ONE << Self::SIGNIFICAND_BITS).leading_zeros()); ( 1i32.wrapping_sub(shift as i32), significand << shift as Self::Int, ) } fn is_subnormal(self) -> bool { (self.repr() & Self::EXPONENT_MASK) == Self::Int::ZERO } } }; } float_impl!(f32, u32, i32, i16, 32, 23); float_impl!(f64, u64, i64, i16, 64, 52); compiler_builtins-0.1.101/src/float/mul.rs000064400000000000000000000154551046102023000165600ustar 00000000000000use crate::float::Float; use crate::int::{CastInto, DInt, HInt, Int}; fn mul(a: F, b: F) -> F where u32: CastInto, F::Int: CastInto, i32: CastInto, F::Int: CastInto, F::Int: HInt, { let one = F::Int::ONE; let zero = F::Int::ZERO; let bits = F::BITS; let significand_bits = F::SIGNIFICAND_BITS; let max_exponent = F::EXPONENT_MAX; let exponent_bias = F::EXPONENT_BIAS; let implicit_bit = F::IMPLICIT_BIT; let significand_mask = F::SIGNIFICAND_MASK; let sign_bit = F::SIGN_MASK as F::Int; let abs_mask = sign_bit - one; let exponent_mask = F::EXPONENT_MASK; let inf_rep = exponent_mask; let quiet_bit = implicit_bit >> 1; let qnan_rep = exponent_mask | quiet_bit; let exponent_bits = F::EXPONENT_BITS; let a_rep = a.repr(); let b_rep = b.repr(); let a_exponent = (a_rep >> significand_bits) & max_exponent.cast(); let b_exponent = (b_rep >> significand_bits) & max_exponent.cast(); let product_sign = (a_rep ^ b_rep) & sign_bit; let mut a_significand = a_rep & significand_mask; let mut b_significand = b_rep & significand_mask; let mut scale = 0; // Detect if a or b is zero, denormal, infinity, or NaN. 
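    // Worked f32 example of the check below (illustrative, not exhaustive):
    // max_exponent is 0xFF, so `exp.wrapping_sub(1) >= 0xFE` holds exactly when
    // the biased exponent is 0 (it wraps around to the integer maximum) or 0xFF,
    // i.e. when an operand is zero, denormal, infinity or NaN:
    //
    //     debug_assert!(0u32.wrapping_sub(1) >= 0xFE);    // exponent 0 case
    //     debug_assert!(0xFFu32.wrapping_sub(1) >= 0xFE); // exponent 0xFF case
    //
    // Two normal operands fail both tests and skip the special-case branch.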
if a_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() || b_exponent.wrapping_sub(one) >= (max_exponent - 1).cast() { let a_abs = a_rep & abs_mask; let b_abs = b_rep & abs_mask; // NaN + anything = qNaN if a_abs > inf_rep { return F::from_repr(a_rep | quiet_bit); } // anything + NaN = qNaN if b_abs > inf_rep { return F::from_repr(b_rep | quiet_bit); } if a_abs == inf_rep { if b_abs != zero { // infinity * non-zero = +/- infinity return F::from_repr(a_abs | product_sign); } else { // infinity * zero = NaN return F::from_repr(qnan_rep); } } if b_abs == inf_rep { if a_abs != zero { // infinity * non-zero = +/- infinity return F::from_repr(b_abs | product_sign); } else { // infinity * zero = NaN return F::from_repr(qnan_rep); } } // zero * anything = +/- zero if a_abs == zero { return F::from_repr(product_sign); } // anything * zero = +/- zero if b_abs == zero { return F::from_repr(product_sign); } // one or both of a or b is denormal, the other (if applicable) is a // normal number. Renormalize one or both of a and b, and set scale to // include the necessary exponent adjustment. if a_abs < implicit_bit { let (exponent, significand) = F::normalize(a_significand); scale += exponent; a_significand = significand; } if b_abs < implicit_bit { let (exponent, significand) = F::normalize(b_significand); scale += exponent; b_significand = significand; } } // Or in the implicit significand bit. (If we fell through from the // denormal path it was already set by normalize( ), but setting it twice // won't hurt anything.) a_significand |= implicit_bit; b_significand |= implicit_bit; // Get the significand of a*b. Before multiplying the significands, shift // one of them left to left-align it in the field. Thus, the product will // have (exponentBits + 2) integral digits, all but two of which must be // zero. Normalizing this result is just a conditional left-shift by one // and bumping the exponent accordingly. let (mut product_low, mut product_high) = a_significand .widen_mul(b_significand << exponent_bits) .lo_hi(); let a_exponent_i32: i32 = a_exponent.cast(); let b_exponent_i32: i32 = b_exponent.cast(); let mut product_exponent: i32 = a_exponent_i32 .wrapping_add(b_exponent_i32) .wrapping_add(scale) .wrapping_sub(exponent_bias as i32); // Normalize the significand, adjust exponent if needed. if (product_high & implicit_bit) != zero { product_exponent = product_exponent.wrapping_add(1); } else { product_high = (product_high << 1) | (product_low >> (bits - 1)); product_low <<= 1; } // If we have overflowed the type, return +/- infinity. if product_exponent >= max_exponent as i32 { return F::from_repr(inf_rep | product_sign); } if product_exponent <= 0 { // Result is denormal before rounding // // If the result is so small that it just underflows to zero, return // a zero of the appropriate sign. Mathematically there is no need to // handle this case separately, but we make it a special case to // simplify the shift logic. let shift = one.wrapping_sub(product_exponent.cast()).cast(); if shift >= bits { return F::from_repr(product_sign); } // Otherwise, shift the significand of the result so that the round // bit is the high bit of productLo. 
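    // Note on the sticky bits below (informal): any nonzero bits that would be
    // shifted out of `product_low` are OR-ed back into it, so an inexact result
    // can never masquerade as an exact halfway case when the final
    // round-to-nearest, ties-to-even step runs at the end of this function.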
if shift < bits { let sticky = product_low << (bits - shift); product_low = product_high << (bits - shift) | product_low >> shift | sticky; product_high >>= shift; } else if shift < (2 * bits) { let sticky = product_high << (2 * bits - shift) | product_low; product_low = product_high >> (shift - bits) | sticky; product_high = zero; } else { product_high = zero; } } else { // Result is normal before rounding; insert the exponent. product_high &= significand_mask; product_high |= product_exponent.cast() << significand_bits; } // Insert the sign of the result: product_high |= product_sign; // Final rounding. The final result may overflow to infinity, or underflow // to zero, but those are the correct results in those cases. We use the // default IEEE-754 round-to-nearest, ties-to-even rounding mode. if product_low > sign_bit { product_high += one; } if product_low == sign_bit { product_high += product_high & one; } F::from_repr(product_high) } intrinsics! { #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_fmul] pub extern "C" fn __mulsf3(a: f32, b: f32) -> f32 { mul(a, b) } #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_dmul] pub extern "C" fn __muldf3(a: f64, b: f64) -> f64 { mul(a, b) } #[cfg(target_arch = "arm")] pub extern "C" fn __mulsf3vfp(a: f32, b: f32) -> f32 { a * b } #[cfg(target_arch = "arm")] pub extern "C" fn __muldf3vfp(a: f64, b: f64) -> f64 { a * b } } compiler_builtins-0.1.101/src/float/pow.rs000064400000000000000000000012051046102023000165540ustar 00000000000000use crate::float::Float; use crate::int::Int; /// Returns `a` raised to the power `b` fn pow(a: F, b: i32) -> F { let mut a = a; let recip = b < 0; let mut pow = Int::abs_diff(b, 0); let mut mul = F::ONE; loop { if (pow & 1) != 0 { mul *= a; } pow >>= 1; if pow == 0 { break; } a *= a; } if recip { F::ONE / mul } else { mul } } intrinsics! { pub extern "C" fn __powisf2(a: f32, b: i32) -> f32 { pow(a, b) } pub extern "C" fn __powidf2(a: f64, b: i32) -> f64 { pow(a, b) } } compiler_builtins-0.1.101/src/float/sub.rs000064400000000000000000000012231046102023000165400ustar 00000000000000use crate::float::add::__adddf3; use crate::float::add::__addsf3; use crate::float::Float; intrinsics! 
{ #[arm_aeabi_alias = __aeabi_fsub] pub extern "C" fn __subsf3(a: f32, b: f32) -> f32 { __addsf3(a, f32::from_repr(b.repr() ^ f32::SIGN_MASK)) } #[arm_aeabi_alias = __aeabi_dsub] pub extern "C" fn __subdf3(a: f64, b: f64) -> f64 { __adddf3(a, f64::from_repr(b.repr() ^ f64::SIGN_MASK)) } #[cfg(target_arch = "arm")] pub extern "C" fn __subsf3vfp(a: f32, b: f32) -> f32 { a - b } #[cfg(target_arch = "arm")] pub extern "C" fn __subdf3vfp(a: f64, b: f64) -> f64 { a - b } } compiler_builtins-0.1.101/src/float/trunc.rs000064400000000000000000000112271046102023000171070ustar 00000000000000use crate::float::Float; use crate::int::{CastInto, Int}; fn trunc(a: F) -> R where F::Int: CastInto, F::Int: CastInto, u64: CastInto, u32: CastInto, R::Int: CastInto, u32: CastInto, F::Int: CastInto, { let src_zero = F::Int::ZERO; let src_one = F::Int::ONE; let src_bits = F::BITS; let src_exp_bias = F::EXPONENT_BIAS; let src_min_normal = F::IMPLICIT_BIT; let src_significand_mask = F::SIGNIFICAND_MASK; let src_infinity = F::EXPONENT_MASK; let src_sign_mask = F::SIGN_MASK; let src_abs_mask = src_sign_mask - src_one; let round_mask = (src_one << (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)) - src_one; let halfway = src_one << (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS - 1); let src_qnan = src_one << (F::SIGNIFICAND_BITS - 1); let src_nan_code = src_qnan - src_one; let dst_zero = R::Int::ZERO; let dst_one = R::Int::ONE; let dst_bits = R::BITS; let dst_inf_exp = R::EXPONENT_MAX; let dst_exp_bias = R::EXPONENT_BIAS; let underflow_exponent: F::Int = (src_exp_bias + 1 - dst_exp_bias).cast(); let overflow_exponent: F::Int = (src_exp_bias + dst_inf_exp - dst_exp_bias).cast(); let underflow: F::Int = underflow_exponent << F::SIGNIFICAND_BITS; let overflow: F::Int = overflow_exponent << F::SIGNIFICAND_BITS; let dst_qnan = R::Int::ONE << (R::SIGNIFICAND_BITS - 1); let dst_nan_code = dst_qnan - dst_one; let sign_bits_delta = F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS; // Break a into a sign and representation of the absolute value. let a_abs = a.repr() & src_abs_mask; let sign = a.repr() & src_sign_mask; let mut abs_result: R::Int; if a_abs.wrapping_sub(underflow) < a_abs.wrapping_sub(overflow) { // The exponent of a is within the range of normal numbers in the // destination format. We can convert by simply right-shifting with // rounding and adjusting the exponent. abs_result = (a_abs >> sign_bits_delta).cast(); let tmp = src_exp_bias.wrapping_sub(dst_exp_bias) << R::SIGNIFICAND_BITS; abs_result = abs_result.wrapping_sub(tmp.cast()); let round_bits = a_abs & round_mask; if round_bits > halfway { // Round to nearest. abs_result += dst_one; } else if round_bits == halfway { // Tie to even. abs_result += abs_result & dst_one; }; } else if a_abs > src_infinity { // a is NaN. // Conjure the result by beginning with infinity, setting the qNaN // bit and inserting the (truncated) trailing NaN field. abs_result = (dst_inf_exp << R::SIGNIFICAND_BITS).cast(); abs_result |= dst_qnan; abs_result |= dst_nan_code & ((a_abs & src_nan_code) >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast(); } else if a_abs >= overflow { // a overflows to infinity. abs_result = (dst_inf_exp << R::SIGNIFICAND_BITS).cast(); } else { // a underflows on conversion to the destination type or is an exact // zero. The result may be a denormal or zero. Extract the exponent // to get the shift amount for the denormalization. 
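    // Hypothetical f64 -> f32 example for this denormal path: src_exp_bias is
    // 1023 and dst_exp_bias is 127, so a biased source exponent of, say, 880
    // (value near 2^-143, inside the f32 subnormal range) gives
    // shift = 1023 - 127 - 880 + 1 = 17. The significand, with its implicit bit
    // restored, is therefore shifted right 17 extra places (keeping sticky bits)
    // on top of the usual 29-bit narrowing shift before rounding.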
let a_exp: u32 = (a_abs >> F::SIGNIFICAND_BITS).cast(); let shift = src_exp_bias - dst_exp_bias - a_exp + 1; let significand = (a.repr() & src_significand_mask) | src_min_normal; // Right shift by the denormalization amount with sticky. if shift > F::SIGNIFICAND_BITS { abs_result = dst_zero; } else { let sticky = if (significand << (src_bits - shift)) != src_zero { src_one } else { src_zero }; let denormalized_significand: F::Int = significand >> shift | sticky; abs_result = (denormalized_significand >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast(); let round_bits = denormalized_significand & round_mask; // Round to nearest if round_bits > halfway { abs_result += dst_one; } // Ties to even else if round_bits == halfway { abs_result += abs_result & dst_one; }; } } // Apply the signbit to the absolute value. R::from_repr(abs_result | sign.wrapping_shr(src_bits - dst_bits).cast()) } intrinsics! { #[aapcs_on_arm] #[arm_aeabi_alias = __aeabi_d2f] pub extern "C" fn __truncdfsf2(a: f64) -> f32 { trunc(a) } #[cfg(target_arch = "arm")] pub extern "C" fn __truncdfsf2vfp(a: f64) -> f32 { a as f32 } } compiler_builtins-0.1.101/src/int/addsub.rs000064400000000000000000000045431046102023000167060ustar 00000000000000use crate::int::{DInt, Int}; trait UAddSub: DInt { fn uadd(self, other: Self) -> Self { let (lo, carry) = self.lo().overflowing_add(other.lo()); let hi = self.hi().wrapping_add(other.hi()); let carry = if carry { Self::H::ONE } else { Self::H::ZERO }; Self::from_lo_hi(lo, hi.wrapping_add(carry)) } fn uadd_one(self) -> Self { let (lo, carry) = self.lo().overflowing_add(Self::H::ONE); let carry = if carry { Self::H::ONE } else { Self::H::ZERO }; Self::from_lo_hi(lo, self.hi().wrapping_add(carry)) } fn usub(self, other: Self) -> Self { let uneg = (!other).uadd_one(); self.uadd(uneg) } } impl UAddSub for u128 {} trait AddSub: Int where ::UnsignedInt: UAddSub, { fn add(self, other: Self) -> Self { Self::from_unsigned(self.unsigned().uadd(other.unsigned())) } fn sub(self, other: Self) -> Self { Self::from_unsigned(self.unsigned().usub(other.unsigned())) } } impl AddSub for u128 {} impl AddSub for i128 {} trait Addo: AddSub where ::UnsignedInt: UAddSub, { fn addo(self, other: Self) -> (Self, bool) { let sum = AddSub::add(self, other); (sum, (other < Self::ZERO) != (sum < self)) } } impl Addo for i128 {} impl Addo for u128 {} trait Subo: AddSub where ::UnsignedInt: UAddSub, { fn subo(self, other: Self) -> (Self, bool) { let sum = AddSub::sub(self, other); (sum, (other < Self::ZERO) != (self < sum)) } } impl Subo for i128 {} impl Subo for u128 {} intrinsics! { pub extern "C" fn __rust_i128_add(a: i128, b: i128) -> i128 { AddSub::add(a,b) } pub extern "C" fn __rust_i128_addo(a: i128, b: i128) -> (i128, bool) { a.addo(b) } pub extern "C" fn __rust_u128_add(a: u128, b: u128) -> u128 { AddSub::add(a,b) } pub extern "C" fn __rust_u128_addo(a: u128, b: u128) -> (u128, bool) { a.addo(b) } pub extern "C" fn __rust_i128_sub(a: i128, b: i128) -> i128 { AddSub::sub(a,b) } pub extern "C" fn __rust_i128_subo(a: i128, b: i128) -> (i128, bool) { a.subo(b) } pub extern "C" fn __rust_u128_sub(a: u128, b: u128) -> u128 { AddSub::sub(a,b) } pub extern "C" fn __rust_u128_subo(a: u128, b: u128) -> (u128, bool) { a.subo(b) } } compiler_builtins-0.1.101/src/int/leading_zeros.rs000064400000000000000000000127011046102023000202640ustar 00000000000000// Note: these functions happen to produce the correct `usize::leading_zeros(0)` value // without a explicit zero check. 
Zero is probably common enough that it could warrant // adding a zero check at the beginning, but `__clzsi2` has a precondition that `x != 0`. // Compilers will insert the check for zero in cases where it is needed. public_test_dep! { /// Returns the number of leading binary zeros in `x`. #[allow(dead_code)] pub(crate) fn usize_leading_zeros_default(x: usize) -> usize { // The basic idea is to test if the higher bits of `x` are zero and bisect the number // of leading zeros. It is possible for all branches of the bisection to use the same // code path by conditionally shifting the higher parts down to let the next bisection // step work on the higher or lower parts of `x`. Instead of starting with `z == 0` // and adding to the number of zeros, it is slightly faster to start with // `z == usize::MAX.count_ones()` and subtract from the potential number of zeros, // because it simplifies the final bisection step. let mut x = x; // the number of potential leading zeros let mut z = usize::MAX.count_ones() as usize; // a temporary let mut t: usize; #[cfg(target_pointer_width = "64")] { t = x >> 32; if t != 0 { z -= 32; x = t; } } #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] { t = x >> 16; if t != 0 { z -= 16; x = t; } } t = x >> 8; if t != 0 { z -= 8; x = t; } t = x >> 4; if t != 0 { z -= 4; x = t; } t = x >> 2; if t != 0 { z -= 2; x = t; } // the last two bisections are combined into one conditional t = x >> 1; if t != 0 { z - 2 } else { z - x } // We could potentially save a few cycles by using the LUT trick from // "https://embeddedgurus.com/state-space/2014/09/ // fast-deterministic-and-portable-counting-leading-zeros/". // However, 256 bytes for a LUT is too large for embedded use cases. We could remove // the last 3 bisections and use this 16 byte LUT for the rest of the work: //const LUT: [u8; 16] = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]; //z -= LUT[x] as usize; //z // However, it ends up generating about the same number of instructions. When benchmarked // on x86_64, it is slightly faster to use the LUT, but this is probably because of OOO // execution effects. Changing to using a LUT and branching is risky for smaller cores. } } // The above method does not compile well on RISC-V (because of the lack of predicated // instructions), producing code with many branches or using an excessively long // branchless solution. This method takes advantage of the set-if-less-than instruction on // RISC-V that allows `(x >= power-of-two) as usize` to be branchless. public_test_dep! { /// Returns the number of leading binary zeros in `x`. #[allow(dead_code)] pub(crate) fn usize_leading_zeros_riscv(x: usize) -> usize { let mut x = x; // the number of potential leading zeros let mut z = usize::MAX.count_ones() as usize; // a temporary let mut t: usize; // RISC-V does not have a set-if-greater-than-or-equal instruction and // `(x >= power-of-two) as usize` will get compiled into two instructions, but this is // still the most optimal method. A conditional set can only be turned into a single // immediate instruction if `x` is compared with an immediate `imm` (that can fit into // 12 bits) like `x < imm` but not `imm < x` (because the immediate is always on the // right). If we try to save an instruction by using `x < imm` for each bisection, we // have to shift `x` left and compare with powers of two approaching `usize::MAX + 1`, // but the immediate will never fit into 12 bits and never save an instruction. 
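    // Worked example (illustrative): on a 64-bit target with x = 1, every
    // `(x >= power_of_two)` test below is false, so `x` is never shifted and `z`
    // stays at 64; the final `z - x` step then returns 63, matching
    // usize::leading_zeros(1).
    //
    //     // debug_assert_eq!(usize_leading_zeros_riscv(1), 63);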
#[cfg(target_pointer_width = "64")] { // If the upper 32 bits of `x` are not all 0, `t` is set to `1 << 5`, otherwise // `t` is set to 0. t = ((x >= (1 << 32)) as usize) << 5; // If `t` was set to `1 << 5`, then the upper 32 bits are shifted down for the // next step to process. x >>= t; // If `t` was set to `1 << 5`, then we subtract 32 from the number of potential // leading zeros z -= t; } #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] { t = ((x >= (1 << 16)) as usize) << 4; x >>= t; z -= t; } t = ((x >= (1 << 8)) as usize) << 3; x >>= t; z -= t; t = ((x >= (1 << 4)) as usize) << 2; x >>= t; z -= t; t = ((x >= (1 << 2)) as usize) << 1; x >>= t; z -= t; t = (x >= (1 << 1)) as usize; x >>= t; z -= t; // All bits except the LSB are guaranteed to be zero for this final bisection step. // If `x != 0` then `x == 1` and subtracts one potential zero from `z`. z - x } } intrinsics! { #[maybe_use_optimized_c_shim] #[cfg(any( target_pointer_width = "16", target_pointer_width = "32", target_pointer_width = "64" ))] /// Returns the number of leading binary zeros in `x`. pub extern "C" fn __clzsi2(x: usize) -> usize { if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { usize_leading_zeros_riscv(x) } else { usize_leading_zeros_default(x) } } } compiler_builtins-0.1.101/src/int/mod.rs000064400000000000000000000261361046102023000162250ustar 00000000000000use core::ops; mod specialized_div_rem; pub mod addsub; pub mod leading_zeros; pub mod mul; pub mod sdiv; pub mod shift; pub mod udiv; pub use self::leading_zeros::__clzsi2; public_test_dep! { /// Trait for some basic operations on integers pub(crate) trait Int: Copy + core::fmt::Debug + PartialEq + PartialOrd + ops::AddAssign + ops::SubAssign + ops::BitAndAssign + ops::BitOrAssign + ops::BitXorAssign + ops::ShlAssign + ops::ShrAssign + ops::Add + ops::Sub + ops::Div + ops::Shl + ops::Shr + ops::BitOr + ops::BitXor + ops::BitAnd + ops::Not { /// Type with the same width but other signedness type OtherSign: Int; /// Unsigned version of Self type UnsignedInt: Int; /// If `Self` is a signed integer const SIGNED: bool; /// The bitwidth of the int type const BITS: u32; const ZERO: Self; const ONE: Self; const MIN: Self; const MAX: Self; /// LUT used for maximizing the space covered and minimizing the computational cost of fuzzing /// in `testcrate`. For example, Self = u128 produces [0,1,2,7,8,15,16,31,32,63,64,95,96,111, /// 112,119,120,125,126,127]. const FUZZ_LENGTHS: [u8; 20]; /// The number of entries of `FUZZ_LENGTHS` actually used. The maximum is 20 for u128. const FUZZ_NUM: usize; fn unsigned(self) -> Self::UnsignedInt; fn from_unsigned(unsigned: Self::UnsignedInt) -> Self; fn from_bool(b: bool) -> Self; /// Prevents the need for excessive conversions between signed and unsigned fn logical_shr(self, other: u32) -> Self; /// Absolute difference between two integers. fn abs_diff(self, other: Self) -> Self::UnsignedInt; // copied from primitive integers, but put in a trait fn is_zero(self) -> bool; fn wrapping_neg(self) -> Self; fn wrapping_add(self, other: Self) -> Self; fn wrapping_mul(self, other: Self) -> Self; fn wrapping_sub(self, other: Self) -> Self; fn wrapping_shl(self, other: u32) -> Self; fn wrapping_shr(self, other: u32) -> Self; fn rotate_left(self, other: u32) -> Self; fn overflowing_add(self, other: Self) -> (Self, bool); fn leading_zeros(self) -> u32; } } macro_rules! 
int_impl_common { ($ty:ty) => { const BITS: u32 = ::ZERO.count_zeros(); const SIGNED: bool = Self::MIN != Self::ZERO; const ZERO: Self = 0; const ONE: Self = 1; const MIN: Self = ::MIN; const MAX: Self = ::MAX; const FUZZ_LENGTHS: [u8; 20] = { let bits = ::BITS; let mut v = [0u8; 20]; v[0] = 0; v[1] = 1; v[2] = 2; // important for parity and the iX::MIN case when reversed let mut i = 3; // No need for any more until the byte boundary, because there should be no algorithms // that are sensitive to anything not next to byte boundaries after 2. We also scale // in powers of two, which is important to prevent u128 corner tests from getting too // big. let mut l = 8; loop { if l >= ((bits / 2) as u8) { break; } // get both sides of the byte boundary v[i] = l - 1; i += 1; v[i] = l; i += 1; l *= 2; } if bits != 8 { // add the lower side of the middle boundary v[i] = ((bits / 2) - 1) as u8; i += 1; } // We do not want to jump directly from the Self::BITS/2 boundary to the Self::BITS // boundary because of algorithms that split the high part up. We reverse the scaling // as we go to Self::BITS. let mid = i; let mut j = 1; loop { v[i] = (bits as u8) - (v[mid - j]) - 1; if j == mid { break; } i += 1; j += 1; } v }; const FUZZ_NUM: usize = { let log2 = (::BITS - 1).count_ones() as usize; if log2 == 3 { // case for u8 6 } else { // 3 entries on each extreme, 2 in the middle, and 4 for each scale of intermediate // boundaries. 8 + (4 * (log2 - 4)) } }; fn from_bool(b: bool) -> Self { b as $ty } fn logical_shr(self, other: u32) -> Self { Self::from_unsigned(self.unsigned().wrapping_shr(other)) } fn is_zero(self) -> bool { self == Self::ZERO } fn wrapping_neg(self) -> Self { ::wrapping_neg(self) } fn wrapping_add(self, other: Self) -> Self { ::wrapping_add(self, other) } fn wrapping_mul(self, other: Self) -> Self { ::wrapping_mul(self, other) } fn wrapping_sub(self, other: Self) -> Self { ::wrapping_sub(self, other) } fn wrapping_shl(self, other: u32) -> Self { ::wrapping_shl(self, other) } fn wrapping_shr(self, other: u32) -> Self { ::wrapping_shr(self, other) } fn rotate_left(self, other: u32) -> Self { ::rotate_left(self, other) } fn overflowing_add(self, other: Self) -> (Self, bool) { ::overflowing_add(self, other) } fn leading_zeros(self) -> u32 { ::leading_zeros(self) } }; } macro_rules! int_impl { ($ity:ty, $uty:ty) => { impl Int for $uty { type OtherSign = $ity; type UnsignedInt = $uty; fn unsigned(self) -> $uty { self } // It makes writing macros easier if this is implemented for both signed and unsigned #[allow(clippy::wrong_self_convention)] fn from_unsigned(me: $uty) -> Self { me } fn abs_diff(self, other: Self) -> Self { if self < other { other.wrapping_sub(self) } else { self.wrapping_sub(other) } } int_impl_common!($uty); } impl Int for $ity { type OtherSign = $uty; type UnsignedInt = $uty; fn unsigned(self) -> $uty { self as $uty } fn from_unsigned(me: $uty) -> Self { me as $ity } fn abs_diff(self, other: Self) -> $uty { self.wrapping_sub(other).wrapping_abs() as $uty } int_impl_common!($ity); } }; } int_impl!(isize, usize); int_impl!(i8, u8); int_impl!(i16, u16); int_impl!(i32, u32); int_impl!(i64, u64); int_impl!(i128, u128); public_test_dep! { /// Trait for integers twice the bit width of another integer. This is implemented for all /// primitives except for `u8`, because there is not a smaller primitive. 
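///
/// Illustrative example (hypothetical values): for `0x1234_5678u32`, `lo()` is
/// `0x5678u16`, `hi()` is `0x1234u16`, and `from_lo_hi(0x5678, 0x1234)` rebuilds
/// the original value.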
pub(crate) trait DInt: Int { /// Integer that is half the bit width of the integer this trait is implemented for type H: HInt + Int; /// Returns the low half of `self` fn lo(self) -> Self::H; /// Returns the high half of `self` fn hi(self) -> Self::H; /// Returns the low and high halves of `self` as a tuple fn lo_hi(self) -> (Self::H, Self::H); /// Constructs an integer using lower and higher half parts fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self; } } public_test_dep! { /// Trait for integers half the bit width of another integer. This is implemented for all /// primitives except for `u128`, because it there is not a larger primitive. pub(crate) trait HInt: Int { /// Integer that is double the bit width of the integer this trait is implemented for type D: DInt + Int; /// Widens (using default extension) the integer to have double bit width fn widen(self) -> Self::D; /// Widens (zero extension only) the integer to have double bit width. This is needed to get /// around problems with associated type bounds (such as `Int`) being unstable fn zero_widen(self) -> Self::D; /// Widens the integer to have double bit width and shifts the integer into the higher bits fn widen_hi(self) -> Self::D; /// Widening multiplication with zero widening. This cannot overflow. fn zero_widen_mul(self, rhs: Self) -> Self::D; /// Widening multiplication. This cannot overflow. fn widen_mul(self, rhs: Self) -> Self::D; } } macro_rules! impl_d_int { ($($X:ident $D:ident),*) => { $( impl DInt for $D { type H = $X; fn lo(self) -> Self::H { self as $X } fn hi(self) -> Self::H { (self >> <$X as Int>::BITS) as $X } fn lo_hi(self) -> (Self::H, Self::H) { (self.lo(), self.hi()) } fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self { lo.zero_widen() | hi.widen_hi() } } )* }; } macro_rules! impl_h_int { ($($H:ident $uH:ident $X:ident),*) => { $( impl HInt for $H { type D = $X; fn widen(self) -> Self::D { self as $X } fn zero_widen(self) -> Self::D { (self as $uH) as $X } fn widen_hi(self) -> Self::D { (self as $X) << <$H as Int>::BITS } fn zero_widen_mul(self, rhs: Self) -> Self::D { self.zero_widen().wrapping_mul(rhs.zero_widen()) } fn widen_mul(self, rhs: Self) -> Self::D { self.widen().wrapping_mul(rhs.widen()) } } )* }; } impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128); impl_h_int!( u8 u8 u16, u16 u16 u32, u32 u32 u64, u64 u64 u128, i8 u8 i16, i16 u16 i32, i32 u32 i64, i64 u64 i128 ); public_test_dep! { /// Trait to express (possibly lossy) casting of integers pub(crate) trait CastInto: Copy { fn cast(self) -> T; } } macro_rules! 
cast_into { ($ty:ty) => { cast_into!($ty; usize, isize, u8, i8, u16, i16, u32, i32, u64, i64, u128, i128); }; ($ty:ty; $($into:ty),*) => {$( impl CastInto<$into> for $ty { fn cast(self) -> $into { self as $into } } )*}; } cast_into!(usize); cast_into!(isize); cast_into!(u8); cast_into!(i8); cast_into!(u16); cast_into!(i16); cast_into!(u32); cast_into!(i32); cast_into!(u64); cast_into!(i64); cast_into!(u128); cast_into!(i128); compiler_builtins-0.1.101/src/int/mul.rs000064400000000000000000000110231046102023000162300ustar 00000000000000use crate::int::{DInt, HInt, Int}; trait Mul: DInt where Self::H: DInt, { fn mul(self, rhs: Self) -> Self { // In order to prevent infinite recursion, we cannot use the `widen_mul` in this: //self.lo().widen_mul(rhs.lo()) // .wrapping_add(self.lo().wrapping_mul(rhs.hi()).widen_hi()) // .wrapping_add(self.hi().wrapping_mul(rhs.lo()).widen_hi()) let lhs_lo = self.lo(); let rhs_lo = rhs.lo(); // construct the widening multiplication using only `Self::H` sized multiplications let tmp_0 = lhs_lo.lo().zero_widen_mul(rhs_lo.lo()); let tmp_1 = lhs_lo.lo().zero_widen_mul(rhs_lo.hi()); let tmp_2 = lhs_lo.hi().zero_widen_mul(rhs_lo.lo()); let tmp_3 = lhs_lo.hi().zero_widen_mul(rhs_lo.hi()); // sum up all widening partials let mul = Self::from_lo_hi(tmp_0, tmp_3) .wrapping_add(tmp_1.zero_widen() << (Self::BITS / 4)) .wrapping_add(tmp_2.zero_widen() << (Self::BITS / 4)); // add the higher partials mul.wrapping_add(lhs_lo.wrapping_mul(rhs.hi()).widen_hi()) .wrapping_add(self.hi().wrapping_mul(rhs_lo).widen_hi()) } } impl Mul for u64 {} impl Mul for i128 {} pub(crate) trait UMulo: Int + DInt { fn mulo(self, rhs: Self) -> (Self, bool) { match (self.hi().is_zero(), rhs.hi().is_zero()) { // overflow is guaranteed (false, false) => (self.wrapping_mul(rhs), true), (true, false) => { let mul_lo = self.lo().widen_mul(rhs.lo()); let mul_hi = self.lo().widen_mul(rhs.hi()); let (mul, o) = mul_lo.overflowing_add(mul_hi.lo().widen_hi()); (mul, o || !mul_hi.hi().is_zero()) } (false, true) => { let mul_lo = rhs.lo().widen_mul(self.lo()); let mul_hi = rhs.lo().widen_mul(self.hi()); let (mul, o) = mul_lo.overflowing_add(mul_hi.lo().widen_hi()); (mul, o || !mul_hi.hi().is_zero()) } // overflow is guaranteed to not happen, and use a smaller widening multiplication (true, true) => (self.lo().widen_mul(rhs.lo()), false), } } } impl UMulo for u32 {} impl UMulo for u64 {} impl UMulo for u128 {} macro_rules! impl_signed_mulo { ($fn:ident, $iD:ident, $uD:ident) => { fn $fn(lhs: $iD, rhs: $iD) -> ($iD, bool) { let mut lhs = lhs; let mut rhs = rhs; // the test against `mul_neg` below fails without this early return if lhs == 0 || rhs == 0 { return (0, false); } let lhs_neg = lhs < 0; let rhs_neg = rhs < 0; if lhs_neg { lhs = lhs.wrapping_neg(); } if rhs_neg { rhs = rhs.wrapping_neg(); } let mul_neg = lhs_neg != rhs_neg; let (mul, o) = (lhs as $uD).mulo(rhs as $uD); let mut mul = mul as $iD; if mul_neg { mul = mul.wrapping_neg(); } if (mul < 0) != mul_neg { // this one check happens to catch all edge cases related to `$iD::MIN` (mul, true) } else { (mul, o) } } }; } impl_signed_mulo!(i32_overflowing_mul, i32, u32); impl_signed_mulo!(i64_overflowing_mul, i64, u64); impl_signed_mulo!(i128_overflowing_mul, i128, u128); intrinsics! 
{ #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_lmul] #[cfg(any(not(any(target_arch = "riscv32", target_arch = "riscv64")), target_feature = "m"))] pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { a.mul(b) } pub extern "C" fn __multi3(a: i128, b: i128) -> i128 { a.mul(b) } pub extern "C" fn __mulosi4(a: i32, b: i32, oflow: &mut i32) -> i32 { let (mul, o) = i32_overflowing_mul(a, b); *oflow = o as i32; mul } pub extern "C" fn __mulodi4(a: i64, b: i64, oflow: &mut i32) -> i64 { let (mul, o) = i64_overflowing_mul(a, b); *oflow = o as i32; mul } #[unadjusted_on_win64] pub extern "C" fn __muloti4(a: i128, b: i128, oflow: &mut i32) -> i128 { let (mul, o) = i128_overflowing_mul(a, b); *oflow = o as i32; mul } pub extern "C" fn __rust_i128_mulo(a: i128, b: i128) -> (i128, bool) { i128_overflowing_mul(a, b) } pub extern "C" fn __rust_u128_mulo(a: u128, b: u128) -> (u128, bool) { a.mulo(b) } } compiler_builtins-0.1.101/src/int/sdiv.rs000064400000000000000000000116041046102023000164050ustar 00000000000000use crate::int::udiv::*; macro_rules! sdivmod { ( $unsigned_fn:ident, // name of the unsigned division function $signed_fn:ident, // name of the signed division function $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` $($attr:tt),* // attributes ) => { intrinsics! { #[avr_skip] $( #[$attr] )* /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn $signed_fn(a: $iX, b: $iX, rem: &mut $iX) -> $iX { let a_neg = a < 0; let b_neg = b < 0; let mut a = a; let mut b = b; if a_neg { a = a.wrapping_neg(); } if b_neg { b = b.wrapping_neg(); } let mut r = *rem as $uX; let t = $unsigned_fn(a as $uX, b as $uX, Some(&mut r)) as $iX; let mut r = r as $iX; if a_neg { r = r.wrapping_neg(); } *rem = r; if a_neg != b_neg { t.wrapping_neg() } else { t } } } } } macro_rules! sdiv { ( $unsigned_fn:ident, // name of the unsigned division function $signed_fn:ident, // name of the signed division function $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` $($attr:tt),* // attributes ) => { intrinsics! { #[avr_skip] $( #[$attr] )* /// Returns `n / d` pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX { let a_neg = a < 0; let b_neg = b < 0; let mut a = a; let mut b = b; if a_neg { a = a.wrapping_neg(); } if b_neg { b = b.wrapping_neg(); } let t = $unsigned_fn(a as $uX, b as $uX) as $iX; if a_neg != b_neg { t.wrapping_neg() } else { t } } } } } macro_rules! smod { ( $unsigned_fn:ident, // name of the unsigned division function $signed_fn:ident, // name of the signed division function $uX:ident, // unsigned integer type for the inputs and outputs of `$unsigned_name` $iX:ident, // signed integer type for the inputs and outputs of `$signed_name` $($attr:tt),* // attributes ) => { intrinsics! { #[avr_skip] $( #[$attr] )* /// Returns `n % d` pub extern "C" fn $signed_fn(a: $iX, b: $iX) -> $iX { let a_neg = a < 0; let b_neg = b < 0; let mut a = a; let mut b = b; if a_neg { a = a.wrapping_neg(); } if b_neg { b = b.wrapping_neg(); } let r = $unsigned_fn(a as $uX, b as $uX) as $iX; if a_neg { r.wrapping_neg() } else { r } } } } } sdivmod!( __udivmodsi4, __divmodsi4, u32, i32, maybe_use_optimized_c_shim ); // The `#[arm_aeabi_alias = __aeabi_idiv]` attribute cannot be made to work with `intrinsics!` in macros intrinsics! 
{ #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_idiv] /// Returns `n / d` pub extern "C" fn __divsi3(a: i32, b: i32) -> i32 { let a_neg = a < 0; let b_neg = b < 0; let mut a = a; let mut b = b; if a_neg { a = a.wrapping_neg(); } if b_neg { b = b.wrapping_neg(); } let t = __udivsi3(a as u32, b as u32) as i32; if a_neg != b_neg { t.wrapping_neg() } else { t } } } smod!(__umodsi3, __modsi3, u32, i32, maybe_use_optimized_c_shim); sdivmod!( __udivmoddi4, __divmoddi4, u64, i64, maybe_use_optimized_c_shim ); sdiv!(__udivdi3, __divdi3, u64, i64, maybe_use_optimized_c_shim); smod!(__umoddi3, __moddi3, u64, i64, maybe_use_optimized_c_shim); // LLVM does not currently have a `__divmodti4` function, but GCC does sdivmod!( __udivmodti4, __divmodti4, u128, i128, maybe_use_optimized_c_shim ); sdiv!(__udivti3, __divti3, u128, i128, win64_128bit_abi_hack); smod!(__umodti3, __modti3, u128, i128, win64_128bit_abi_hack); compiler_builtins-0.1.101/src/int/shift.rs000064400000000000000000000062221046102023000165550ustar 00000000000000use crate::int::{DInt, HInt, Int}; trait Ashl: DInt { /// Returns `a << b`, requires `b < Self::BITS` fn ashl(self, shl: u32) -> Self { let n_h = Self::H::BITS; if shl & n_h != 0 { // we only need `self.lo()` because `self.hi()` will be shifted out entirely self.lo().wrapping_shl(shl - n_h).widen_hi() } else if shl == 0 { self } else { Self::from_lo_hi( self.lo().wrapping_shl(shl), self.lo().logical_shr(n_h.wrapping_sub(shl)) | self.hi().wrapping_shl(shl), ) } } } impl Ashl for u32 {} impl Ashl for u64 {} impl Ashl for u128 {} trait Ashr: DInt { /// Returns arithmetic `a >> b`, requires `b < Self::BITS` fn ashr(self, shr: u32) -> Self { let n_h = Self::H::BITS; if shr & n_h != 0 { Self::from_lo_hi( self.hi().wrapping_shr(shr - n_h), // smear the sign bit self.hi().wrapping_shr(n_h - 1), ) } else if shr == 0 { self } else { Self::from_lo_hi( self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h.wrapping_sub(shr)), self.hi().wrapping_shr(shr), ) } } } impl Ashr for i32 {} impl Ashr for i64 {} impl Ashr for i128 {} trait Lshr: DInt { /// Returns logical `a >> b`, requires `b < Self::BITS` fn lshr(self, shr: u32) -> Self { let n_h = Self::H::BITS; if shr & n_h != 0 { self.hi().logical_shr(shr - n_h).zero_widen() } else if shr == 0 { self } else { Self::from_lo_hi( self.lo().logical_shr(shr) | self.hi().wrapping_shl(n_h.wrapping_sub(shr)), self.hi().logical_shr(shr), ) } } } impl Lshr for u32 {} impl Lshr for u64 {} impl Lshr for u128 {} intrinsics! 
{ #[avr_skip] #[maybe_use_optimized_c_shim] pub extern "C" fn __ashlsi3(a: u32, b: u32) -> u32 { a.ashl(b) } #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsl] pub extern "C" fn __ashldi3(a: u64, b: core::ffi::c_uint) -> u64 { a.ashl(b as u32) } #[avr_skip] pub extern "C" fn __ashlti3(a: u128, b: u32) -> u128 { a.ashl(b) } #[avr_skip] #[maybe_use_optimized_c_shim] pub extern "C" fn __ashrsi3(a: i32, b: u32) -> i32 { a.ashr(b) } #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_lasr] pub extern "C" fn __ashrdi3(a: i64, b: core::ffi::c_uint) -> i64 { a.ashr(b as u32) } #[avr_skip] pub extern "C" fn __ashrti3(a: i128, b: u32) -> i128 { a.ashr(b) } #[avr_skip] #[maybe_use_optimized_c_shim] pub extern "C" fn __lshrsi3(a: u32, b: u32) -> u32 { a.lshr(b) } #[avr_skip] #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_llsr] pub extern "C" fn __lshrdi3(a: u64, b: core::ffi::c_uint) -> u64 { a.lshr(b as u32) } #[avr_skip] pub extern "C" fn __lshrti3(a: u128, b: u32) -> u128 { a.lshr(b) } } compiler_builtins-0.1.101/src/int/specialized_div_rem/asymmetric.rs000064400000000000000000000063311046102023000236170ustar 00000000000000/// Creates an unsigned division function optimized for dividing integers with the same /// bitwidth as the largest operand in an asymmetrically sized division. For example, x86-64 has an /// assembly instruction that can divide a 128 bit integer by a 64 bit integer if the quotient fits /// in 64 bits. The 128 bit version of this algorithm would use that fast hardware division to /// construct a full 128 bit by 128 bit division. #[allow(unused_macros)] macro_rules! impl_asymmetric { ( $fn:ident, // name of the unsigned division function $zero_div_fn:ident, // function called when division by zero is attempted $half_division:ident, // function for division of a $uX by a $uX $asymmetric_division:ident, // function for division of a $uD by a $uX $n_h:expr, // the number of bits in a $iH or $uH $uH:ident, // unsigned integer with half the bit width of $uX $uX:ident, // unsigned integer with half the bit width of $uD $uD:ident // unsigned integer type for the inputs and outputs of `$fn` ) => { /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a /// tuple. pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { let n: u32 = $n_h * 2; let duo_lo = duo as $uX; let duo_hi = (duo >> n) as $uX; let div_lo = div as $uX; let div_hi = (div >> n) as $uX; if div_hi == 0 { if div_lo == 0 { $zero_div_fn() } if duo_hi < div_lo { // `$uD` by `$uX` division with a quotient that will fit into a `$uX` let (quo, rem) = unsafe { $asymmetric_division(duo, div_lo) }; return (quo as $uD, rem as $uD); } else { // Short division using the $uD by $uX division let (quo_hi, rem_hi) = $half_division(duo_hi, div_lo); let tmp = unsafe { $asymmetric_division((duo_lo as $uD) | ((rem_hi as $uD) << n), div_lo) }; return ((tmp.0 as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD); } } // This has been adapted from // https://www.codeproject.com/tips/785014/uint-division-modulus which was in turn // adapted from Hacker's Delight. This is similar to the two possibility algorithm // in that it uses only more significant parts of `duo` and `div` to divide a large // integer with a smaller division instruction. 
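            // Sketch of the correction logic below (informal): `div` is reduced
            // to its significant upper bits so the fast `$uD` by `$uX` hardware
            // division can produce a trial quotient from `duo >> 1`. That
            // estimate can be slightly too large, so it is decremented once when
            // nonzero, after which it is at most one below the true quotient;
            // the full multiply-back then adds the single missing increment when
            // `div <= rem`.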
let div_lz = div_hi.leading_zeros(); let div_extra = n - div_lz; let div_sig_n = (div >> div_extra) as $uX; let tmp = unsafe { $asymmetric_division(duo >> 1, div_sig_n) }; let mut quo = tmp.0 >> ((n - 1) - div_lz); if quo != 0 { quo -= 1; } // Note that this is a full `$uD` multiplication being used here let mut rem = duo - (quo as $uD).wrapping_mul(div); if div <= rem { quo += 1; rem -= div; } return (quo as $uD, rem); } }; } compiler_builtins-0.1.101/src/int/specialized_div_rem/binary_long.rs000064400000000000000000000644301046102023000237510ustar 00000000000000/// Creates an unsigned division function that uses binary long division, designed for /// computer architectures without division instructions. These functions have good performance for /// microarchitectures with large branch miss penalties and architectures without the ability to /// predicate instructions. For architectures with predicated instructions, one of the algorithms /// described in the documentation of these functions probably has higher performance, and a custom /// assembly routine should be used instead. #[allow(unused_macros)] macro_rules! impl_binary_long { ( $fn:ident, // name of the unsigned division function $zero_div_fn:ident, // function called when division by zero is attempted $normalization_shift:ident, // function for finding the normalization shift $n:tt, // the number of bits in a $iX or $uX $uX:ident, // unsigned integer type for the inputs and outputs of `$fn` $iX:ident // signed integer type with same bitwidth as `$uX` $(, $fun_attr:meta)* // attributes for the function ) => { /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a /// tuple. $( #[$fun_attr] )* pub fn $fn(duo: $uX, div: $uX) -> ($uX, $uX) { let mut duo = duo; // handle edge cases before calling `$normalization_shift` if div == 0 { $zero_div_fn() } if duo < div { return (0, duo); } // There are many variations of binary division algorithm that could be used. This // documentation gives a tour of different methods so that future readers wanting to // optimize further do not have to painstakingly derive them. The SWAR variation is // especially hard to understand without reading the less convoluted methods first. // You may notice that a `duo < div_original` check is included in many these // algorithms. A critical optimization that many algorithms miss is handling of // quotients that will turn out to have many trailing zeros or many leading zeros. This // happens in cases of exact or close-to-exact divisions, divisions by power of two, and // in cases where the quotient is small. The `duo < div_original` check handles these // cases of early returns and ends up replacing other kinds of mundane checks that // normally terminate a binary division algorithm. // // Something you may see in other algorithms that is not special-cased here is checks // for division by powers of two. The `duo < div_original` check handles this case and // more, however it can be checked up front before the bisection using the // `((div > 0) && ((div & (div - 1)) == 0))` trick. This is not special-cased because // compilers should handle most cases where divisions by power of two occur, and we do // not want to add on a few cycles for every division operation just to save a few // cycles rarely. // The following example is the most straightforward translation from the way binary // long division is typically visualized: // Dividing 178u8 (0b10110010) by 6u8 (0b110). 
`div` is shifted left by 5, according to // the result from `$normalization_shift(duo, div, false)`. // // Step 0: `sub` is negative, so there is not full normalization, so no `quo` bit is set // and `duo` is kept unchanged. // duo:10110010, div_shifted:11000000, sub:11110010, quo:00000000, shl:5 // // Step 1: `sub` is positive, set a `quo` bit and update `duo` for next step. // duo:10110010, div_shifted:01100000, sub:01010010, quo:00010000, shl:4 // // Step 2: Continue based on `sub`. The `quo` bits start accumulating. // duo:01010010, div_shifted:00110000, sub:00100010, quo:00011000, shl:3 // duo:00100010, div_shifted:00011000, sub:00001010, quo:00011100, shl:2 // duo:00001010, div_shifted:00001100, sub:11111110, quo:00011100, shl:1 // duo:00001010, div_shifted:00000110, sub:00000100, quo:00011100, shl:0 // The `duo < div_original` check terminates the algorithm with the correct quotient of // 29u8 and remainder of 4u8 /* let div_original = div; let mut shl = $normalization_shift(duo, div, false); let mut quo = 0; loop { let div_shifted = div << shl; let sub = duo.wrapping_sub(div_shifted); // it is recommended to use `println!`s like this if functionality is unclear /* println!("duo:{:08b}, div_shifted:{:08b}, sub:{:08b}, quo:{:08b}, shl:{}", duo, div_shifted, sub, quo, shl ); */ if 0 <= (sub as $iX) { duo = sub; quo += 1 << shl; if duo < div_original { // this branch is optional return (quo, duo) } } if shl == 0 { return (quo, duo) } shl -= 1; } */ // This restoring binary long division algorithm reduces the number of operations // overall via: // - `pow` can be shifted right instead of recalculating from `shl` // - starting `div` shifted left and shifting it right for each step instead of // recalculating from `shl` // - The `duo < div_original` branch is used to terminate the algorithm instead of the // `shl == 0` branch. This check is strong enough to prevent set bits of `pow` and // `div` from being shifted off the end. This check also only occurs on half of steps // on average, since it is behind the `(sub as $iX) >= 0` branch. // - `shl` is now not needed by any aspect of of the loop and thus only 3 variables are // being updated between steps // // There are many variations of this algorithm, but this encompases the largest number // of architectures and does not rely on carry flags, add-with-carry, or SWAR // complications to be decently fast. /* let div_original = div; let shl = $normalization_shift(duo, div, false); let mut div: $uX = div << shl; let mut pow: $uX = 1 << shl; let mut quo: $uX = 0; loop { let sub = duo.wrapping_sub(div); if 0 <= (sub as $iX) { duo = sub; quo |= pow; if duo < div_original { return (quo, duo) } } div >>= 1; pow >>= 1; } */ // If the architecture has flags and predicated arithmetic instructions, it is possible // to do binary long division without branching and in only 3 or 4 instructions. This is // a variation of a 3 instruction central loop from // http://www.chiark.greenend.org.uk/~theom/riscos/docs/ultimate/a252div.txt. // // What allows doing division in only 3 instructions is realizing that instead of // keeping `duo` in place and shifting `div` right to align bits, `div` can be kept in // place and `duo` can be shifted left. This means `div` does not have to be updated, // but causes edge case problems and makes `duo < div_original` tests harder. Some // architectures have an option to shift an argument in an arithmetic operation, which // means `duo` can be shifted left and subtracted from in one instruction. 
The other two // instructions are updating `quo` and undoing the subtraction if it turns out things // were not normalized. /* // Perform one binary long division step on the already normalized arguments, because // the main. Note that this does a full normalization since the central loop needs // `duo.leading_zeros()` to be at least 1 more than `div.leading_zeros()`. The original // variation only did normalization to the nearest 4 steps, but this makes handling edge // cases much harder. We do a full normalization and perform a binary long division // step. In the edge case where the msbs of `duo` and `div` are set, it clears the msb // of `duo`, then the edge case handler shifts `div` right and does another long // division step to always insure `duo.leading_zeros() + 1 >= div.leading_zeros()`. let div_original = div; let mut shl = $normalization_shift(duo, div, true); let mut div: $uX = (div << shl); let mut quo: $uX = 1; duo = duo.wrapping_sub(div); if duo < div_original { return (1 << shl, duo); } let div_neg: $uX; if (div as $iX) < 0 { // A very ugly edge case where the most significant bit of `div` is set (after // shifting to match `duo` when its most significant bit is at the sign bit), which // leads to the sign bit of `div_neg` being cut off and carries not happening when // they should. This branch performs a long division step that keeps `duo` in place // and shifts `div` down. div >>= 1; div_neg = div.wrapping_neg(); let (sub, carry) = duo.overflowing_add(div_neg); duo = sub; quo = quo.wrapping_add(quo).wrapping_add(carry as $uX); if !carry { duo = duo.wrapping_add(div); } shl -= 1; } else { div_neg = div.wrapping_neg(); } // The add-with-carry that updates `quo` needs to have the carry set when a normalized // subtract happens. Using `duo.wrapping_shl(1).overflowing_sub(div)` to do the // subtraction generates a carry when an unnormalized subtract happens, which is the // opposite of what we want. Instead, we use // `duo.wrapping_shl(1).overflowing_add(div_neg)`, where `div_neg` is negative `div`. let mut i = shl; loop { if i == 0 { break; } i -= 1; // `ADDS duo, div, duo, LSL #1` // (add `div` to `duo << 1` and set flags) let (sub, carry) = duo.wrapping_shl(1).overflowing_add(div_neg); duo = sub; // `ADC quo, quo, quo` // (add with carry). Effectively shifts `quo` left by 1 and sets the least // significant bit to the carry. quo = quo.wrapping_add(quo).wrapping_add(carry as $uX); // `ADDCC duo, duo, div` // (add if carry clear). Undoes the subtraction if no carry was generated. if !carry { duo = duo.wrapping_add(div); } } return (quo, duo >> shl); */ // This is the SWAR (SIMD within in a register) restoring division algorithm. // This combines several ideas of the above algorithms: // - If `duo` is shifted left instead of shifting `div` right like in the 3 instruction // restoring division algorithm, some architectures can do the shifting and // subtraction step in one instruction. // - `quo` can be constructed by adding powers-of-two to it or shifting it left by one // and adding one. // - Every time `duo` is shifted left, there is another unused 0 bit shifted into the // LSB, so what if we use those bits to store `quo`? // Through a complex setup, it is possible to manage `duo` and `quo` in the same // register, and perform one step with 2 or 3 instructions. 
The only major downsides are // that there is significant setup (it is only saves instructions if `shl` is // approximately more than 4), `duo < div_original` checks are impractical once SWAR is // initiated, and the number of division steps taken has to be exact (we cannot do more // division steps than `shl`, because it introduces edge cases where quotient bits in // `duo` start to collide with the real part of `div`. /* // first step. The quotient bit is stored in `quo` for now let div_original = div; let mut shl = $normalization_shift(duo, div, true); let mut div: $uX = (div << shl); duo = duo.wrapping_sub(div); let mut quo: $uX = 1 << shl; if duo < div_original { return (quo, duo); } let mask: $uX; if (div as $iX) < 0 { // deal with same edge case as the 3 instruction restoring division algorithm, but // the quotient bit from this step also has to be stored in `quo` div >>= 1; shl -= 1; let tmp = 1 << shl; mask = tmp - 1; let sub = duo.wrapping_sub(div); if (sub as $iX) >= 0 { // restore duo = sub; quo |= tmp; } if duo < div_original { return (quo, duo); } } else { mask = quo - 1; } // There is now room for quotient bits in `duo`. // Note that `div` is already shifted left and has `shl` unset bits. We subtract 1 from // `div` and end up with the subset of `shl` bits being all being set. This subset acts // just like a two's complement negative one. The subset of `div` containing the divisor // had 1 subtracted from it, but a carry will always be generated from the `shl` subset // as long as the quotient stays positive. // // When the modified `div` is subtracted from `duo.wrapping_shl(1)`, the `shl` subset // adds a quotient bit to the least significant bit. // For example, 89 (0b01011001) divided by 3 (0b11): // // shl:4, div:0b00110000 // first step: // duo:0b01011001 // + div_neg:0b11010000 // ____________________ // 0b00101001 // quo is set to 0b00010000 and mask is set to 0b00001111 for later // // 1 is subtracted from `div`. I will differentiate the `shl` part of `div` and the // quotient part of `duo` with `^`s. // chars. // div:0b00110000 // ^^^^ // + 0b11111111 // ________________ // 0b00101111 // ^^^^ // div_neg:0b11010001 // // first SWAR step: // duo_shl1:0b01010010 // ^ // + div_neg:0b11010001 // ____________________ // 0b00100011 // ^ // second: // duo_shl1:0b01000110 // ^^ // + div_neg:0b11010001 // ____________________ // 0b00010111 // ^^ // third: // duo_shl1:0b00101110 // ^^^ // + div_neg:0b11010001 // ____________________ // 0b11111111 // ^^^ // 3 steps resulted in the quotient with 3 set bits as expected, but currently the real // part of `duo` is negative and the third step was an unnormalized step. The restore // branch then restores `duo`. Note that the restore branch does not shift `duo` left. // // duo:0b11111111 // ^^^ // + div:0b00101111 // ^^^^ // ________________ // 0b00101110 // ^^^ // `duo` is now back in the `duo_shl1` state it was at in the the third step, with an // unset quotient bit. // // final step (`shl` was 4, so exactly 4 steps must be taken) // duo_shl1:0b01011100 // ^^^^ // + div_neg:0b11010001 // ____________________ // 0b00101101 // ^^^^ // The quotient includes the `^` bits added with the `quo` bits from the beginning that // contained the first step and potential edge case step, // `quo:0b00010000 + (duo:0b00101101 & mask:0b00001111) == 0b00011101 == 29u8`. // The remainder is the bits remaining in `duo` that are not part of the quotient bits, // `duo:0b00101101 >> shl == 0b0010 == 2u8`. 
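// The central loop below is the step traced above: `div` is decremented once up front,
// then each iteration shifts `duo` left by one and subtracts the decremented `div`,
// undoing the subtraction whenever `duo` (reinterpreted as `$iX`) went negative.
// Exactly `shl` iterations are performed so that the quotient bits accumulating in the
// low end never collide with the divisor part of the register.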
let div: $uX = div.wrapping_sub(1); let mut i = shl; loop { if i == 0 { break; } i -= 1; duo = duo.wrapping_shl(1).wrapping_sub(div); if (duo as $iX) < 0 { // restore duo = duo.wrapping_add(div); } } // unpack the results of SWAR return ((duo & mask) | quo, duo >> shl); */ // The problem with the conditional restoring SWAR algorithm above is that, in practice, // it requires assembly code to bring out its full unrolled potential (it seems that // LLVM can't use unrolled conditionals optimally and ends up erasing all the benefit // that my algorithm intends). On architectures without predicated instructions, the code // gen is especially bad. We need a default software division algorithm that is // guaranteed to get decent code gen for the central loop. // For non-SWAR algorithms, there is a way to do binary long division without // predication or even branching. This involves creating a mask from the sign bit and // performing different kinds of steps using that. /* let shl = $normalization_shift(duo, div, true); let mut div: $uX = div << shl; let mut pow: $uX = 1 << shl; let mut quo: $uX = 0; loop { let sub = duo.wrapping_sub(div); let sign_mask = !((sub as $iX).wrapping_shr($n - 1) as $uX); duo -= div & sign_mask; quo |= pow & sign_mask; div >>= 1; pow >>= 1; if pow == 0 { break; } } return (quo, duo); */ // However, it requires about 4 extra operations (smearing the sign bit, negating the // mask, and applying the mask twice) on top of the operations done by the actual // algorithm. With SWAR however, just 2 extra operations are needed, making it // practical and even the most optimal algorithm for some architectures. // What we do is use custom assembly for predicated architectures that need software // division, and for the default algorithm use a mask based restoring SWAR algorithm // without conditionals or branches. On almost all architectures, this Rust code is // guaranteed to compile down to 5 assembly instructions or less for each step, and LLVM // will unroll it in a decent way. // standard opening for SWAR algorithm with first step and edge case handling let div_original = div; let mut shl = $normalization_shift(duo, div, true); let mut div: $uX = (div << shl); duo = duo.wrapping_sub(div); let mut quo: $uX = 1 << shl; if duo < div_original { return (quo, duo); } let mask: $uX; if (div as $iX) < 0 { div >>= 1; shl -= 1; let tmp = 1 << shl; mask = tmp - 1; let sub = duo.wrapping_sub(div); if (sub as $iX) >= 0 { duo = sub; quo |= tmp; } if duo < div_original { return (quo, duo); } } else { mask = quo - 1; } // central loop div = div.wrapping_sub(1); let mut i = shl; loop { if i == 0 { break; } i -= 1; // shift left 1 and subtract duo = duo.wrapping_shl(1).wrapping_sub(div); // create mask let mask = (duo as $iX).wrapping_shr($n - 1) as $uX; // restore duo = duo.wrapping_add(div & mask); } // unpack return ((duo & mask) | quo, duo >> shl); // miscellaneous binary long division algorithms that might be better for specific // architectures // Another kind of long division uses an interesting fact that `div` and `pow` can be // negated when `duo` is negative to perform a "negated" division step that works in // place of any normalization mechanism. This is a non-restoring division algorithm that // is very similar to the non-restoring division algorithms that can be found on the // internet, except there is only one test for `duo < 0`. The subtraction from `quo` can // be viewed as shifting the least significant set bit right (e.g.
if we enter a series // of negated binary long division steps starting with `quo == 0b1011_0000` and // `pow == 0b0000_1000`, `quo` will progress like this: 0b1010_1000, 0b1010_0100, // 0b1010_0010, 0b1010_0001). /* let div_original = div; let shl = $normalization_shift(duo, div, true); let mut div: $uX = (div << shl); let mut pow: $uX = 1 << shl; let mut quo: $uX = pow; duo = duo.wrapping_sub(div); if duo < div_original { return (quo, duo); } div >>= 1; pow >>= 1; loop { if (duo as $iX) < 0 { // Negated binary long division step. duo = duo.wrapping_add(div); quo = quo.wrapping_sub(pow); } else { // Normal long division step. if duo < div_original { return (quo, duo) } duo = duo.wrapping_sub(div); quo = quo.wrapping_add(pow); } pow >>= 1; div >>= 1; } */ // This is the Nonrestoring SWAR algorithm, combining the nonrestoring algorithm with // SWAR techniques that makes the only difference between steps be negation of `div`. // If there was an architecture with an instruction that negated inputs to an adder // based on conditionals, and in place shifting (or a three input addition operation // that can have `duo` as two of the inputs to effectively shift it left by 1), then a // single instruction central loop is possible. Microarchitectures often have inputs to // their ALU that can invert the arguments and carry in of adders, but the architectures // unfortunately do not have an instruction to dynamically invert this input based on // conditionals. /* // SWAR opening let div_original = div; let mut shl = $normalization_shift(duo, div, true); let mut div: $uX = (div << shl); duo = duo.wrapping_sub(div); let mut quo: $uX = 1 << shl; if duo < div_original { return (quo, duo); } let mask: $uX; if (div as $iX) < 0 { div >>= 1; shl -= 1; let tmp = 1 << shl; let sub = duo.wrapping_sub(div); if (sub as $iX) >= 0 { // restore duo = sub; quo |= tmp; } if duo < div_original { return (quo, duo); } mask = tmp - 1; } else { mask = quo - 1; } // central loop let div: $uX = div.wrapping_sub(1); let mut i = shl; loop { if i == 0 { break; } i -= 1; // note: the `wrapping_shl(1)` can be factored out, but would require another // restoring division step to prevent `(duo as $iX)` from overflowing if (duo as $iX) < 0 { // Negated binary long division step. duo = duo.wrapping_shl(1).wrapping_add(div); } else { // Normal long division step. duo = duo.wrapping_shl(1).wrapping_sub(div); } } if (duo as $iX) < 0 { // Restore. This was not needed in the original nonrestoring algorithm because of // the `duo < div_original` checks. duo = duo.wrapping_add(div); } // unpack return ((duo & mask) | quo, duo >> shl); */ } }; } compiler_builtins-0.1.101/src/int/specialized_div_rem/delegate.rs000064400000000000000000000354711046102023000232230ustar 00000000000000/// Creates an unsigned division function that uses a combination of hardware division and /// binary long division to divide integers larger than what hardware division by itself can do. This /// function is intended for microarchitectures that have division hardware, but not fast enough /// multiplication hardware for `impl_trifecta` to be faster. #[allow(unused_macros)] macro_rules! 
impl_delegate { ( $fn:ident, // name of the unsigned division function $zero_div_fn:ident, // function called when division by zero is attempted $half_normalization_shift:ident, // function for finding the normalization shift of $uX $half_division:ident, // function for division of a $uX by a $uX $n_h:expr, // the number of bits in $iH or $uH $uH:ident, // unsigned integer with half the bit width of $uX $uX:ident, // unsigned integer with half the bit width of $uD. $uD:ident, // unsigned integer type for the inputs and outputs of `$fn` $iD:ident // signed integer type with the same bitwidth as `$uD` ) => { /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a /// tuple. pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { // The two possibility algorithm, undersubtracting long division algorithm, or any kind // of reciprocal based algorithm will not be fastest, because they involve large // multiplications that we assume to not be fast enough relative to the divisions to // outweigh setup times. // the number of bits in a $uX let n = $n_h * 2; let duo_lo = duo as $uX; let duo_hi = (duo >> n) as $uX; let div_lo = div as $uX; let div_hi = (div >> n) as $uX; match (div_lo == 0, div_hi == 0, duo_hi == 0) { (true, true, _) => $zero_div_fn(), (_, false, true) => { // `duo` < `div` return (0, duo); } (false, true, true) => { // delegate to smaller division let tmp = $half_division(duo_lo, div_lo); return (tmp.0 as $uD, tmp.1 as $uD); } (false, true, false) => { if duo_hi < div_lo { // `quo_hi` will always be 0. This performs a binary long division algorithm // to zero `duo_hi` followed by a half division. // We can calculate the normalization shift using only `$uX` size functions. // If we calculated the normalization shift using // `$half_normalization_shift(duo_hi, div_lo false)`, it would break the // assumption the function has that the first argument is more than the // second argument. If the arguments are switched, the assumption holds true // since `duo_hi < div_lo`. let norm_shift = $half_normalization_shift(div_lo, duo_hi, false); let shl = if norm_shift == 0 { // Consider what happens if the msbs of `duo_hi` and `div_lo` align with // no shifting. The normalization shift will always return // `norm_shift == 0` regardless of whether it is fully normalized, // because `duo_hi < div_lo`. In that edge case, `n - norm_shift` would // result in shift overflow down the line. For the edge case, because // both `duo_hi < div_lo` and we are comparing all the significant bits // of `duo_hi` and `div`, we can make `shl = n - 1`. n - 1 } else { // We also cannot just use `shl = n - norm_shift - 1` in the general // case, because when we are not in the edge case comparing all the // significant bits, then the full `duo < div` may not be true and thus // breaks the division algorithm. n - norm_shift }; // The 3 variable restoring division algorithm (see binary_long.rs) is ideal // for this task, since `pow` and `quo` can be `$uX` and the delegation // check is simple. let mut div: $uD = div << shl; let mut pow_lo: $uX = 1 << shl; let mut quo_lo: $uX = 0; let mut duo = duo; loop { let sub = duo.wrapping_sub(div); if 0 <= (sub as $iD) { duo = sub; quo_lo |= pow_lo; let duo_hi = (duo >> n) as $uX; if duo_hi == 0 { // Delegate to get the rest of the quotient. Note that the // `div_lo` here is the original unshifted `div`. 
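// Because `duo_hi` is zero at this point, the rest of `duo` fits in a `$uX`, so a
// single half division below finishes off the low bits of the quotient and yields the
// final remainder.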
let tmp = $half_division(duo as $uX, div_lo); return ((quo_lo | tmp.0) as $uD, tmp.1 as $uD); } } div >>= 1; pow_lo >>= 1; } } else if duo_hi == div_lo { // `quo_hi == 1`. This branch is cheap and helps with edge cases. let tmp = $half_division(duo as $uX, div as $uX); return ((1 << n) | (tmp.0 as $uD), tmp.1 as $uD); } else { // `div_lo < duo_hi` // `rem_hi == 0` if (div_lo >> $n_h) == 0 { // Short division of $uD by a $uH, using $uX by $uX division let div_0 = div_lo as $uH as $uX; let (quo_hi, rem_3) = $half_division(duo_hi, div_0); let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h); let (quo_1, rem_2) = $half_division(duo_mid, div_0); let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h); let (quo_0, rem_1) = $half_division(duo_lo, div_0); return ( (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n), rem_1 as $uD, ); } // This is basically a short division composed of a half division for the hi // part, specialized 3 variable binary long division in the middle, and // another half division for the lo part. let duo_lo = duo as $uX; let tmp = $half_division(duo_hi, div_lo); let quo_hi = tmp.0; let mut duo = (duo_lo as $uD) | ((tmp.1 as $uD) << n); // This check is required to avoid breaking the long division below. if duo < div { return ((quo_hi as $uD) << n, duo); } // The half division handled all shift alignments down to `n`, so this // division can continue with a shift of `n - 1`. let mut div: $uD = div << (n - 1); let mut pow_lo: $uX = 1 << (n - 1); let mut quo_lo: $uX = 0; loop { let sub = duo.wrapping_sub(div); if 0 <= (sub as $iD) { duo = sub; quo_lo |= pow_lo; let duo_hi = (duo >> n) as $uX; if duo_hi == 0 { // Delegate to get the rest of the quotient. Note that the // `div_lo` here is the original unshifted `div`. let tmp = $half_division(duo as $uX, div_lo); return ( (tmp.0) as $uD | (quo_lo as $uD) | ((quo_hi as $uD) << n), tmp.1 as $uD, ); } } div >>= 1; pow_lo >>= 1; } } } (_, false, false) => { // Full $uD by $uD binary long division. `quo_hi` will always be 0. if duo < div { return (0, duo); } let div_original = div; let shl = $half_normalization_shift(duo_hi, div_hi, false); let mut duo = duo; let mut div: $uD = div << shl; let mut pow_lo: $uX = 1 << shl; let mut quo_lo: $uX = 0; loop { let sub = duo.wrapping_sub(div); if 0 <= (sub as $iD) { duo = sub; quo_lo |= pow_lo; if duo < div_original { return (quo_lo as $uD, duo); } } div >>= 1; pow_lo >>= 1; } } } } }; } public_test_dep! { /// Returns `n / d` and sets `*rem = n % d`. /// /// This specialization exists because: /// - The LLVM backend for 32-bit SPARC cannot compile functions that return `(u128, u128)`, /// so we have to use an old fashioned `&mut u128` argument to return the remainder. /// - 64-bit SPARC does not have u64 * u64 => u128 widening multiplication, which makes the /// delegate algorithm strategy the only reasonably fast way to perform `u128` division. 
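///
/// A minimal usage sketch (illustrative only; `div` must be nonzero, since a zero
/// divisor funnels into `zero_div_fn`):
/// ```ignore
/// let mut rem = 0u128;
/// let quo = u128_divide_sparc(300, 7, &mut rem);
/// assert_eq!((quo, rem), (42, 6));
/// ```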
// used on SPARC #[allow(dead_code)] pub(crate) fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 { use super::*; let duo_lo = duo as u64; let duo_hi = (duo >> 64) as u64; let div_lo = div as u64; let div_hi = (div >> 64) as u64; match (div_lo == 0, div_hi == 0, duo_hi == 0) { (true, true, _) => zero_div_fn(), (_, false, true) => { *rem = duo; return 0; } (false, true, true) => { let tmp = u64_by_u64_div_rem(duo_lo, div_lo); *rem = tmp.1 as u128; return tmp.0 as u128; } (false, true, false) => { if duo_hi < div_lo { let norm_shift = u64_normalization_shift(div_lo, duo_hi, false); let shl = if norm_shift == 0 { 64 - 1 } else { 64 - norm_shift }; let mut div: u128 = div << shl; let mut pow_lo: u64 = 1 << shl; let mut quo_lo: u64 = 0; let mut duo = duo; loop { let sub = duo.wrapping_sub(div); if 0 <= (sub as i128) { duo = sub; quo_lo |= pow_lo; let duo_hi = (duo >> 64) as u64; if duo_hi == 0 { let tmp = u64_by_u64_div_rem(duo as u64, div_lo); *rem = tmp.1 as u128; return (quo_lo | tmp.0) as u128; } } div >>= 1; pow_lo >>= 1; } } else if duo_hi == div_lo { let tmp = u64_by_u64_div_rem(duo as u64, div as u64); *rem = tmp.1 as u128; return (1 << 64) | (tmp.0 as u128); } else { if (div_lo >> 32) == 0 { let div_0 = div_lo as u32 as u64; let (quo_hi, rem_3) = u64_by_u64_div_rem(duo_hi, div_0); let duo_mid = ((duo >> 32) as u32 as u64) | (rem_3 << 32); let (quo_1, rem_2) = u64_by_u64_div_rem(duo_mid, div_0); let duo_lo = (duo as u32 as u64) | (rem_2 << 32); let (quo_0, rem_1) = u64_by_u64_div_rem(duo_lo, div_0); *rem = rem_1 as u128; return (quo_0 as u128) | ((quo_1 as u128) << 32) | ((quo_hi as u128) << 64); } let duo_lo = duo as u64; let tmp = u64_by_u64_div_rem(duo_hi, div_lo); let quo_hi = tmp.0; let mut duo = (duo_lo as u128) | ((tmp.1 as u128) << 64); if duo < div { *rem = duo; return (quo_hi as u128) << 64; } let mut div: u128 = div << (64 - 1); let mut pow_lo: u64 = 1 << (64 - 1); let mut quo_lo: u64 = 0; loop { let sub = duo.wrapping_sub(div); if 0 <= (sub as i128) { duo = sub; quo_lo |= pow_lo; let duo_hi = (duo >> 64) as u64; if duo_hi == 0 { let tmp = u64_by_u64_div_rem(duo as u64, div_lo); *rem = tmp.1 as u128; return (tmp.0) as u128 | (quo_lo as u128) | ((quo_hi as u128) << 64); } } div >>= 1; pow_lo >>= 1; } } } (_, false, false) => { if duo < div { *rem = duo; return 0; } let div_original = div; let shl = u64_normalization_shift(duo_hi, div_hi, false); let mut duo = duo; let mut div: u128 = div << shl; let mut pow_lo: u64 = 1 << shl; let mut quo_lo: u64 = 0; loop { let sub = duo.wrapping_sub(div); if 0 <= (sub as i128) { duo = sub; quo_lo |= pow_lo; if duo < div_original { *rem = duo; return quo_lo as u128; } } div >>= 1; pow_lo >>= 1; } } } } } compiler_builtins-0.1.101/src/int/specialized_div_rem/mod.rs000064400000000000000000000250641046102023000222250ustar 00000000000000// TODO: when `unsafe_block_in_unsafe_fn` is stabilized, remove this #![allow(unused_unsafe)] // The functions are complex with many branches, and explicit // `return`s makes it clear where function exit points are #![allow(clippy::needless_return)] #![allow(clippy::comparison_chain)] // Clippy is confused by the complex configuration #![allow(clippy::if_same_then_else)] #![allow(clippy::needless_bool)] //! This `specialized_div_rem` module is originally from version 1.0.0 of the //! `specialized-div-rem` crate. Note that `for` loops with ranges are not used in this //! module, since unoptimized compilation may generate references to `memcpy`. //! //! 
The purpose of these macros is to easily change both the division algorithm used //! for a given integer size and the half division used by that algorithm. The way //! functions call each other is also constructed such that linkers will find the chain of //! software and hardware divisions needed for every size of signed and unsigned division. //! For example, most target compilations do the following: //! //! - Many 128 bit division functions like `u128::wrapping_div` use //! `std::intrinsics::unchecked_div`, which gets replaced by `__udivti3` because there //! is not a 128 bit by 128 bit hardware division function in most architectures. //! `__udivti3` uses `u128_div_rem` (this extra level of function calls exists because //! `__umodti3` and `__udivmodti4` also exist, and `specialized_div_rem` supplies just //! one function to calculate both the quotient and remainder). If configuration flags //! enable it, `impl_trifecta!` defines `u128_div_rem` to use the trifecta algorithm, //! which requires the half sized division `u64_by_u64_div_rem`. If the architecture //! supplies a 64 bit hardware division instruction, `u64_by_u64_div_rem` will be //! reduced to those instructions. Note that we do not specify the half size division //! directly to be `__udivdi3`, because hardware division would never be introduced. //! - If the architecture does not supply a 64 bit hardware division instruction, u64 //! divisions will use functions such as `__udivdi3`. This will call `u64_div_rem` //! which is defined by `impl_delegate!`. The half division for this algorithm is //! `u32_by_u32_div_rem` which in turn becomes hardware division instructions or more //! software division algorithms. //! - If the architecture does not supply a 32 bit hardware division instruction, linkers will //! look for `__udivsi3`. `impl_binary_long!` is used, but this algorithm uses no half //! division, so the chain of calls ends here. //! //! On some architectures like x86_64, an asymmetrically sized division is supplied, in //! which 128 bit numbers can be divided by 64 bit numbers. `impl_asymmetric!` is used to //! extend the 128 by 64 bit division to a full 128 by 128 bit division. // `allow(dead_code)` is used in various places, because the configuration code would otherwise be // ridiculously complex #[macro_use] mod norm_shift; #[macro_use] mod binary_long; #[macro_use] mod delegate; // used on SPARC #[allow(unused_imports)] #[cfg(not(feature = "public-test-deps"))] pub(crate) use self::delegate::u128_divide_sparc; #[cfg(feature = "public-test-deps")] pub use self::delegate::u128_divide_sparc; #[macro_use] mod trifecta; #[macro_use] mod asymmetric; /// The behavior of all divisions by zero is controlled by this function. This function should be /// impossible to reach by Rust users, unless `compiler-builtins` public division functions or /// `core/std::unchecked_div/rem` are directly used without a zero check in front. fn zero_div_fn() -> ! { // Calling the intrinsic directly, to avoid the `assert_unsafe_precondition` that cannot be used // here because it involves non-`inline` functions // (https://github.com/rust-lang/compiler-builtins/issues/491). unsafe { core::intrinsics::unreachable() } } const USE_LZ: bool = { if cfg!(target_arch = "arm") { if cfg!(target_feature = "thumb-mode") { // ARM thumb targets have CLZ instructions if the instruction set of ARMv6T2 is // supported. This is needed to successfully differentiate between targets like // `thumbv8.base` and `thumbv8.main`.
cfg!(target_feature = "v6t2") } else { // Regular ARM targets have CLZ instructions if the ARMv5TE instruction set is // supported. Technically, ARMv5T was the first to have CLZ, but the "v5t" target // feature does not seem to work. cfg!(target_feature = "v5te") } } else if cfg!(any(target_arch = "sparc", target_arch = "sparc64")) { // LZD or LZCNT on SPARC only exists for the VIS 3 extension and later. cfg!(target_feature = "vis3") } else if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) { // The 'Zbb' Basic Bit-Manipulation extension on RISC-V // determines if a CLZ assembly instruction exists cfg!(target_feature = "zbb") } else { // All other common targets Rust supports should have CLZ instructions true } }; impl_normalization_shift!( u32_normalization_shift, USE_LZ, 32, u32, i32, allow(dead_code) ); impl_normalization_shift!( u64_normalization_shift, USE_LZ, 64, u64, i64, allow(dead_code) ); /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. /// `checked_div` and `checked_rem` are used to avoid bringing in panic function /// dependencies. #[inline] fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) { if let Some(quo) = duo.checked_div(div) { if let Some(rem) = duo.checked_rem(div) { return (quo, rem); } } zero_div_fn() } // Whether `trifecta` or `delegate` is faster for 128 bit division depends on the speed at which a // microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is // faster if the target pointer width is at least 64. #[cfg(all( not(any(target_pointer_width = "16", target_pointer_width = "32")), not(all(not(feature = "no-asm"), target_arch = "x86_64")), not(any(target_arch = "sparc", target_arch = "sparc64")) ))] impl_trifecta!( u128_div_rem, zero_div_fn, u64_by_u64_div_rem, 32, u32, u64, u128 ); // If the pointer width less than 64, then the target architecture almost certainly does not have // the fast 64 to 128 bit widening multiplication needed for `trifecta` to be faster. #[cfg(all( any(target_pointer_width = "16", target_pointer_width = "32"), not(all(not(feature = "no-asm"), target_arch = "x86_64")), not(any(target_arch = "sparc", target_arch = "sparc64")) ))] impl_delegate!( u128_div_rem, zero_div_fn, u64_normalization_shift, u64_by_u64_div_rem, 32, u32, u64, u128, i128 ); /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. /// /// # Safety /// /// If the quotient does not fit in a `u64`, a floating point exception occurs. /// If `div == 0`, then a division by zero exception occurs. #[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))] #[inline] unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) { let duo_lo = duo as u64; let duo_hi = (duo >> 64) as u64; let quo: u64; let rem: u64; unsafe { // divides the combined registers rdx:rax (`duo` is split into two 64 bit parts to do this) // by `div`. The quotient is stored in rax and the remainder in rdx. // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. core::arch::asm!( "div {0}", in(reg) div, inlateout("rax") duo_lo => quo, inlateout("rdx") duo_hi => rem, options(att_syntax, pure, nomem, nostack) ); } (quo, rem) } // use `asymmetric` instead of `trifecta` on x86_64 #[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))] impl_asymmetric!( u128_div_rem, zero_div_fn, u64_by_u64_div_rem, u128_by_u64_div_rem, 32, u32, u64, u128 ); /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. 
/// `checked_div` and `checked_rem` are used to avoid bringing in panic function /// dependencies. #[inline] #[allow(dead_code)] fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) { if let Some(quo) = duo.checked_div(div) { if let Some(rem) = duo.checked_rem(div) { return (quo, rem); } } zero_div_fn() } // When not on x86 and the pointer width is not 64, use `delegate` since the division size is larger // than register size. #[cfg(all( not(all(not(feature = "no-asm"), target_arch = "x86")), not(target_pointer_width = "64") ))] impl_delegate!( u64_div_rem, zero_div_fn, u32_normalization_shift, u32_by_u32_div_rem, 16, u16, u32, u64, i64 ); // When not on x86 and the pointer width is 64, use `binary_long`. #[cfg(all( not(all(not(feature = "no-asm"), target_arch = "x86")), target_pointer_width = "64" ))] impl_binary_long!( u64_div_rem, zero_div_fn, u64_normalization_shift, 64, u64, i64 ); /// Divides `duo` by `div` and returns a tuple of the quotient and the remainder. /// /// # Safety /// /// If the quotient does not fit in a `u32`, a floating point exception occurs. /// If `div == 0`, then a division by zero exception occurs. #[cfg(all(not(feature = "no-asm"), target_arch = "x86"))] #[inline] unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) { let duo_lo = duo as u32; let duo_hi = (duo >> 32) as u32; let quo: u32; let rem: u32; unsafe { // divides the combined registers rdx:rax (`duo` is split into two 32 bit parts to do this) // by `div`. The quotient is stored in rax and the remainder in rdx. // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. core::arch::asm!( "div {0}", in(reg) div, inlateout("rax") duo_lo => quo, inlateout("rdx") duo_hi => rem, options(att_syntax, pure, nomem, nostack) ); } (quo, rem) } // use `asymmetric` instead of `delegate` on x86 #[cfg(all(not(feature = "no-asm"), target_arch = "x86"))] impl_asymmetric!( u64_div_rem, zero_div_fn, u32_by_u32_div_rem, u64_by_u32_div_rem, 16, u16, u32, u64 ); // 32 bits is the smallest division used by `compiler-builtins`, so we end with binary long division impl_binary_long!( u32_div_rem, zero_div_fn, u32_normalization_shift, 32, u32, i32, allow(dead_code) ); compiler_builtins-0.1.101/src/int/specialized_div_rem/norm_shift.rs000064400000000000000000000123101046102023000236040ustar 00000000000000/// Creates a function used by some division algorithms to compute the "normalization shift". #[allow(unused_macros)] macro_rules! impl_normalization_shift { ( $name:ident, // name of the normalization shift function // boolean for if `$uX::leading_zeros` should be used (if an architecture does not have a // hardware instruction for `usize::leading_zeros`, then this should be `true`) $use_lz:ident, $n:tt, // the number of bits in a $iX or $uX $uX:ident, // unsigned integer type for the inputs of `$name` $iX:ident, // signed integer type for the inputs of `$name` $($unsigned_attr:meta),* // attributes for the function ) => { /// Finds the shift left that the divisor `div` would need to be normalized for a binary /// long division step with the dividend `duo`. 
NOTE: This function assumes that these edge /// cases have been handled before reaching it: /// ` /// if div == 0 { /// panic!("attempt to divide by zero") /// } /// if duo < div { /// return (0, duo) /// } /// ` /// /// Normalization is defined as (where `shl` is the output of this function): /// ` /// if duo.leading_zeros() != (div << shl).leading_zeros() { /// // If the most significant bits of `duo` and `div << shl` are not in the same place, /// // then `div << shl` has one more leading zero than `duo`. /// assert_eq!(duo.leading_zeros() + 1, (div << shl).leading_zeros()); /// // Also, `2*(div << shl)` is not more than `duo` (otherwise the first division step /// // would not be able to clear the msb of `duo`) /// assert!(duo < (div << (shl + 1))); /// } /// if full_normalization { /// // Some algorithms do not need "full" normalization, which means that `duo` is /// // larger than `div << shl` when the most significant bits are aligned. /// assert!((div << shl) <= duo); /// } /// ` /// /// Note: If the software bisection algorithm is being used in this function, it happens /// that full normalization always occurs, so be careful that new algorithms are not /// invisibly depending on this invariant when `full_normalization` is set to `false`. $( #[$unsigned_attr] )* fn $name(duo: $uX, div: $uX, full_normalization: bool) -> usize { // We have to find the leading zeros of `div` to know where its msb (most significant // set bit) is to even begin binary long division. It is also good to know where the msb // of `duo` is so that useful work can be started instead of shifting `div` for all // possible quotients (many division steps are wasted if `duo.leading_zeros()` is large // and `div` starts out being shifted all the way to the msb). Aligning the msbs of // `div` and `duo` could be done by shifting `div` left by // `div.leading_zeros() - duo.leading_zeros()`, but some CPUs without division hardware // also do not have single instructions for calculating `leading_zeros`. Instead of // software doing two bisections to find the two `leading_zeros`, we do one bisection to // find `div.leading_zeros() - duo.leading_zeros()` without actually knowing either of // the leading zeros values. let mut shl: usize; if $use_lz { shl = (div.leading_zeros() - duo.leading_zeros()) as usize; if full_normalization { if duo < (div << shl) { // when the msb of `duo` and `div` are aligned, the resulting `div` may be // larger than `duo`, so we decrease the shift by 1. shl -= 1; } } } else { let mut test = duo; shl = 0usize; let mut lvl = $n >> 1; loop { let tmp = test >> lvl; // It happens that a final `duo < (div << shl)` check is not needed, because the // `div <= tmp` check insures that the msb of `test` never passes the msb of // `div`, and any set bits shifted off the end of `test` would still keep // `div <= tmp` true. 
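// As a concrete illustration (using hypothetical 8 bit operands for brevity): for
// duo = 0b0110_0000 and div = 0b0000_0011, the passes are lvl=4: test >> 4 == 0b0110 and
// div <= 0b0110, so test = 0b0110 and shl = 4; lvl=2: test >> 2 == 0b0001 < div, no
// change; lvl=1: test >> 1 == 0b0011 and div <= 0b0011, so test = 0b0011 and shl = 5.
// This agrees with the `leading_zeros` based branch above, which would also produce
// shl = 5 for these inputs.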
if div <= tmp { test = tmp; shl += lvl; } // narrow down bisection lvl >>= 1; if lvl == 0 { break } } } // tests the invariants that should hold before beginning binary long division /* if full_normalization { assert!((div << shl) <= duo); } if duo.leading_zeros() != (div << shl).leading_zeros() { assert_eq!(duo.leading_zeros() + 1, (div << shl).leading_zeros()); assert!(duo < (div << (shl + 1))); } */ shl } } } compiler_builtins-0.1.101/src/int/specialized_div_rem/trifecta.rs000064400000000000000000000477441046102023000232600ustar 00000000000000/// Creates an unsigned division function optimized for division of integers with bitwidths /// larger than the largest hardware integer division supported. These functions use large radix /// division algorithms that require both fast division and very fast widening multiplication on the /// target microarchitecture. Otherwise, `impl_delegate` should be used instead. #[allow(unused_macros)] macro_rules! impl_trifecta { ( $fn:ident, // name of the unsigned division function $zero_div_fn:ident, // function called when division by zero is attempted $half_division:ident, // function for division of a $uX by a $uX $n_h:expr, // the number of bits in $iH or $uH $uH:ident, // unsigned integer with half the bit width of $uX $uX:ident, // unsigned integer with half the bit width of $uD $uD:ident // unsigned integer type for the inputs and outputs of `$unsigned_name` ) => { /// Computes the quotient and remainder of `duo` divided by `div` and returns them as a /// tuple. pub fn $fn(duo: $uD, div: $uD) -> ($uD, $uD) { // This is called the trifecta algorithm because it uses three main algorithms: short // division for small divisors, the two possibility algorithm for large divisors, and an // undersubtracting long division algorithm for intermediate cases. // This replicates `carrying_mul` (rust-lang rfc #2417). LLVM correctly optimizes this // to use a widening multiply to 128 bits on the relevant architectures. fn carrying_mul(lhs: $uX, rhs: $uX) -> ($uX, $uX) { let tmp = (lhs as $uD).wrapping_mul(rhs as $uD); (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) } fn carrying_mul_add(lhs: $uX, mul: $uX, add: $uX) -> ($uX, $uX) { let tmp = (lhs as $uD) .wrapping_mul(mul as $uD) .wrapping_add(add as $uD); (tmp as $uX, (tmp >> ($n_h * 2)) as $uX) } // the number of bits in a $uX let n = $n_h * 2; if div == 0 { $zero_div_fn() } // Trying to use a normalization shift function will cause inelegancies in the code and // inefficiencies for architectures with a native count leading zeros instruction. The // undersubtracting algorithm needs both values (keeping the original `div_lz` but // updating `duo_lz` multiple times), so we assume hardware support for fast // `leading_zeros` calculation. let div_lz = div.leading_zeros(); let mut duo_lz = duo.leading_zeros(); // the possible ranges of `duo` and `div` at this point: // `0 <= duo < 2^n_d` // `1 <= div < 2^n_d` // quotient is 0 or 1 branch if div_lz <= duo_lz { // The quotient cannot be more than 1. The highest set bit of `duo` needs to be at // least one place higher than `div` for the quotient to be more than 1. if duo >= div { return (1, duo - div); } else { return (0, duo); } } // `_sb` is the number of significant bits (from the ones place to the highest set bit) // `{2, 2^div_sb} <= duo < 2^n_d` // `1 <= div < {2^duo_sb, 2^(n_d - 1)}` // smaller division branch if duo_lz >= n { // `duo < 2^n` so it will fit in a $uX. `div` will also fit in a $uX (because of the // `div_lz <= duo_lz` branch) so no numerical error. 
let (quo, rem) = $half_division(duo as $uX, div as $uX); return (quo as $uD, rem as $uD); } // `{2^n, 2^div_sb} <= duo < 2^n_d` // `1 <= div < {2^duo_sb, 2^(n_d - 1)}` // short division branch if div_lz >= (n + $n_h) { // `1 <= div < {2^duo_sb, 2^n_h}` // It is barely possible to improve the performance of this by calculating the // reciprocal and removing one `$half_division`, but only if the CPU can do fast // multiplications in parallel. Other reciprocal based methods can remove two // `$half_division`s, but have multiplications that cannot be done in parallel and // reduce performance. I have decided to use this trivial short division method and // rely on the CPU having quick divisions. let duo_hi = (duo >> n) as $uX; let div_0 = div as $uH as $uX; let (quo_hi, rem_3) = $half_division(duo_hi, div_0); let duo_mid = ((duo >> $n_h) as $uH as $uX) | (rem_3 << $n_h); let (quo_1, rem_2) = $half_division(duo_mid, div_0); let duo_lo = (duo as $uH as $uX) | (rem_2 << $n_h); let (quo_0, rem_1) = $half_division(duo_lo, div_0); return ( (quo_0 as $uD) | ((quo_1 as $uD) << $n_h) | ((quo_hi as $uD) << n), rem_1 as $uD, ); } // relative leading significant bits, cannot overflow because of above branches let lz_diff = div_lz - duo_lz; // `{2^n, 2^div_sb} <= duo < 2^n_d` // `2^n_h <= div < {2^duo_sb, 2^(n_d - 1)}` // `mul` or `mul - 1` branch if lz_diff < $n_h { // Two possibility division algorithm // The most significant bits of `duo` and `div` are within `$n_h` bits of each // other. If we take the `n` most significant bits of `duo` and divide them by the // corresponding bits in `div`, it produces a quotient value `quo`. It happens that // `quo` or `quo - 1` will always be the correct quotient for the whole number. In // other words, the bits less significant than the `n` most significant bits of // `duo` and `div` can only influence the quotient to be one of two values. // Because there are only two possibilities, there only needs to be one `$uH` sized // division, a `$uH` by `$uD` multiplication, and only one branch with a few simple // operations. // // Proof that the true quotient can only be `quo` or `quo - 1`. // All `/` operators here are floored divisions. // // `shift` is the number of bits not in the higher `n` significant bits of `duo`. // (definitions) // 0. shift = n - duo_lz // 1. duo_sig_n == duo / 2^shift // 2. div_sig_n == div / 2^shift // 3. quo == duo_sig_n / div_sig_n // // // We are trying to find the true quotient, `true_quo`. // 4. true_quo = duo / div. (definition) // // This is true because of the bits that are cut off during the bit shift. // 5. duo_sig_n * 2^shift <= duo < (duo_sig_n + 1) * 2^shift. // 6. div_sig_n * 2^shift <= div < (div_sig_n + 1) * 2^shift. // // Dividing each bound of (5) by each bound of (6) gives 4 possibilities for what // `true_quo == duo / div` is bounded by: // (duo_sig_n * 2^shift) / (div_sig_n * 2^shift) // (duo_sig_n * 2^shift) / ((div_sig_n + 1) * 2^shift) // ((duo_sig_n + 1) * 2^shift) / (div_sig_n * 2^shift) // ((duo_sig_n + 1) * 2^shift) / ((div_sig_n + 1) * 2^shift) // // Simplifying each of these four: // duo_sig_n / div_sig_n // duo_sig_n / (div_sig_n + 1) // (duo_sig_n + 1) / div_sig_n // (duo_sig_n + 1) / (div_sig_n + 1) // // Taking the smallest and the largest of these as the low and high bounds // and replacing `duo / div` with `true_quo`: // 7. 
duo_sig_n / (div_sig_n + 1) <= true_quo < (duo_sig_n + 1) / div_sig_n // // The `lz_diff < n_h` conditional on this branch makes sure that `div_sig_n` is at // least `2^n_h`, and the `div_lz <= duo_lz` branch makes sure that the highest bit // of `div_sig_n` is not the `2^(n - 1)` bit. // 8. `2^(n - 1) <= duo_sig_n < 2^n` // 9. `2^n_h <= div_sig_n < 2^(n - 1)` // // We want to prove that either // `(duo_sig_n + 1) / div_sig_n == duo_sig_n / (div_sig_n + 1)` or that // `(duo_sig_n + 1) / div_sig_n == duo_sig_n / (div_sig_n + 1) + 1`. // // We also want to prove that `quo` is one of these: // `duo_sig_n / div_sig_n == duo_sig_n / (div_sig_n + 1)` or // `duo_sig_n / div_sig_n == (duo_sig_n + 1) / div_sig_n`. // // When 1 is added to the numerator of `duo_sig_n / div_sig_n` to produce // `(duo_sig_n + 1) / div_sig_n`, it is not possible that the value increases by // more than 1 with floored integer arithmetic and `div_sig_n != 0`. Consider // `x/y + 1 < (x + 1)/y` <=> `x/y + 1 < x/y + 1/y` <=> `1 < 1/y` <=> `y < 1`. // `div_sig_n` is a nonzero integer. Thus, // 10. `duo_sig_n / div_sig_n == (duo_sig_n + 1) / div_sig_n` or // `(duo_sig_n / div_sig_n) + 1 == (duo_sig_n + 1) / div_sig_n. // // When 1 is added to the denominator of `duo_sig_n / div_sig_n` to produce // `duo_sig_n / (div_sig_n + 1)`, it is not possible that the value decreases by // more than 1 with the bounds (8) and (9). Consider `x/y - 1 <= x/(y + 1)` <=> // `(x - y)/y < x/(y + 1)` <=> `(y + 1)*(x - y) < x*y` <=> `x*y - y*y + x - y < x*y` // <=> `x < y*y + y`. The smallest value of `div_sig_n` is `2^n_h` and the largest // value of `duo_sig_n` is `2^n - 1`. Substituting reveals `2^n - 1 < 2^n + 2^n_h`. // Thus, // 11. `duo_sig_n / div_sig_n == duo_sig_n / (div_sig_n + 1)` or // `(duo_sig_n / div_sig_n) - 1` == duo_sig_n / (div_sig_n + 1)` // // Combining both (10) and (11), we know that // `quo - 1 <= duo_sig_n / (div_sig_n + 1) <= true_quo // < (duo_sig_n + 1) / div_sig_n <= quo + 1` and therefore: // 12. quo - 1 <= true_quo < quo + 1 // // In a lot of division algorithms using smaller divisions to construct a larger // division, we often encounter a situation where the approximate `quo` value // calculated from a smaller division is multiple increments away from the true // `quo` value. In those algorithms, multiple correction steps have to be applied. // Those correction steps may need more multiplications to test `duo - (quo*div)` // again. Because of the fact that our `quo` can only be one of two values, we can // see if `duo - (quo*div)` overflows. If it did overflow, then we know that we have // the larger of the two values (since the true quotient is unique, and any larger // quotient will cause `duo - (quo*div)` to be negative). Also because there is only // one correction needed, we can calculate the remainder `duo - (true_quo*div) == // duo - ((quo - 1)*div) == duo - (quo*div - div) == duo + div - quo*div`. // If `duo - (quo*div)` did not overflow, then we have the correct answer. let shift = n - duo_lz; let duo_sig_n = (duo >> shift) as $uX; let div_sig_n = (div >> shift) as $uX; let quo = $half_division(duo_sig_n, div_sig_n).0; // The larger `quo` value can overflow `$uD` in the right circumstances. This is a // manual `carrying_mul_add` with overflow checking. 
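// A nonzero `overflow` below can only mean that `quo * div` does not fit in a `$uD`, so
// it is certainly greater than `duo` and the true quotient must be `quo - 1`.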
let div_lo = div as $uX; let div_hi = (div >> n) as $uX; let (tmp_lo, carry) = carrying_mul(quo, div_lo); let (tmp_hi, overflow) = carrying_mul_add(quo, div_hi, carry); let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n); if (overflow != 0) || (duo < tmp) { return ( (quo - 1) as $uD, // Both the addition and subtraction can overflow, but when combined end up // as a correct positive number. duo.wrapping_add(div).wrapping_sub(tmp), ); } else { return (quo as $uD, duo - tmp); } } // Undersubtracting long division algorithm. // Instead of clearing a minimum of 1 bit from `duo` per iteration via binary long // division, `n_h - 1` bits are cleared per iteration with this algorithm. It is a more // complicated version of regular long division. Most integer division algorithms tend // to guess a part of the quotient, and may have a larger quotient than the true // quotient (which when multiplied by `div` will "oversubtract" the original dividend). // They then check if the quotient was in fact too large and then have to correct it. // This long division algorithm has been carefully constructed to always underguess the // quotient by slim margins. This allows different subalgorithms to be blindly jumped to // without needing an extra correction step. // // The only problem is that this subalgorithm will not work for many ranges of `duo` and // `div`. Fortunately, the short division, two possibility algorithm, and other simple // cases happen to exactly fill these gaps. // // For an example, consider the division of 76543210 by 213 and assume that `n_h` is // equal to two decimal digits (note: we are working with base 10 here for readability). // The first `sig_n_h` part of the divisor (21) is taken and is incremented by 1 to // prevent oversubtraction. We also record the number of extra places not a part of // the `sig_n` or `sig_n_h` parts. // // sig_n_h == 2 digits, sig_n == 4 digits // // vvvv <- `duo_sig_n` // 76543210 // ^^^^ <- extra places in duo, `duo_extra == 4` // // vv <- `div_sig_n_h` // 213 // ^ <- extra places in div, `div_extra == 1` // // The difference in extra places, `duo_extra - div_extra == extra_shl == 3`, is used // for shifting partial sums in the long division. // // In the first step, the first `sig_n` part of duo (7654) is divided by // `div_sig_n_h_add_1` (22), which results in a partial quotient of 347. This is // multiplied by the whole divisor to make 73911, which is shifted left by `extra_shl` // and subtracted from duo. The partial quotient is also shifted left by `extra_shl` to // be added to `quo`. // // 347 // ________ // |76543210 // -73911 // 2632210 // // Variables dependent on duo have to be updated: // // vvvv <- `duo_sig_n == 2632` // 2632210 // ^^^ <- `duo_extra == 3` // // `extra_shl == 2` // // Two more steps are taken after this and then duo fits into `n` bits, and then a final // normal long division step is made. The partial quotients are all progressively added // to each other in the actual algorithm, but here I have left them all in a tower that // can be added together to produce the quotient, 359357. 
// // 14 // 443 // 119 // 347 // ________ // |76543210 // -73911 // 2632210 // -25347 // 97510 // -94359 // 3151 // -2982 // 169 <- the remainder let mut duo = duo; let mut quo: $uD = 0; // The number of lesser significant bits not a part of `div_sig_n_h` let div_extra = (n + $n_h) - div_lz; // The most significant `n_h` bits of div let div_sig_n_h = (div >> div_extra) as $uH; // This needs to be a `$uX` in case of overflow from the increment let div_sig_n_h_add1 = (div_sig_n_h as $uX) + 1; // `{2^n, 2^(div_sb + n_h)} <= duo < 2^n_d` // `2^n_h <= div < {2^(duo_sb - n_h), 2^n}` loop { // The number of lesser significant bits not a part of `duo_sig_n` let duo_extra = n - duo_lz; // The most significant `n` bits of `duo` let duo_sig_n = (duo >> duo_extra) as $uX; // the two possibility algorithm requires that the difference between msbs is less // than `n_h`, so the comparison is `<=` here. if div_extra <= duo_extra { // Undersubtracting long division step let quo_part = $half_division(duo_sig_n, div_sig_n_h_add1).0 as $uD; let extra_shl = duo_extra - div_extra; // Addition to the quotient. quo += (quo_part << extra_shl); // Subtraction from `duo`. At least `n_h - 1` bits are cleared from `duo` here. duo -= (div.wrapping_mul(quo_part) << extra_shl); } else { // Two possibility algorithm let shift = n - duo_lz; let duo_sig_n = (duo >> shift) as $uX; let div_sig_n = (div >> shift) as $uX; let quo_part = $half_division(duo_sig_n, div_sig_n).0; let div_lo = div as $uX; let div_hi = (div >> n) as $uX; let (tmp_lo, carry) = carrying_mul(quo_part, div_lo); // The undersubtracting long division algorithm has already run once, so // overflow beyond `$uD` bits is not possible here let (tmp_hi, _) = carrying_mul_add(quo_part, div_hi, carry); let tmp = (tmp_lo as $uD) | ((tmp_hi as $uD) << n); if duo < tmp { return ( quo + ((quo_part - 1) as $uD), duo.wrapping_add(div).wrapping_sub(tmp), ); } else { return (quo + (quo_part as $uD), duo - tmp); } } duo_lz = duo.leading_zeros(); if div_lz <= duo_lz { // quotient can have 0 or 1 added to it if div <= duo { return (quo + 1, duo - div); } else { return (quo, duo); } } // This can only happen if `div_sd < n` (because of previous "quo = 0 or 1" // branches), but it is not worth it to unroll further. if n <= duo_lz { // simple division and addition let tmp = $half_division(duo as $uX, div as $uX); return (quo + (tmp.0 as $uD), tmp.1 as $uD); } } } }; } compiler_builtins-0.1.101/src/int/udiv.rs000064400000000000000000000061721046102023000164130ustar 00000000000000#[cfg(not(feature = "public-test-deps"))] pub(crate) use crate::int::specialized_div_rem::*; #[cfg(feature = "public-test-deps")] pub use crate::int::specialized_div_rem::*; intrinsics! 
{ #[maybe_use_optimized_c_shim] #[arm_aeabi_alias = __aeabi_uidiv] /// Returns `n / d` pub extern "C" fn __udivsi3(n: u32, d: u32) -> u32 { u32_div_rem(n, d).0 } #[maybe_use_optimized_c_shim] /// Returns `n % d` pub extern "C" fn __umodsi3(n: u32, d: u32) -> u32 { u32_div_rem(n, d).1 } #[avr_skip] #[maybe_use_optimized_c_shim] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmodsi4(n: u32, d: u32, rem: Option<&mut u32>) -> u32 { let quo_rem = u32_div_rem(n, d); if let Some(rem) = rem { *rem = quo_rem.1; } quo_rem.0 } #[avr_skip] #[maybe_use_optimized_c_shim] /// Returns `n / d` pub extern "C" fn __udivdi3(n: u64, d: u64) -> u64 { u64_div_rem(n, d).0 } #[avr_skip] #[maybe_use_optimized_c_shim] /// Returns `n % d` pub extern "C" fn __umoddi3(n: u64, d: u64) -> u64 { u64_div_rem(n, d).1 } #[avr_skip] #[maybe_use_optimized_c_shim] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmoddi4(n: u64, d: u64, rem: Option<&mut u64>) -> u64 { let quo_rem = u64_div_rem(n, d); if let Some(rem) = rem { *rem = quo_rem.1; } quo_rem.0 } // Note: we use block configuration and not `if cfg!(...)`, because we need to entirely disable // the existence of `u128_div_rem` to get 32-bit SPARC to compile, see `u128_divide_sparc` docs. #[avr_skip] #[win64_128bit_abi_hack] /// Returns `n / d` pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 { #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { u128_div_rem(n, d).0 } #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { u128_divide_sparc(n, d, &mut 0) } } #[avr_skip] #[win64_128bit_abi_hack] /// Returns `n % d` pub extern "C" fn __umodti3(n: u128, d: u128) -> u128 { #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { u128_div_rem(n, d).1 } #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { let mut rem = 0; u128_divide_sparc(n, d, &mut rem); rem } } #[avr_skip] #[win64_128bit_abi_hack] /// Returns `n / d` and sets `*rem = n % d` pub extern "C" fn __udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> u128 { #[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))] { let quo_rem = u128_div_rem(n, d); if let Some(rem) = rem { *rem = quo_rem.1; } quo_rem.0 } #[cfg(any(target_arch = "sparc", target_arch = "sparc64"))] { let mut tmp = 0; let quo = u128_divide_sparc(n, d, &mut tmp); if let Some(rem) = rem { *rem = tmp; } quo } } } compiler_builtins-0.1.101/src/lib.rs000064400000000000000000000052051046102023000154140ustar 00000000000000#![cfg_attr(feature = "compiler-builtins", compiler_builtins)] #![cfg_attr(not(feature = "no-asm"), feature(asm))] #![feature(abi_unadjusted)] #![cfg_attr(not(feature = "no-asm"), feature(global_asm))] #![feature(cfg_target_has_atomic)] #![feature(compiler_builtins)] #![feature(core_ffi_c)] #![feature(core_intrinsics)] #![feature(inline_const)] #![feature(lang_items)] #![feature(linkage)] #![feature(naked_functions)] #![feature(repr_simd)] #![no_builtins] #![no_std] #![allow(unused_features)] #![allow(internal_features)] // We use `u128` in a whole bunch of places which we currently agree with the // compiler on ABIs and such, so we should be "good enough" for now and changes // to the `u128` ABI will be reflected here. #![allow(improper_ctypes, improper_ctypes_definitions)] // `mem::swap` cannot be used because it may generate references to memcpy in unoptimized code. #![allow(clippy::manual_swap)] // Support compiling on both stage0 and stage1 which may differ in supported stable features. 
#![allow(stable_features)] // We disable #[no_mangle] for tests so that we can verify the test results // against the native compiler-rt implementations of the builtins. // NOTE cfg(all(feature = "c", ..)) indicate that compiler-rt provides an arch optimized // implementation of that intrinsic and we'll prefer to use that // NOTE(aapcs, aeabi, arm) ARM targets use intrinsics named __aeabi_* instead of the intrinsics // that follow "x86 naming convention" (e.g. addsf3). Those aeabi intrinsics must adhere to the // AAPCS calling convention (`extern "aapcs"`) because that's how LLVM will call them. #[cfg(test)] extern crate core; #[macro_use] mod macros; pub mod float; pub mod int; #[cfg(any( all(target_family = "wasm", target_os = "unknown"), all(target_arch = "x86_64", target_os = "none"), all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "arm", target_os = "none"), all(target_arch = "xtensa", target_os = "none"), all(target_arch = "mips", target_os = "none"), target_os = "xous", all(target_vendor = "fortanix", target_env = "sgx"), target_os = "windows" ))] pub mod math; pub mod mem; #[cfg(target_arch = "arm")] pub mod arm; #[cfg(target_arch = "aarch64")] pub mod aarch64; #[cfg(all(target_arch = "aarch64", target_os = "linux", not(feature = "no-asm"),))] pub mod aarch64_linux; #[cfg(all( kernel_user_helpers, any(target_os = "linux", target_os = "android"), target_arch = "arm" ))] pub mod arm_linux; #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] pub mod riscv; #[cfg(target_arch = "x86")] pub mod x86; #[cfg(target_arch = "x86_64")] pub mod x86_64; pub mod probestack; compiler_builtins-0.1.101/src/macros.rs000064400000000000000000000444711046102023000161420ustar 00000000000000//! Macros shared throughout the compiler-builtins implementation /// Changes the visibility to `pub` if feature "public-test-deps" is set #[cfg(not(feature = "public-test-deps"))] macro_rules! public_test_dep { ($(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*) => { $(#[$($meta)*])* pub(crate) $ident $($tokens)* }; } /// Changes the visibility to `pub` if feature "public-test-deps" is set #[cfg(feature = "public-test-deps")] macro_rules! public_test_dep { {$(#[$($meta:meta)*])* pub(crate) $ident:ident $($tokens:tt)*} => { $(#[$($meta)*])* pub $ident $($tokens)* }; } /// The "main macro" used for defining intrinsics. /// /// The compiler-builtins library is super platform-specific with tons of crazy /// little tweaks for various platforms. As a result it *could* involve a lot of /// #[cfg] and macro soup, but the intention is that this macro alleviates a lot /// of that complexity. Ideally this macro has all the weird ABI things /// platforms need and elsewhere in this library it just looks like normal Rust /// code. /// /// When the weak-intrinsics feature is enabled, all intrinsics functions are /// marked with #[linkage = "weak"] so that they can be replaced by another /// implementation at link time. This is particularly useful for mixed Rust/C++ /// binaries that want to use the C++ intrinsics, otherwise linking against the /// Rust stdlib will replace those from the compiler-rt library. /// /// This macro is structured to be invoked with a bunch of functions that looks /// like: /// ```ignore /// intrinsics! { /// pub extern "C" fn foo(a: i32) -> u32 { /// // ... /// } /// /// #[nonstandard_attribute] /// pub extern "C" fn bar(a: i32) -> u32 { /// // ... /// } /// } /// ``` /// /// Each function is defined in a manner that looks like a normal Rust function. 
/// The macro then accepts a few nonstandard attributes that can decorate /// various functions. Each of the attributes is documented below with what it /// can do, and each of them slightly tweaks how further expansion happens. /// /// A quick overview of attributes supported right now are: /// /// * `weak` - indicates that the function should always be given weak linkage. /// This attribute must come before other attributes, as the other attributes /// will generate the final output function and need to have `weak` modify /// them. /// * `maybe_use_optimized_c_shim` - indicates that the Rust implementation is /// ignored if an optimized C version was compiled. /// * `aapcs_on_arm` - forces the ABI of the function to be `"aapcs"` on ARM and /// the specified ABI everywhere else. /// * `unadjusted_on_win64` - like `aapcs_on_arm` this switches to the /// `"unadjusted"` abi on Win64 and the specified abi elsewhere. /// * `win64_128bit_abi_hack` - this attribute is used for 128-bit integer /// intrinsics where the ABI is slightly tweaked on Windows platforms, but /// it's a normal ABI elsewhere for returning a 128 bit integer. /// * `arm_aeabi_alias` - handles the "aliasing" of various intrinsics on ARM /// their otherwise typical names to other prefixed ones. macro_rules! intrinsics { () => (); // Support cfg_attr: ( #[cfg_attr($e:meta, $($attr:tt)*)] $(#[$($attrs:tt)*])* pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( #[cfg($e)] intrinsics! { #[$($attr)*] $(#[$($attrs)*])* pub extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { $($body)* } } #[cfg(not($e))] intrinsics! { $(#[$($attrs)*])* pub extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { $($body)* } } intrinsics!($($rest)*); ); // Same as above but for unsafe. ( #[cfg_attr($e:meta, $($attr:tt)*)] $(#[$($attrs:tt)*])* pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( #[cfg($e)] intrinsics! { #[$($attr)*] $(#[$($attrs)*])* pub unsafe extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { $($body)* } } #[cfg(not($e))] intrinsics! { $(#[$($attrs)*])* pub unsafe extern $abi fn $name($($argname: $ty),*) $(-> $ret)? { $($body)* } } intrinsics!($($rest)*); ); // Explicit weak linkage gets dropped when weak-intrinsics is on since it // will be added unconditionally to all intrinsics and would conflict // otherwise. ( #[weak] $(#[$($attr:tt)*])* pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( #[cfg(feature = "weak-intrinsics")] intrinsics! { $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } #[cfg(not(feature = "weak-intrinsics"))] intrinsics! { $(#[$($attr)*])* #[linkage = "weak"] pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } intrinsics!($($rest)*); ); // Same as above but for unsafe. ( #[weak] $(#[$($attr:tt)*])* pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( #[cfg(feature = "weak-intrinsics")] intrinsics! { $(#[$($attr)*])* pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } #[cfg(not(feature = "weak-intrinsics"))] intrinsics! { $(#[$($attr)*])* #[linkage = "weak"] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? 
{ $($body)* } } intrinsics!($($rest)*); ); // Right now there's a bunch of architecture-optimized intrinsics in the // stock compiler-rt implementation. Not all of these have been ported over // to Rust yet so when the `c` feature of this crate is enabled we fall back // to the architecture-specific versions which should be more optimized. The // purpose of this macro is to easily allow specifying this. // // The `#[maybe_use_optimized_c_shim]` attribute indicates that this // intrinsic may have an optimized C version. In these situations the build // script, if the C code is enabled and compiled, will emit a cfg directive // to get passed to rustc for our compilation. If that cfg is set we skip // the Rust implementation, but if the attribute is not enabled then we // compile in the Rust implementation. ( #[maybe_use_optimized_c_shim] $(#[$($attr:tt)*])* pub $(unsafe $(@ $empty:tt)? )? extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( #[cfg($name = "optimized-c")] #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub $(unsafe $($empty)? )? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { extern $abi { fn $name($($argname: $ty),*) $(-> $ret)?; } unsafe { $name($($argname),*) } } #[cfg(not($name = "optimized-c"))] intrinsics! { $(#[$($attr)*])* pub $(unsafe $($empty)? )? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } intrinsics!($($rest)*); ); // We recognize the `#[aapcs_on_arm]` attribute here and generate the // same intrinsic but force it to have the `"aapcs"` calling convention on // ARM and `"C"` elsewhere. ( #[aapcs_on_arm] $(#[$($attr:tt)*])* pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( #[cfg(target_arch = "arm")] intrinsics! { $(#[$($attr)*])* pub extern "aapcs" fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } #[cfg(not(target_arch = "arm"))] intrinsics! { $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } intrinsics!($($rest)*); ); // Like aapcs above we recognize an attribute for the "unadjusted" abi on // win64 for some methods. ( #[unadjusted_on_win64] $(#[$($attr:tt)*])* pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( #[cfg(all(any(windows, all(target_os = "uefi", target_arch = "x86_64")), target_pointer_width = "64"))] intrinsics! { $(#[$($attr)*])* pub extern "unadjusted" fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } #[cfg(not(all(any(windows, all(target_os = "uefi", target_arch = "x86_64")), target_pointer_width = "64")))] intrinsics! { $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } intrinsics!($($rest)*); ); // Some intrinsics on win64 which return a 128-bit integer have an.. unusual // calling convention. That's managed here with this "abi hack" which alters // the generated symbol's ABI. // // This will still define a function in this crate with the given name and // signature, but the actual symbol for the intrinsic may have a slightly // different ABI on win64. ( #[win64_128bit_abi_hack] $(#[$($attr:tt)*])* pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? 
{ $($body:tt)* } $($rest:tt)* ) => ( #[cfg(all(any(windows, target_os = "uefi"), target_arch = "x86_64"))] $(#[$($attr)*])* #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } #[cfg(all(any(windows, target_os = "uefi"), target_arch = "x86_64"))] pub mod $name { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) -> $crate::macros::win64_128bit_abi_hack::U64x2 { let e: $($ret)? = super::$name($($argname),*); $crate::macros::win64_128bit_abi_hack::U64x2::from(e) } } #[cfg(not(all(any(windows, target_os = "uefi"), target_arch = "x86_64")))] intrinsics! { $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } intrinsics!($($rest)*); ); // A bunch of intrinsics on ARM are aliased in the standard compiler-rt // build under `__aeabi_*` aliases, and LLVM will call these instead of the // original function. The aliasing here is used to generate these symbols in // the object file. ( #[arm_aeabi_alias = $alias:ident] $(#[$($attr:tt)*])* pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( #[cfg(target_arch = "arm")] pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } #[cfg(target_arch = "arm")] pub mod $name { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } } #[cfg(target_arch = "arm")] pub mod $alias { #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(any(all(not(windows), not(target_vendor="apple"), feature = "weak-intrinsics")), linkage = "weak")] pub extern "aapcs" fn $alias( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } } #[cfg(not(target_arch = "arm"))] intrinsics! { $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } intrinsics!($($rest)*); ); // C mem* functions are only generated when the "mem" feature is enabled. ( #[mem_builtin] $(#[$($attr:tt)*])* pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( $(#[$($attr)*])* pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } #[cfg(feature = "mem")] pub mod $name { $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } } intrinsics!($($rest)*); ); // Naked functions are special: we can't generate wrappers for them since // they use a custom calling convention. ( #[naked] $(#[$($attr:tt)*])* pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( pub mod $name { #[naked] $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } intrinsics!($($rest)*); ); // For some intrinsics, AVR uses a custom calling convention¹ that does not // match our definitions here. 
Ideally we would just use hand-written naked // functions, but that's quite a lot of code to port² - so for the time // being we are just ignoring the problematic functions, letting avr-gcc // (which is required to compile to AVR anyway) link them from libgcc. // // ¹ https://gcc.gnu.org/wiki/avr-gcc (see "Exceptions to the Calling // Convention") // ² https://github.com/gcc-mirror/gcc/blob/31048012db98f5ec9c2ba537bfd850374bdd771f/libgcc/config/avr/lib1funcs.S ( #[avr_skip] $(#[$($attr:tt)*])* pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( #[cfg(not(target_arch = "avr"))] intrinsics! { $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } } intrinsics!($($rest)*); ); // This is the final catch-all rule. At this point we generate an // intrinsic with a conditional `#[no_mangle]` directive to avoid // interfering with duplicate symbols and whatnot during testing. // // The implementation is placed in a separate module, to take advantage // of the fact that rustc partitions functions into code generation // units based on module they are defined in. As a result we will have // a separate object file for each intrinsic. For further details see // corresponding PR in rustc https://github.com/rust-lang/rust/pull/70846 // // After the intrinsic is defined we just continue with the rest of the // input we were given. ( $(#[$($attr:tt)*])* pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( $(#[$($attr)*])* pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } pub mod $name { $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } } intrinsics!($($rest)*); ); // Same as the above for unsafe functions. ( $(#[$($attr:tt)*])* pub unsafe extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? { $($body:tt)* } $($rest:tt)* ) => ( $(#[$($attr)*])* pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { $($body)* } pub mod $name { $(#[$($attr)*])* #[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg_attr(feature = "weak-intrinsics", linkage = "weak")] pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? { super::$name($($argname),*) } } intrinsics!($($rest)*); ); } // Hack for LLVM expectations for ABI on windows. This is used by the // `#[win64_128bit_abi_hack]` attribute recognized above #[cfg(all(any(windows, target_os = "uefi"), target_pointer_width = "64"))] pub mod win64_128bit_abi_hack { #[repr(simd)] pub struct U64x2(u64, u64); impl From for U64x2 { fn from(i: i128) -> U64x2 { use crate::int::DInt; let j = i as u128; U64x2(j.lo(), j.hi()) } } impl From for U64x2 { fn from(i: u128) -> U64x2 { use crate::int::DInt; U64x2(i.lo(), i.hi()) } } } compiler_builtins-0.1.101/src/math.rs000064400000000000000000000104511046102023000155760ustar 00000000000000#[allow(dead_code)] #[path = "../libm/src/math/mod.rs"] mod libm; #[allow(unused_macros)] macro_rules! no_mangle { ($(fn $fun:ident($($iid:ident : $ity:ty),+) -> $oty:ty;)+) => { intrinsics! 
{ $( pub extern "C" fn $fun($($iid: $ity),+) -> $oty { self::libm::$fun($($iid),+) } )+ } } } #[cfg(any( all( target_family = "wasm", target_os = "unknown", not(target_env = "wasi") ), target_os = "xous", all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "xtensa", target_os = "none"), all(target_vendor = "fortanix", target_env = "sgx") ))] no_mangle! { fn acos(x: f64) -> f64; fn asin(x: f64) -> f64; fn cbrt(x: f64) -> f64; fn expm1(x: f64) -> f64; fn hypot(x: f64, y: f64) -> f64; fn tan(x: f64) -> f64; fn cos(x: f64) -> f64; fn expf(x: f32) -> f32; fn log2(x: f64) -> f64; fn log2f(x: f32) -> f32; fn log10(x: f64) -> f64; fn log10f(x: f32) -> f32; fn log(x: f64) -> f64; fn logf(x: f32) -> f32; fn fmin(x: f64, y: f64) -> f64; fn fminf(x: f32, y: f32) -> f32; fn fmax(x: f64, y: f64) -> f64; fn fmaxf(x: f32, y: f32) -> f32; fn round(x: f64) -> f64; fn roundf(x: f32) -> f32; fn rint(x: f64) -> f64; fn rintf(x: f32) -> f32; fn sin(x: f64) -> f64; fn pow(x: f64, y: f64) -> f64; fn powf(x: f32, y: f32) -> f32; fn fmod(x: f64, y: f64) -> f64; fn fmodf(x: f32, y: f32) -> f32; fn acosf(n: f32) -> f32; fn atan2f(a: f32, b: f32) -> f32; fn atanf(n: f32) -> f32; fn coshf(n: f32) -> f32; fn expm1f(n: f32) -> f32; fn fdim(a: f64, b: f64) -> f64; fn fdimf(a: f32, b: f32) -> f32; fn log1pf(n: f32) -> f32; fn sinhf(n: f32) -> f32; fn tanhf(n: f32) -> f32; fn ldexp(f: f64, n: i32) -> f64; fn ldexpf(f: f32, n: i32) -> f32; fn tgamma(x: f64) -> f64; fn tgammaf(x: f32) -> f32; fn atan(x: f64) -> f64; fn atan2(x: f64, y: f64) -> f64; fn cosh(x: f64) -> f64; fn log1p(x: f64) -> f64; fn sinh(x: f64) -> f64; fn tanh(x: f64) -> f64; fn cosf(x: f32) -> f32; fn exp(x: f64) -> f64; fn sinf(x: f32) -> f32; fn exp2(x: f64) -> f64; fn exp2f(x: f32) -> f32; fn fma(x: f64, y: f64, z: f64) -> f64; fn fmaf(x: f32, y: f32, z: f32) -> f32; fn asinf(n: f32) -> f32; fn cbrtf(n: f32) -> f32; fn hypotf(x: f32, y: f32) -> f32; fn tanf(n: f32) -> f32; } #[cfg(any( all( target_family = "wasm", target_os = "unknown", not(target_env = "wasi") ), target_os = "xous", all(target_arch = "x86_64", target_os = "uefi"), all(target_arch = "xtensa", target_os = "none"), all(target_vendor = "fortanix", target_env = "sgx"), target_os = "windows" ))] intrinsics! { pub extern "C" fn lgamma_r(x: f64, s: &mut i32) -> f64 { let r = self::libm::lgamma_r(x); *s = r.1; r.0 } pub extern "C" fn lgammaf_r(x: f32, s: &mut i32) -> f32 { let r = self::libm::lgammaf_r(x); *s = r.1; r.0 } } #[cfg(any( target_os = "xous", target_os = "uefi", all(target_arch = "xtensa", target_os = "none"), ))] no_mangle! { fn sqrtf(x: f32) -> f32; fn sqrt(x: f64) -> f64; } #[cfg(any( all(target_vendor = "fortanix", target_env = "sgx"), all(target_arch = "xtensa", target_os = "none"), target_os = "xous", target_os = "uefi" ))] no_mangle! { fn ceil(x: f64) -> f64; fn ceilf(x: f32) -> f32; fn floor(x: f64) -> f64; fn floorf(x: f32) -> f32; fn trunc(x: f64) -> f64; fn truncf(x: f32) -> f32; } // only for the thumb*-none-eabi*, riscv32*-none-elf, x86_64-unknown-none and mips*-unknown-none targets that lack the floating point instruction set #[cfg(any( all(target_arch = "arm", target_os = "none"), all(target_arch = "riscv32", not(target_feature = "f"), target_os = "none"), all(target_arch = "x86_64", target_os = "none"), all(target_arch = "mips", target_os = "none"), ))] no_mangle! 
{ fn fmin(x: f64, y: f64) -> f64; fn fminf(x: f32, y: f32) -> f32; fn fmax(x: f64, y: f64) -> f64; fn fmaxf(x: f32, y: f32) -> f32; // `f64 % f64` fn fmod(x: f64, y: f64) -> f64; // `f32 % f32` fn fmodf(x: f32, y: f32) -> f32; } compiler_builtins-0.1.101/src/mem/impls.rs000064400000000000000000000237641046102023000165620ustar 00000000000000use core::intrinsics::likely; const WORD_SIZE: usize = core::mem::size_of::(); const WORD_MASK: usize = WORD_SIZE - 1; // If the number of bytes involved exceed this threshold we will opt in word-wise copy. // The value here selected is max(2 * WORD_SIZE, 16): // * We need at least 2 * WORD_SIZE bytes to guarantee that at least 1 word will be copied through // word-wise copy. // * The word-wise copy logic needs to perform some checks so it has some small overhead. // ensures that even on 32-bit platforms we have copied at least 8 bytes through // word-wise copy so the saving of word-wise copy outweights the fixed overhead. const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 { 2 * WORD_SIZE } else { 16 }; #[cfg(feature = "mem-unaligned")] unsafe fn read_usize_unaligned(x: *const usize) -> usize { // Do not use `core::ptr::read_unaligned` here, since it calls `copy_nonoverlapping` which // is translated to memcpy in LLVM. let x_read = (x as *const [u8; core::mem::size_of::()]).read(); core::mem::transmute(x_read) } #[inline(always)] pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) { #[inline(always)] unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) { let dest_end = dest.add(n); while dest < dest_end { *dest = *src; dest = dest.add(1); src = src.add(1); } } #[inline(always)] unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; let mut src_usize = src as *mut usize; let dest_end = dest.add(n) as *mut usize; while dest_usize < dest_end { *dest_usize = *src_usize; dest_usize = dest_usize.add(1); src_usize = src_usize.add(1); } } #[cfg(not(feature = "mem-unaligned"))] #[inline(always)] unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; let dest_end = dest.add(n) as *mut usize; // Calculate the misalignment offset and shift needed to reassemble value. let offset = src as usize & WORD_MASK; let shift = offset * 8; // Realign src let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; // This will read (but won't use) bytes out of bound. // cfg needed because not all targets will have atomic loads that can be lowered // (e.g. BPF, MSP430), or provided by an external library (e.g. 
RV32I) #[cfg(target_has_atomic_load_store = "ptr")] let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned); #[cfg(not(target_has_atomic_load_store = "ptr"))] let mut prev_word = core::ptr::read_volatile(src_aligned); while dest_usize < dest_end { src_aligned = src_aligned.add(1); let cur_word = *src_aligned; #[cfg(target_endian = "little")] let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift); #[cfg(target_endian = "big")] let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift); prev_word = cur_word; *dest_usize = resembled; dest_usize = dest_usize.add(1); } } #[cfg(feature = "mem-unaligned")] #[inline(always)] unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; let mut src_usize = src as *mut usize; let dest_end = dest.add(n) as *mut usize; while dest_usize < dest_end { *dest_usize = read_usize_unaligned(src_usize); dest_usize = dest_usize.add(1); src_usize = src_usize.add(1); } } if n >= WORD_COPY_THRESHOLD { // Align dest // Because of n >= 2 * WORD_SIZE, dst_misalignment < n let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK; copy_forward_bytes(dest, src, dest_misalignment); dest = dest.add(dest_misalignment); src = src.add(dest_misalignment); n -= dest_misalignment; let n_words = n & !WORD_MASK; let src_misalignment = src as usize & WORD_MASK; if likely(src_misalignment == 0) { copy_forward_aligned_words(dest, src, n_words); } else { copy_forward_misaligned_words(dest, src, n_words); } dest = dest.add(n_words); src = src.add(n_words); n -= n_words; } copy_forward_bytes(dest, src, n); } #[inline(always)] pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) { // The following backward copy helper functions uses the pointers past the end // as their inputs instead of pointers to the start! #[inline(always)] unsafe fn copy_backward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) { let dest_start = dest.sub(n); while dest_start < dest { dest = dest.sub(1); src = src.sub(1); *dest = *src; } } #[inline(always)] unsafe fn copy_backward_aligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; let mut src_usize = src as *mut usize; let dest_start = dest.sub(n) as *mut usize; while dest_start < dest_usize { dest_usize = dest_usize.sub(1); src_usize = src_usize.sub(1); *dest_usize = *src_usize; } } #[cfg(not(feature = "mem-unaligned"))] #[inline(always)] unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; let dest_start = dest.sub(n) as *mut usize; // Calculate the misalignment offset and shift needed to reassemble value. let offset = src as usize & WORD_MASK; let shift = offset * 8; // Realign src_aligned let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize; // This will read (but won't use) bytes out of bound. // cfg needed because not all targets will have atomic loads that can be lowered // (e.g. BPF, MSP430), or provided by an external library (e.g. 
RV32I) #[cfg(target_has_atomic_load_store = "ptr")] let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned); #[cfg(not(target_has_atomic_load_store = "ptr"))] let mut prev_word = core::ptr::read_volatile(src_aligned); while dest_start < dest_usize { src_aligned = src_aligned.sub(1); let cur_word = *src_aligned; #[cfg(target_endian = "little")] let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift; #[cfg(target_endian = "big")] let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift; prev_word = cur_word; dest_usize = dest_usize.sub(1); *dest_usize = resembled; } } #[cfg(feature = "mem-unaligned")] #[inline(always)] unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) { let mut dest_usize = dest as *mut usize; let mut src_usize = src as *mut usize; let dest_start = dest.sub(n) as *mut usize; while dest_start < dest_usize { dest_usize = dest_usize.sub(1); src_usize = src_usize.sub(1); *dest_usize = read_usize_unaligned(src_usize); } } let mut dest = dest.add(n); let mut src = src.add(n); if n >= WORD_COPY_THRESHOLD { // Align dest // Because of n >= 2 * WORD_SIZE, dst_misalignment < n let dest_misalignment = dest as usize & WORD_MASK; copy_backward_bytes(dest, src, dest_misalignment); dest = dest.sub(dest_misalignment); src = src.sub(dest_misalignment); n -= dest_misalignment; let n_words = n & !WORD_MASK; let src_misalignment = src as usize & WORD_MASK; if likely(src_misalignment == 0) { copy_backward_aligned_words(dest, src, n_words); } else { copy_backward_misaligned_words(dest, src, n_words); } dest = dest.sub(n_words); src = src.sub(n_words); n -= n_words; } copy_backward_bytes(dest, src, n); } #[inline(always)] pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { #[inline(always)] pub unsafe fn set_bytes_bytes(mut s: *mut u8, c: u8, n: usize) { let end = s.add(n); while s < end { *s = c; s = s.add(1); } } #[inline(always)] pub unsafe fn set_bytes_words(s: *mut u8, c: u8, n: usize) { let mut broadcast = c as usize; let mut bits = 8; while bits < WORD_SIZE * 8 { broadcast |= broadcast << bits; bits *= 2; } let mut s_usize = s as *mut usize; let end = s.add(n) as *mut usize; while s_usize < end { *s_usize = broadcast; s_usize = s_usize.add(1); } } if likely(n >= WORD_COPY_THRESHOLD) { // Align s // Because of n >= 2 * WORD_SIZE, dst_misalignment < n let misalignment = (s as usize).wrapping_neg() & WORD_MASK; set_bytes_bytes(s, c, misalignment); s = s.add(misalignment); n -= misalignment; let n_words = n & !WORD_MASK; set_bytes_words(s, c, n_words); s = s.add(n_words); n -= n_words; } set_bytes_bytes(s, c, n); } #[inline(always)] pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 { let mut i = 0; while i < n { let a = *s1.add(i); let b = *s2.add(i); if a != b { return a as i32 - b as i32; } i += 1; } 0 } #[inline(always)] pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { let mut n = 0; while *s != 0 { n += 1; s = s.add(1); } n } compiler_builtins-0.1.101/src/mem/mod.rs000064400000000000000000000172331046102023000162070ustar 00000000000000// Trying to satisfy clippy here is hopeless #![allow(clippy::style)] #[allow(warnings)] #[cfg(target_pointer_width = "16")] type c_int = i16; #[allow(warnings)] #[cfg(not(target_pointer_width = "16"))] type c_int = i32; use core::intrinsics::{atomic_load_unordered, atomic_store_unordered, exact_div}; use core::mem; use core::ops::{BitOr, Shl}; // memcpy/memmove/memset have optimized implementations on some 
architectures #[cfg_attr( all(not(feature = "no-asm"), target_arch = "x86_64"), path = "x86_64.rs" )] mod impls; intrinsics! { #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] pub unsafe extern "C" fn memcpy(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { impls::copy_forward(dest, src, n); dest } #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] pub unsafe extern "C" fn memmove(dest: *mut u8, src: *const u8, n: usize) -> *mut u8 { let delta = (dest as usize).wrapping_sub(src as usize); if delta >= n { // We can copy forwards because either dest is far enough ahead of src, // or src is ahead of dest (and delta overflowed). impls::copy_forward(dest, src, n); } else { impls::copy_backward(dest, src, n); } dest } #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] pub unsafe extern "C" fn memset(s: *mut u8, c: crate::mem::c_int, n: usize) -> *mut u8 { impls::set_bytes(s, c as u8, n); s } #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { impls::compare_bytes(s1, s2, n) } #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] pub unsafe extern "C" fn bcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { memcmp(s1, s2, n) } #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), weak)] #[mem_builtin] pub unsafe extern "C" fn strlen(s: *const core::ffi::c_char) -> usize { impls::c_string_length(s) } } // `bytes` must be a multiple of `mem::size_of::()` #[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] fn memcpy_element_unordered_atomic(dest: *mut T, src: *const T, bytes: usize) { unsafe { let n = exact_div(bytes, mem::size_of::()); let mut i = 0; while i < n { atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); i += 1; } } } // `bytes` must be a multiple of `mem::size_of::()` #[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] fn memmove_element_unordered_atomic(dest: *mut T, src: *const T, bytes: usize) { unsafe { let n = exact_div(bytes, mem::size_of::()); if src < dest as *const T { // copy from end let mut i = n; while i != 0 { i -= 1; atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); } } else { // copy from beginning let mut i = 0; while i < n { atomic_store_unordered(dest.add(i), atomic_load_unordered(src.add(i))); i += 1; } } } } // `T` must be a primitive integer type, and `bytes` must be a multiple of `mem::size_of::()` #[cfg_attr(not(target_has_atomic_load_store = "8"), allow(dead_code))] fn memset_element_unordered_atomic(s: *mut T, c: u8, bytes: usize) where T: Copy + From + Shl + BitOr, { unsafe { let n = exact_div(bytes, mem::size_of::()); // Construct a value of type `T` consisting of repeated `c` // bytes, to let us ensure we write each `T` atomically. let mut x = T::from(c); let mut i = 1; while i < mem::size_of::() { x = x << 8 | T::from(c); i += 1; } // Write it to `s` let mut i = 0; while i < n { atomic_store_unordered(s.add(i), x); i += 1; } } } intrinsics! 
{ #[cfg(target_has_atomic_load_store = "8")] pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "16")] pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "32")] pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "64")] pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "128")] pub unsafe extern "C" fn __llvm_memcpy_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { memcpy_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "8")] pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_1(dest: *mut u8, src: *const u8, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "16")] pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_2(dest: *mut u16, src: *const u16, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "32")] pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_4(dest: *mut u32, src: *const u32, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "64")] pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_8(dest: *mut u64, src: *const u64, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "128")] pub unsafe extern "C" fn __llvm_memmove_element_unordered_atomic_16(dest: *mut u128, src: *const u128, bytes: usize) -> () { memmove_element_unordered_atomic(dest, src, bytes); } #[cfg(target_has_atomic_load_store = "8")] pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_1(s: *mut u8, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } #[cfg(target_has_atomic_load_store = "16")] pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_2(s: *mut u16, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } #[cfg(target_has_atomic_load_store = "32")] pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_4(s: *mut u32, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } #[cfg(target_has_atomic_load_store = "64")] pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_8(s: *mut u64, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } #[cfg(target_has_atomic_load_store = "128")] pub unsafe extern "C" fn __llvm_memset_element_unordered_atomic_16(s: *mut u128, c: u8, bytes: usize) -> () { memset_element_unordered_atomic(s, c, bytes); } } compiler_builtins-0.1.101/src/mem/x86_64.rs000064400000000000000000000235071046102023000163670ustar 00000000000000// On most modern Intel and AMD processors, "rep movsq" and "rep stosq" have // been enhanced to perform better than an simple qword loop, making them ideal // for implementing memcpy/memset. 
Note that "rep cmps" has received no such // enhancement, so it is not used to implement memcmp. // // On certain recent Intel processors, "rep movsb" and "rep stosb" have been // further enhanced to automatically select the best microarchitectural // implementation based on length and alignment. See the following features from // the "Intel® 64 and IA-32 Architectures Optimization Reference Manual": // - ERMSB - Enhanced REP MOVSB and STOSB (Ivy Bridge and later) // - FSRM - Fast Short REP MOV (Ice Lake and later) // - Fast Zero-Length MOVSB (On no current hardware) // - Fast Short STOSB (On no current hardware) // // To simplify things, we switch to using the byte-based variants if the "ermsb" // feature is present at compile-time. We don't bother detecting other features. // Note that ERMSB does not enhance the backwards (DF=1) "rep movsb". use core::arch::asm; use core::intrinsics; use core::mem; #[inline(always)] #[cfg(target_feature = "ermsb")] pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. core::arch::asm!( "repe movsb (%rsi), (%rdi)", inout("rcx") count => _, inout("rdi") dest => _, inout("rsi") src => _, options(att_syntax, nostack, preserves_flags) ); } #[inline(always)] #[cfg(not(target_feature = "ermsb"))] pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, count: usize) { let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); // Separating the blocks gives the compiler more freedom to reorder instructions. asm!( "rep movsb", inout("ecx") pre_byte_count => _, inout("rdi") dest => dest, inout("rsi") src => src, options(att_syntax, nostack, preserves_flags) ); asm!( "rep movsq", inout("rcx") qword_count => _, inout("rdi") dest => dest, inout("rsi") src => src, options(att_syntax, nostack, preserves_flags) ); asm!( "rep movsb", inout("ecx") byte_count => _, inout("rdi") dest => _, inout("rsi") src => _, options(att_syntax, nostack, preserves_flags) ); } #[inline(always)] pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, count: usize) { let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); // We can't separate this block due to std/cld asm!( "std", "rep movsb", "sub $7, %rsi", "sub $7, %rdi", "mov {qword_count}, %rcx", "rep movsq", "test {pre_byte_count:e}, {pre_byte_count:e}", "add $7, %rsi", "add $7, %rdi", "mov {pre_byte_count:e}, %ecx", "rep movsb", "cld", pre_byte_count = in(reg) pre_byte_count, qword_count = in(reg) qword_count, inout("ecx") byte_count => _, inout("rdi") dest.add(count - 1) => _, inout("rsi") src.add(count - 1) => _, // We modify flags, but we restore it afterwards options(att_syntax, nostack, preserves_flags) ); } #[inline(always)] #[cfg(target_feature = "ermsb")] pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { // FIXME: Use the Intel syntax once we drop LLVM 9 support on rust-lang/rust. core::arch::asm!( "repe stosb %al, (%rdi)", inout("rcx") count => _, inout("rdi") dest => _, inout("al") c => _, options(att_syntax, nostack, preserves_flags) ) } #[inline(always)] #[cfg(not(target_feature = "ermsb"))] pub unsafe fn set_bytes(mut dest: *mut u8, c: u8, count: usize) { let c = c as u64 * 0x0101_0101_0101_0101; let (pre_byte_count, qword_count, byte_count) = rep_param(dest, count); // Separating the blocks gives the compiler more freedom to reorder instructions. 
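    // Illustrative sketch of the broadcast multiply above, kept as a comment
    // so it does not affect the build: multiplying the fill byte by
    // 0x0101_0101_0101_0101 replicates it into every byte of the qword that
    // `rep stosq` stores.
    //
    //     let c: u8 = 0xAB;
    //     let word = c as u64 * 0x0101_0101_0101_0101;
    //     assert_eq!(word, 0xABAB_ABAB_ABAB_ABAB);
    //     assert_eq!(word.to_ne_bytes(), [c; 8]);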
asm!( "rep stosb", inout("ecx") pre_byte_count => _, inout("rdi") dest => dest, in("rax") c, options(att_syntax, nostack, preserves_flags) ); asm!( "rep stosq", inout("rcx") qword_count => _, inout("rdi") dest => dest, in("rax") c, options(att_syntax, nostack, preserves_flags) ); asm!( "rep stosb", inout("ecx") byte_count => _, inout("rdi") dest => _, in("rax") c, options(att_syntax, nostack, preserves_flags) ); } #[inline(always)] pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { #[inline(always)] unsafe fn cmp(mut a: *const T, mut b: *const T, n: usize, f: F) -> i32 where T: Clone + Copy + Eq, U: Clone + Copy + Eq, F: FnOnce(*const U, *const U, usize) -> i32, { // Ensure T is not a ZST. const { assert!(mem::size_of::() != 0) }; let end = a.add(intrinsics::unchecked_div(n, mem::size_of::())); while a != end { if a.read_unaligned() != b.read_unaligned() { return f(a.cast(), b.cast(), mem::size_of::()); } a = a.add(1); b = b.add(1); } f( a.cast(), b.cast(), intrinsics::unchecked_rem(n, mem::size_of::()), ) } let c1 = |mut a: *const u8, mut b: *const u8, n| { for _ in 0..n { if a.read() != b.read() { return i32::from(a.read()) - i32::from(b.read()); } a = a.add(1); b = b.add(1); } 0 }; let c2 = |a: *const u16, b, n| cmp(a, b, n, c1); let c4 = |a: *const u32, b, n| cmp(a, b, n, c2); let c8 = |a: *const u64, b, n| cmp(a, b, n, c4); let c16 = |a: *const u128, b, n| cmp(a, b, n, c8); c16(a.cast(), b.cast(), n) } // In order to process more than on byte simultaneously when executing strlen, // two things must be considered: // * An n byte read with an n-byte aligned address will never cross // a page boundary and will always succeed. Any smaller alignment // may result in a read that will cross a page boundary, which may // trigger an access violation. // * Surface Rust considers any kind of out-of-bounds read as undefined // behaviour. To dodge this, memory access operations are written // using inline assembly. #[cfg(target_feature = "sse2")] #[inline(always)] pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { use core::arch::x86_64::{__m128i, _mm_cmpeq_epi8, _mm_movemask_epi8, _mm_set1_epi8}; let mut n = 0; // The use of _mm_movemask_epi8 and company allow for speedups, // but they aren't cheap by themselves. Thus, possibly small strings // are handled in simple loops. for _ in 0..4 { if *s == 0 { return n; } n += 1; s = s.add(1); } // Shave of the least significand bits to align the address to a 16 // byte boundary. The shaved of bits are used to correct the first iteration. let align = s as usize & 15; let mut s = ((s as usize) - align) as *const __m128i; let zero = _mm_set1_epi8(0); let x = { let r; asm!( "movdqa ({addr}), {dest}", addr = in(reg) s, dest = out(xmm_reg) r, options(att_syntax, nostack), ); r }; let cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(x, zero)) >> align; if cmp != 0 { return n + cmp.trailing_zeros() as usize; } n += 16 - align; s = s.add(1); loop { let x = { let r; asm!( "movdqa ({addr}), {dest}", addr = in(reg) s, dest = out(xmm_reg) r, options(att_syntax, nostack), ); r }; let cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(x, zero)) as u32; if cmp == 0 { n += 16; s = s.add(1); } else { return n + cmp.trailing_zeros() as usize; } } } // Provided for scenarios like kernel development, where SSE might not // be available. 
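// As a worked example of the zero-byte test used below (comment-only sketch,
// not part of the build): subtracting 0x01 from a nonzero byte below 0x80
// never sets its top bit, bytes >= 0x80 are cleared by `!cs`, but a 0x00 byte
// underflows to 0xFF and its 0x80 bit survives all three masks.
//
//     let cs = u64::from_le_bytes(*b"abcdefgh");
//     let mask = cs.wrapping_sub(0x0101_0101_0101_0101) & !cs & 0x8080_8080_8080_8080;
//     assert_eq!(mask, 0); // no zero byte in the word
//
//     let cs = u64::from_le_bytes(*b"abc\0defg");
//     let mask = cs.wrapping_sub(0x0101_0101_0101_0101) & !cs & 0x8080_8080_8080_8080;
//     assert_ne!(mask, 0); // a zero byte is present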
#[cfg(not(target_feature = "sse2"))] #[inline(always)] pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize { let mut n = 0; // Check bytes in steps of one until // either a zero byte is discovered or // pointer is aligned to an eight byte boundary. while s as usize & 7 != 0 { if *s == 0 { return n; } n += 1; s = s.add(1); } // Check bytes in steps of eight until a zero // byte is discovered. let mut s = s as *const u64; loop { let mut cs = { let r: u64; asm!( "mov ({addr}), {dest}", addr = in(reg) s, dest = out(reg) r, options(att_syntax, nostack), ); r }; // Detect if a word has a zero byte, taken from // https://graphics.stanford.edu/~seander/bithacks.html if (cs.wrapping_sub(0x0101010101010101) & !cs & 0x8080808080808080) != 0 { loop { if cs & 255 == 0 { return n; } else { cs >>= 8; n += 1; } } } else { n += 8; s = s.add(1); } } } /// Determine optimal parameters for a `rep` instruction. fn rep_param(dest: *mut u8, mut count: usize) -> (usize, usize, usize) { // Unaligned writes are still slow on modern processors, so align the destination address. let pre_byte_count = ((8 - (dest as usize & 0b111)) & 0b111).min(count); count -= pre_byte_count; let qword_count = count >> 3; let byte_count = count & 0b111; (pre_byte_count, qword_count, byte_count) } compiler_builtins-0.1.101/src/probestack.rs000064400000000000000000000246201046102023000170050ustar 00000000000000// Copyright 2017 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // // Licensed under the Apache License, Version 2.0 or the MIT license // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. //! This module defines the `__rust_probestack` intrinsic which is used in the //! implementation of "stack probes" on certain platforms. //! //! The purpose of a stack probe is to provide a static guarantee that if a //! thread has a guard page then a stack overflow is guaranteed to hit that //! guard page. If a function did not have a stack probe then there's a risk of //! having a stack frame *larger* than the guard page, so a function call could //! skip over the guard page entirely and then later hit maybe the heap or //! another thread, possibly leading to security vulnerabilities such as [The //! Stack Clash], for example. //! //! [The Stack Clash]: https://blog.qualys.com/securitylabs/2017/06/19/the-stack-clash //! //! The `__rust_probestack` is called in the prologue of functions whose stack //! size is larger than the guard page, for example larger than 4096 bytes on //! x86. This function is then responsible for "touching" all pages relevant to //! the stack to ensure that that if any of them are the guard page we'll hit //! them guaranteed. //! //! The precise ABI for how this function operates is defined by LLVM. There's //! no real documentation as to what this is, so you'd basically need to read //! the LLVM source code for reference. Often though the test cases can be //! illuminating as to the ABI that's generated, or just looking at the output //! of `llc`. //! //! Note that `#[naked]` is typically used here for the stack probe because the //! ABI corresponds to no actual ABI. //! //! Finally it's worth noting that at the time of this writing LLVM only has //! support for stack probes on x86 and x86_64. There's no support for stack //! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would //! 
be more than welcome to accept such a change! #![cfg(not(feature = "mangled-names"))] // Windows already has builtins to do this. #![cfg(not(windows))] // All these builtins require assembly #![cfg(not(feature = "no-asm"))] // We only define stack probing for these architectures today. #![cfg(any(target_arch = "x86_64", target_arch = "x86"))] extern "C" { pub fn __rust_probestack(); } // A wrapper for our implementation of __rust_probestack, which allows us to // keep the assembly inline while controlling all CFI directives in the assembly // emitted for the function. // // This is the ELF version. #[cfg(not(any(target_vendor = "apple", target_os = "uefi")))] macro_rules! define_rust_probestack { ($body: expr) => { concat!( " .pushsection .text.__rust_probestack .globl __rust_probestack .type __rust_probestack, @function .hidden __rust_probestack __rust_probestack: ", $body, " .size __rust_probestack, . - __rust_probestack .popsection " ) }; } #[cfg(all(target_os = "uefi", target_arch = "x86_64"))] macro_rules! define_rust_probestack { ($body: expr) => { concat!( " .globl __rust_probestack __rust_probestack: ", $body ) }; } // Same as above, but for Mach-O. Note that the triple underscore // is deliberate #[cfg(target_vendor = "apple")] macro_rules! define_rust_probestack { ($body: expr) => { concat!( " .globl ___rust_probestack ___rust_probestack: ", $body ) }; } // In UEFI x86 arch, triple underscore is deliberate. #[cfg(all(target_os = "uefi", target_arch = "x86"))] macro_rules! define_rust_probestack { ($body: expr) => { concat!( " .globl ___rust_probestack ___rust_probestack: ", $body ) }; } // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, // ensuring that if any pages are unmapped we'll make a page fault. // // The ABI here is that the stack frame size is located in `%rax`. Upon // return we're not supposed to modify `%rsp` or `%rax`. // // Any changes to this function should be replicated to the SGX version below. #[cfg(all( target_arch = "x86_64", not(all(target_env = "sgx", target_vendor = "fortanix")) ))] core::arch::global_asm!( define_rust_probestack!( " .cfi_startproc pushq %rbp .cfi_adjust_cfa_offset 8 .cfi_offset %rbp, -16 movq %rsp, %rbp .cfi_def_cfa_register %rbp mov %rax,%r11 // duplicate %rax as we're clobbering %r11 // Main loop, taken in one page increments. We're decrementing rsp by // a page each time until there's less than a page remaining. We're // guaranteed that this function isn't called unless there's more than a // page needed. // // Note that we're also testing against `8(%rsp)` to account for the 8 // bytes pushed on the stack orginally with our return address. Using // `8(%rsp)` simulates us testing the stack pointer in the caller's // context. // It's usually called when %rax >= 0x1000, but that's not always true. // Dynamic stack allocation, which is needed to implement unsized // rvalues, triggers stackprobe even if %rax < 0x1000. // Thus we have to check %r11 first to avoid segfault. cmp $0x1000,%r11 jna 3f 2: sub $0x1000,%rsp test %rsp,8(%rsp) sub $0x1000,%r11 cmp $0x1000,%r11 ja 2b 3: // Finish up the last remaining stack space requested, getting the last // bits out of r11 sub %r11,%rsp test %rsp,8(%rsp) // Restore the stack pointer to what it previously was when entering // this function. The caller will readjust the stack pointer after we // return. 
add %rax,%rsp leave .cfi_def_cfa_register %rsp .cfi_adjust_cfa_offset -8 ret .cfi_endproc " ), options(att_syntax) ); // This function is the same as above, except that some instructions are // [manually patched for LVI]. // // [manually patched for LVI]: https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions #[cfg(all( target_arch = "x86_64", all(target_env = "sgx", target_vendor = "fortanix") ))] core::arch::global_asm!( define_rust_probestack!( " .cfi_startproc pushq %rbp .cfi_adjust_cfa_offset 8 .cfi_offset %rbp, -16 movq %rsp, %rbp .cfi_def_cfa_register %rbp mov %rax,%r11 // duplicate %rax as we're clobbering %r11 // Main loop, taken in one page increments. We're decrementing rsp by // a page each time until there's less than a page remaining. We're // guaranteed that this function isn't called unless there's more than a // page needed. // // Note that we're also testing against `8(%rsp)` to account for the 8 // bytes pushed on the stack orginally with our return address. Using // `8(%rsp)` simulates us testing the stack pointer in the caller's // context. // It's usually called when %rax >= 0x1000, but that's not always true. // Dynamic stack allocation, which is needed to implement unsized // rvalues, triggers stackprobe even if %rax < 0x1000. // Thus we have to check %r11 first to avoid segfault. cmp $0x1000,%r11 jna 3f 2: sub $0x1000,%rsp test %rsp,8(%rsp) sub $0x1000,%r11 cmp $0x1000,%r11 ja 2b 3: // Finish up the last remaining stack space requested, getting the last // bits out of r11 sub %r11,%rsp test %rsp,8(%rsp) // Restore the stack pointer to what it previously was when entering // this function. The caller will readjust the stack pointer after we // return. add %rax,%rsp leave .cfi_def_cfa_register %rsp .cfi_adjust_cfa_offset -8 pop %r11 lfence jmp *%r11 .cfi_endproc " ), options(att_syntax) ); #[cfg(all(target_arch = "x86", not(target_os = "uefi")))] // This is the same as x86_64 above, only translated for 32-bit sizes. Note // that on Unix we're expected to restore everything as it was, this // function basically can't tamper with anything. // // The ABI here is the same as x86_64, except everything is 32-bits large. core::arch::global_asm!( define_rust_probestack!( " .cfi_startproc push %ebp .cfi_adjust_cfa_offset 4 .cfi_offset %ebp, -8 mov %esp, %ebp .cfi_def_cfa_register %ebp push %ecx mov %eax,%ecx cmp $0x1000,%ecx jna 3f 2: sub $0x1000,%esp test %esp,8(%esp) sub $0x1000,%ecx cmp $0x1000,%ecx ja 2b 3: sub %ecx,%esp test %esp,8(%esp) add %eax,%esp pop %ecx leave .cfi_def_cfa_register %esp .cfi_adjust_cfa_offset -4 ret .cfi_endproc " ), options(att_syntax) ); #[cfg(all(target_arch = "x86", target_os = "uefi"))] // UEFI target is windows like target. LLVM will do _chkstk things like windows. // probestack function will also do things like _chkstk in MSVC. // So we need to sub %ax %sp in probestack when arch is x86. // // REF: Rust commit(74e80468347) // rust\src\llvm-project\llvm\lib\Target\X86\X86FrameLowering.cpp: 805 // Comments in LLVM: // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp // themselves. 
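// For orientation, every probe routine in this file follows the same
// page-touching loop; a rough Rust-flavored sketch of the assembly below
// (comment only — the real ABI is defined by the assembly, and `touch`
// stands for the `test %esp,8(%esp)` page access):
//
//     const PAGE: usize = 0x1000;
//     let mut remaining = frame_size; // passed in %eax
//     while remaining > PAGE {
//         sp -= PAGE;
//         touch(sp);
//         remaining -= PAGE;
//     }
//     sp -= remaining;
//     touch(sp);
//     // registers are then restored; this UEFI x86 variant additionally
//     // leaves %esp adjusted down by %eax, matching MSVC's _chkstk.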
core::arch::global_asm!( define_rust_probestack!( " .cfi_startproc push %ebp .cfi_adjust_cfa_offset 4 .cfi_offset %ebp, -8 mov %esp, %ebp .cfi_def_cfa_register %ebp push %ecx push %edx mov %eax,%ecx cmp $0x1000,%ecx jna 3f 2: sub $0x1000,%esp test %esp,8(%esp) sub $0x1000,%ecx cmp $0x1000,%ecx ja 2b 3: sub %ecx,%esp test %esp,8(%esp) mov 4(%ebp),%edx mov %edx, 12(%esp) add %eax,%esp pop %edx pop %ecx leave sub %eax, %esp .cfi_def_cfa_register %esp .cfi_adjust_cfa_offset -4 ret .cfi_endproc " ), options(att_syntax) ); compiler_builtins-0.1.101/src/riscv.rs000064400000000000000000000032421046102023000157730ustar 00000000000000intrinsics! { // Ancient Egyptian/Ethiopian/Russian multiplication method // see https://en.wikipedia.org/wiki/Ancient_Egyptian_multiplication // // This is a long-available stock algorithm; e.g. it is documented in // Knuth's "The Art of Computer Programming" volume 2 (under the section // "Evaluation of Powers") since at least the 2nd edition (1981). // // The main attraction of this method is that it implements (software) // multiplication atop four simple operations: doubling, halving, checking // if a value is even/odd, and addition. This is *not* considered to be the // fastest multiplication method, but it may be amongst the simplest (and // smallest with respect to code size). // // for reference, see also implementation from gcc // https://raw.githubusercontent.com/gcc-mirror/gcc/master/libgcc/config/epiphany/mulsi3.c // // and from LLVM (in relatively readable RISC-V assembly): // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/riscv/int_mul_impl.inc pub extern "C" fn __mulsi3(a: u32, b: u32) -> u32 { let (mut a, mut b) = (a, b); let mut r: u32 = 0; while a > 0 { if a & 1 > 0 { r = r.wrapping_add(b); } a >>= 1; b <<= 1; } r } #[cfg(not(target_feature = "m"))] pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 { let (mut a, mut b) = (a, b); let mut r: u64 = 0; while a > 0 { if a & 1 > 0 { r = r.wrapping_add(b); } a >>= 1; b <<= 1; } r } } compiler_builtins-0.1.101/src/x86.rs000064400000000000000000000050101046102023000152650ustar 00000000000000#![allow(unused_imports)] use core::intrinsics; // NOTE These functions are implemented using assembly because they using a custom // calling convention which can't be implemented using a normal Rust function // NOTE These functions are never mangled as they are not tested against compiler-rt // and mangling ___chkstk would break the `jmp ___chkstk` instruction in __alloca intrinsics! 
{ #[naked] #[cfg(all( windows, target_env = "gnu", not(feature = "no-asm") ))] pub unsafe extern "C" fn ___chkstk_ms() { core::arch::asm!( "push %ecx", "push %eax", "cmp $0x1000,%eax", "lea 12(%esp),%ecx", "jb 1f", "2:", "sub $0x1000,%ecx", "test %ecx,(%ecx)", "sub $0x1000,%eax", "cmp $0x1000,%eax", "ja 2b", "1:", "sub %eax,%ecx", "test %ecx,(%ecx)", "pop %eax", "pop %ecx", "ret", options(noreturn, att_syntax) ); } // FIXME: __alloca should be an alias to __chkstk #[naked] #[cfg(all( windows, target_env = "gnu", not(feature = "no-asm") ))] pub unsafe extern "C" fn __alloca() { core::arch::asm!( "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" options(noreturn, att_syntax) ); } #[naked] #[cfg(all( windows, target_env = "gnu", not(feature = "no-asm") ))] pub unsafe extern "C" fn ___chkstk() { core::arch::asm!( "push %ecx", "cmp $0x1000,%eax", "lea 8(%esp),%ecx", // esp before calling this routine -> ecx "jb 1f", "2:", "sub $0x1000,%ecx", "test %ecx,(%ecx)", "sub $0x1000,%eax", "cmp $0x1000,%eax", "ja 2b", "1:", "sub %eax,%ecx", "test %ecx,(%ecx)", "lea 4(%esp),%eax", // load pointer to the return address into eax "mov %ecx,%esp", // install the new top of stack pointer into esp "mov -4(%eax),%ecx", // restore ecx "push (%eax)", // push return address onto the stack "sub %esp,%eax", // restore the original value in eax "ret", options(noreturn, att_syntax) ); } } compiler_builtins-0.1.101/src/x86_64.rs000064400000000000000000000055451046102023000156130ustar 00000000000000#![allow(unused_imports)] use core::intrinsics; // NOTE These functions are implemented using assembly because they using a custom // calling convention which can't be implemented using a normal Rust function // NOTE These functions are never mangled as they are not tested against compiler-rt // and mangling ___chkstk would break the `jmp ___chkstk` instruction in __alloca intrinsics! 
{ #[naked] #[cfg(all( any(all(windows, target_env = "gnu"), target_os = "uefi"), not(feature = "no-asm") ))] pub unsafe extern "C" fn ___chkstk_ms() { core::arch::asm!( "push %rcx", "push %rax", "cmp $0x1000,%rax", "lea 24(%rsp),%rcx", "jb 1f", "2:", "sub $0x1000,%rcx", "test %rcx,(%rcx)", "sub $0x1000,%rax", "cmp $0x1000,%rax", "ja 2b", "1:", "sub %rax,%rcx", "test %rcx,(%rcx)", "pop %rax", "pop %rcx", "ret", options(noreturn, att_syntax) ); } #[naked] #[cfg(all( any(all(windows, target_env = "gnu"), target_os = "uefi"), not(feature = "no-asm") ))] pub unsafe extern "C" fn __alloca() { core::arch::asm!( "mov %rcx,%rax", // x64 _alloca is a normal function with parameter in rcx "jmp ___chkstk", // Jump to ___chkstk since fallthrough may be unreliable" options(noreturn, att_syntax) ); } #[naked] #[cfg(all( any(all(windows, target_env = "gnu"), target_os = "uefi"), not(feature = "no-asm") ))] pub unsafe extern "C" fn ___chkstk() { core::arch::asm!( "push %rcx", "cmp $0x1000,%rax", "lea 16(%rsp),%rcx", // rsp before calling this routine -> rcx "jb 1f", "2:", "sub $0x1000,%rcx", "test %rcx,(%rcx)", "sub $0x1000,%rax", "cmp $0x1000,%rax", "ja 2b", "1:", "sub %rax,%rcx", "test %rcx,(%rcx)", "lea 8(%rsp),%rax", // load pointer to the return address into rax "mov %rcx,%rsp", // install the new top of stack pointer into rsp "mov -8(%rax),%rcx", // restore rcx "push (%rax)", // push return address onto the stack "sub %rsp,%rax", // restore the original value in rax "ret", options(noreturn, att_syntax) ); } } // HACK(https://github.com/rust-lang/rust/issues/62785): x86_64-unknown-uefi needs special LLVM // support unless we emit the _fltused mod _fltused { #[no_mangle] #[used] #[cfg(target_os = "uefi")] static _fltused: i32 = 0; }