rayon-core-1.6.0/Cargo.toml.orig

[package]
name = "rayon-core"
version = "1.6.0" # reminder to update html_root_url attribute
authors = ["Niko Matsakis", "Josh Stone"]
description = "Core APIs for Rayon"
license = "Apache-2.0/MIT"
repository = "https://github.com/rayon-rs/rayon"
documentation = "https://docs.rs/rayon/"
links = "rayon-core"
build = "build.rs"
readme = "README.md"
keywords = ["parallel", "thread", "concurrency", "join", "performance"]
categories = ["concurrency"]

# Some dependencies may not be their latest version, in order to support older rustc.
[dependencies]
num_cpus = "1.2"
lazy_static = "1"
crossbeam-deque = "0.7"
crossbeam-queue = "0.1.2"
crossbeam-utils = "0.6.5"

[dev-dependencies]
rand = "0.6"
rand_xorshift = "0.1"
scoped-tls = "1.0"

[target.'cfg(unix)'.dev-dependencies]
libc = "0.2"

[[test]]
name = "stack_overflow_crash"
path = "tests/stack_overflow_crash.rs"
harness = false

# NB: having one [[test]] manually defined means we need to declare them all
[[test]]
name = "double_init_fail"
path = "tests/double_init_fail.rs"

[[test]]
name = "init_zero_threads"
path = "tests/init_zero_threads.rs"

[[test]]
name = "scope_join"
path = "tests/scope_join.rs"

[[test]]
name = "simple_panic"
path = "tests/simple_panic.rs"

[[test]]
name = "scoped_threadpool"
path = "tests/scoped_threadpool.rs"

rayon-core-1.6.0/Cargo.toml

# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)

[package]
name = "rayon-core"
version = "1.6.0"
authors = ["Niko Matsakis", "Josh Stone"]
build = "build.rs"
links = "rayon-core"
description = "Core APIs for Rayon"
documentation = "https://docs.rs/rayon/"
readme = "README.md"
keywords = ["parallel", "thread", "concurrency", "join", "performance"]
categories = ["concurrency"]
license = "Apache-2.0/MIT"
repository = "https://github.com/rayon-rs/rayon"

[[test]]
name = "stack_overflow_crash"
path = "tests/stack_overflow_crash.rs"
harness = false

[[test]]
name = "double_init_fail"
path = "tests/double_init_fail.rs"

[[test]]
name = "init_zero_threads"
path = "tests/init_zero_threads.rs"

[[test]]
name = "scope_join"
path = "tests/scope_join.rs"

[[test]]
name = "simple_panic"
path = "tests/simple_panic.rs"

[[test]]
name = "scoped_threadpool"
path = "tests/scoped_threadpool.rs"

[dependencies.crossbeam-deque]
version = "0.7"

[dependencies.crossbeam-queue]
version = "0.1.2"

[dependencies.crossbeam-utils]
version = "0.6.5"

[dependencies.lazy_static]
version = "1"

[dependencies.num_cpus]
version = "1.2"

[dev-dependencies.rand]
version = "0.6"

[dev-dependencies.rand_xorshift]
version = "0.1"

[dev-dependencies.scoped-tls]
version = "1.0"

[target."cfg(unix)".dev-dependencies.libc]
version = "0.2"

rayon-core-1.6.0/LICENSE-APACHE

                              Apache License
                        Version 2.0, January 2004
                     http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. 
This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
rayon-core-1.6.0/LICENSE-MIT

Copyright (c) 2010 The Rust Project Developers

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

rayon-core-1.6.0/README.md

Rayon-core represents the "core, stable" APIs of Rayon: join, scope, and so
forth, as well as the ability to create custom thread-pools with ThreadPool.

Users are not necessarily intended to access rayon-core directly; all of its
APIs are mirrored in the rayon crate. To that end, the examples in the docs
use rayon::join and so forth rather than rayon_core::join.

rayon-core aims to never, or almost never, have a breaking change to its API,
because each revision of rayon-core also houses the global thread-pool (and
hence, if you have two simultaneous versions of rayon-core, you have two
thread-pools).

Please see [Rayon Docs] for details about using Rayon.

[Rayon Docs]: https://docs.rs/rayon/

Rayon-core currently requires `rustc 1.28.0` or greater.

rayon-core-1.6.0/build.rs

// We need a build script to use `links = "rayon-core"`. But we're not
// *actually* linking to anything, just making sure that we're the only
// rayon-core in use.
fn main() {
    // we don't need to rebuild for anything else
    println!("cargo:rerun-if-changed=build.rs");
}

rayon-core-1.6.0/src/compile_fail/mod.rs

// These modules contain `compile_fail` doc tests.

mod quicksort_race1;
mod quicksort_race2;
mod quicksort_race3;
mod rc_return;
mod rc_upvar;
mod scope_join_bad;

rayon-core-1.6.0/src/compile_fail/quicksort_race1.rs

/*! ```compile_fail,E0524

fn quick_sort<T: PartialOrd + Send>(v: &mut [T]) {
    if v.len() <= 1 {
        return;
    }

    let mid = partition(v);
    let (lo, _hi) = v.split_at_mut(mid);
    rayon_core::join(|| quick_sort(lo), || quick_sort(lo)); //~ ERROR
}

fn partition<T: PartialOrd + Send>(v: &mut [T]) -> usize {
    let pivot = v.len() - 1;
    let mut i = 0;
    for j in 0..pivot {
        if v[j] <= v[pivot] {
            v.swap(i, j);
            i += 1;
        }
    }
    v.swap(i, pivot);
    i
}

fn main() { }

``` */

rayon-core-1.6.0/src/compile_fail/quicksort_race2.rs

/*!
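For contrast, the pattern these `compile_fail` tests guard against is shown
working in the `join` docs below: borrowing *disjoint* halves compiles fine
(a sketch, not part of the upstream test):

```rust,ignore
let (lo, hi) = v.split_at_mut(mid); // two non-overlapping mutable borrows
rayon_core::join(|| quick_sort(lo), || quick_sort(hi)); // OK: disjoint halves
```

The failing case below instead reuses `v` while `lo` is still borrowed: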
```compile_fail,E0500 fn quick_sort(v: &mut [T]) { if v.len() <= 1 { return; } let mid = partition(v); let (lo, _hi) = v.split_at_mut(mid); rayon_core::join(|| quick_sort(lo), || quick_sort(v)); //~ ERROR } fn partition(v: &mut [T]) -> usize { let pivot = v.len() - 1; let mut i = 0; for j in 0..pivot { if v[j] <= v[pivot] { v.swap(i, j); i += 1; } } v.swap(i, pivot); i } fn main() { } ``` */ rayon-core-1.6.0/src/compile_fail/quicksort_race3.rs010066400247370024737000000010271336642636100207360ustar0000000000000000/*! ```compile_fail,E0524 fn quick_sort(v: &mut [T]) { if v.len() <= 1 { return; } let mid = partition(v); let (_lo, hi) = v.split_at_mut(mid); rayon_core::join(|| quick_sort(hi), || quick_sort(hi)); //~ ERROR } fn partition(v: &mut [T]) -> usize { let pivot = v.len() - 1; let mut i = 0; for j in 0..pivot { if v[j] <= v[pivot] { v.swap(i, j); i += 1; } } v.swap(i, pivot); i } fn main() { } ``` */ rayon-core-1.6.0/src/compile_fail/rc_return.rs010066400247370024737000000004161336642636100176410ustar0000000000000000/** ```compile_fail,E0277 use std::rc::Rc; fn main() { rayon_core::join(|| Rc::new(22), || ()); //~ ERROR } ``` */ mod left {} /** ```compile_fail,E0277 use std::rc::Rc; fn main() { rayon_core::join(|| (), || Rc::new(23)); //~ ERROR } ``` */ mod right {} rayon-core-1.6.0/src/compile_fail/rc_upvar.rs010066400247370024737000000002351336642636100174560ustar0000000000000000/*! ```compile_fail,E0277 use std::rc::Rc; fn main() { let r = Rc::new(22); rayon_core::join(|| r.clone(), || r.clone()); //~^ ERROR } ``` */ rayon-core-1.6.0/src/compile_fail/scope_join_bad.rs010066400247370024737000000005701352066016100205630ustar0000000000000000/*! ```compile_fail,E0373 fn bad_scope(f: F) where F: FnOnce(&i32) + Send, { rayon_core::scope(|s| { let x = 22; s.spawn(|_| f(&x)); //~ ERROR `x` does not live long enough }); } fn good_scope(f: F) where F: FnOnce(&i32) + Send, { let x = 22; rayon_core::scope(|s| { s.spawn(|_| f(&x)); }); } fn main() { } ``` */ rayon-core-1.6.0/src/internal/mod.rs010066400247370024737000000003541336642636100156070ustar0000000000000000//! The internal directory contains internal APIs not meant to be //! exposed to "end-users" of Rayon, but rather which are useful for //! constructing abstractions. //! //! These APIs are still unstable. pub mod task; pub mod worker; rayon-core-1.6.0/src/internal/task.rs010066400247370024737000000070511346467127000157730ustar0000000000000000//! Internal, unsafe APIs for creating scoped tasks. Intended for //! building abstractions atop the rayon-core thread pool, rather than //! direct use by end users. These APIs are mostly analogous to the //! (safe) `scope`/`spawn` APIs, but with some unsafe requirements //! that permit greater efficiency. use std::any::Any; use std::sync::Arc; /// Represents a task that can be scheduled onto the Rayon /// thread-pool. Once a task is scheduler, it will execute exactly /// once (eventually). pub trait Task: Send + Sync { /// Invoked by the thread-pool when the task is ready to execute. fn execute(this: Arc); } /// Represents a handle onto some Rayon scope. This could be either a /// local scope created by the `scope()` function or the global scope /// for a thread-pool. To get a scope-handle, you can invoke /// `ToScopeHandle::to_scope_handle()` on either a `scope` value or a /// `ThreadPool`. /// /// The existence of `ScopeHandler` offers a guarantee: /// /// - The Rust lifetime `'scope` will not end until the scope-handle /// is dropped, or until you invoke `panicked()` or `ok()`. 
/// /// This trait is intended to be used as follows: /// /// - You have a parallel task of type `T` to perform where `T: 's`, /// meaning that any references that `T` contains outlive the lifetime /// `'s`. /// - You obtain a scope handle `h` of type `H` where `H: /// ScopeHandle<'s>`; typically this would be by invoking /// `to_scope_handle()` on a Rayon scope (of type `Scope<'s>`) or a /// thread-pool (in which case `'s == 'static`). /// - You invoke `h.spawn()` to start your job(s). This may be done /// many times. /// - Note that `h.spawn()` is an unsafe method. You must ensure /// that your parallel jobs have completed before moving to /// the next step. /// - Eventually, when all invocations are complete, you invoke /// either `panicked()` or `ok()`. pub unsafe trait ScopeHandle<'scope>: 'scope { /// Enqueues a task for execution within the thread-pool. The task /// will eventually be invoked, and once it is, the `Arc` will be /// dropped. /// /// **Unsafe:** The caller must guarantee that the scope handle /// (`self`) will not be dropped (nor will `ok()` or `panicked()` /// be called) until the task executes. Otherwise, the lifetime /// `'scope` may end while the task is still pending. unsafe fn spawn_task(&self, task: Arc); /// Indicates that some sub-task of this scope panicked with the /// given `err`. This panic will be propagated back to the user as /// appropriate, depending on how this scope handle was derived. /// /// This takes ownership of the scope handle, meaning that once /// you invoke `panicked`, the scope is permitted to terminate /// (and, in particular, the Rust lifetime `'scope` may end). fn panicked(self, err: Box); /// Indicates that the sub-tasks of this scope that you have /// spawned concluded successfully. /// /// This takes ownership of the scope handle, meaning that once /// you invoke `panicked`, the scope is permitted to terminate /// (and, in particular, the Rust lifetime `'scope` may end). fn ok(self); } /// Converts a Rayon structure (typically a `Scope` or `ThreadPool`) /// into a "scope handle". See the `ScopeHandle` trait for more /// details. pub trait ToScopeHandle<'scope> { /// Scope handle type that gets produced. type ScopeHandle: ScopeHandle<'scope>; /// Convert the receiver into a scope handle. fn to_scope_handle(&self) -> Self::ScopeHandle; } rayon-core-1.6.0/src/internal/worker.rs010066400247370024737000000043661352066016100163360ustar0000000000000000//! Internal, unsafe APIs for manipulating or querying the current //! worker thread. Intended for building abstractions atop the //! rayon-core thread pool, rather than direct use by end users. use latch::LatchProbe; use registry; use std::fmt; /// Represents the active worker thread. pub struct WorkerThread<'w> { thread: &'w registry::WorkerThread, } impl<'w> WorkerThread<'w> { /// Causes the worker thread to wait until `f()` returns true. /// While the thread is waiting, it will attempt to steal work /// from other threads, and may go to sleep if there is no work to /// steal. /// /// **Dead-lock warning: This is a low-level interface and cannot /// be used to wait on arbitrary conditions.** In particular, if /// the Rayon thread goes to sleep, it will only be awoken when /// new rayon events occur (e.g., `spawn()` or `join()` is /// invoked, or one of the methods on a `ScopeHandle`). Therefore, /// you must ensure that, once the condition `f()` becomes true, /// some "rayon event" will also occur to ensure that waiting /// threads are awoken. 
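    ///
    /// As a purely illustrative sketch (`worker` standing in for a
    /// `WorkerThread` reference, with some spawned job assumed to set the
    /// flag and generate a rayon event):
    ///
    /// ```ignore
    /// use std::sync::atomic::{AtomicBool, Ordering};
    /// let done = AtomicBool::new(false);
    /// // ... a spawned job stores `true` here when it finishes ...
    /// unsafe { worker.wait_until_true(|| done.load(Ordering::SeqCst)) };
    /// ```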
    pub unsafe fn wait_until_true<F>(&self, f: F)
    where
        F: Fn() -> bool,
    {
        struct DummyLatch<'a, F: 'a> {
            f: &'a F,
        }

        impl<'a, F: Fn() -> bool> LatchProbe for DummyLatch<'a, F> {
            fn probe(&self) -> bool {
                (self.f)()
            }
        }

        self.thread.wait_until(&DummyLatch { f: &f });
    }
}

impl<'w> fmt::Debug for WorkerThread<'w> {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt.debug_struct("WorkerThread")
            .field("pool", &self.thread.registry().id())
            .field("index", &self.thread.index())
            .finish()
    }
}

/// If the current thread is a Rayon worker thread, then the callback
/// is invoked with a reference to the worker-thread, and the result of
/// that callback is returned with `Some`. Otherwise, if we are not on
/// a Rayon worker thread, `None` is immediately returned.
pub fn if_in_worker_thread<F, R>(if_true: F) -> Option<R>
where
    F: FnOnce(&WorkerThread) -> R,
{
    unsafe {
        let thread = registry::WorkerThread::current().as_ref()?;
        Some(if_true(&WorkerThread { thread }))
    }
}

rayon-core-1.6.0/src/job.rs

use crossbeam_queue::SegQueue;
use latch::Latch;
use std::any::Any;
use std::cell::UnsafeCell;
use std::mem;
use unwind;

pub(super) enum JobResult<T> {
    None,
    Ok(T),
    Panic(Box<dyn Any + Send>),
}

/// A `Job` is used to advertise work for other threads that they may
/// want to steal. In accordance with time honored tradition, jobs are
/// arranged in a deque, so that thieves can take from the top of the
/// deque while the main worker manages the bottom of the deque. This
/// deque is managed by the `thread_pool` module.
pub(super) trait Job {
    /// Unsafe: this may be called from a different thread than the one
    /// which scheduled the job, so the implementer must ensure the
    /// appropriate traits are met, whether `Send`, `Sync`, or both.
    unsafe fn execute(this: *const Self);
}

/// Effectively a Job trait object. Each JobRef **must** be executed
/// exactly once, or else data may leak.
///
/// Internally, we store the job's data in a `*const ()` pointer. The
/// true type is something like `*const StackJob<...>`, but we hide
/// it. We also carry the "execute fn" from the `Job` trait.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(super) struct JobRef {
    pointer: *const (),
    execute_fn: unsafe fn(*const ()),
}

unsafe impl Send for JobRef {}
unsafe impl Sync for JobRef {}

impl JobRef {
    /// Unsafe: caller asserts that `data` will remain valid until the
    /// job is executed.
    pub(super) unsafe fn new<T>(data: *const T) -> JobRef
    where
        T: Job,
    {
        let fn_ptr: unsafe fn(*const T) = <T as Job>::execute;

        // erase types:
        JobRef {
            pointer: data as *const (),
            execute_fn: mem::transmute(fn_ptr),
        }
    }

    #[inline]
    pub(super) unsafe fn execute(&self) {
        (self.execute_fn)(self.pointer)
    }
}

/// A job that will be owned by a stack slot. This means that when it
/// executes it need not free any heap data; the cleanup occurs when
/// the stack frame is later popped. The function parameter indicates
/// `true` if the job was stolen -- executed on a different thread.
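///
/// A hedged sketch of the intended lifecycle (mirroring what
/// `join_context` in `join/mod.rs` actually does; `do_work` and
/// `worker_thread` are stand-ins here):
///
/// ```ignore
/// let job = StackJob::new(|migrated| do_work(migrated), SpinLatch::new());
/// let job_ref = unsafe { job.as_job_ref() }; // valid only while `job` lives
/// worker_thread.push(job_ref);               // advertise it for stealing
/// // ... run other work, then wait on `job.latch` before the frame pops ...
/// ```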
pub(super) struct StackJob where L: Latch + Sync, F: FnOnce(bool) -> R + Send, R: Send, { pub(super) latch: L, func: UnsafeCell>, result: UnsafeCell>, } impl StackJob where L: Latch + Sync, F: FnOnce(bool) -> R + Send, R: Send, { pub(super) fn new(func: F, latch: L) -> StackJob { StackJob { latch, func: UnsafeCell::new(Some(func)), result: UnsafeCell::new(JobResult::None), } } pub(super) unsafe fn as_job_ref(&self) -> JobRef { JobRef::new(self) } pub(super) unsafe fn run_inline(self, stolen: bool) -> R { self.func.into_inner().unwrap()(stolen) } pub(super) unsafe fn into_result(self) -> R { self.result.into_inner().into_return_value() } } impl Job for StackJob where L: Latch + Sync, F: FnOnce(bool) -> R + Send, R: Send, { unsafe fn execute(this: *const Self) { fn call(func: impl FnOnce(bool) -> R) -> impl FnOnce() -> R { move || func(true) } let this = &*this; let abort = unwind::AbortIfPanic; let func = (*this.func.get()).take().unwrap(); (*this.result.get()) = match unwind::halt_unwinding(call(func)) { Ok(x) => JobResult::Ok(x), Err(x) => JobResult::Panic(x), }; this.latch.set(); mem::forget(abort); } } /// Represents a job stored in the heap. Used to implement /// `scope`. Unlike `StackJob`, when executed, `HeapJob` simply /// invokes a closure, which then triggers the appropriate logic to /// signal that the job executed. /// /// (Probably `StackJob` should be refactored in a similar fashion.) pub(super) struct HeapJob where BODY: FnOnce() + Send, { job: UnsafeCell>, } impl HeapJob where BODY: FnOnce() + Send, { pub(super) fn new(func: BODY) -> Self { HeapJob { job: UnsafeCell::new(Some(func)), } } /// Creates a `JobRef` from this job -- note that this hides all /// lifetimes, so it is up to you to ensure that this JobRef /// doesn't outlive any data that it closes over. pub(super) unsafe fn as_job_ref(self: Box) -> JobRef { let this: *const Self = mem::transmute(self); JobRef::new(this) } } impl Job for HeapJob where BODY: FnOnce() + Send, { unsafe fn execute(this: *const Self) { let this: Box = mem::transmute(this); let job = (*this.job.get()).take().unwrap(); job(); } } impl JobResult { /// Convert the `JobResult` for a job that has finished (and hence /// its JobResult is populated) into its return value. /// /// NB. This will panic if the job panicked. pub(super) fn into_return_value(self) -> T { match self { JobResult::None => unreachable!(), JobResult::Ok(x) => x, JobResult::Panic(x) => unwind::resume_unwinding(x), } } } /// Indirect queue to provide FIFO job priority. pub(super) struct JobFifo { inner: SegQueue, } impl JobFifo { pub(super) fn new() -> Self { JobFifo { inner: SegQueue::new(), } } pub(super) unsafe fn push(&self, job_ref: JobRef) -> JobRef { // A little indirection ensures that spawns are always prioritized in FIFO order. The // jobs in a thread's deque may be popped from the back (LIFO) or stolen from the front // (FIFO), but either way they will end up popping from the front of this queue. self.inner.push(job_ref); JobRef::new(self) } } impl Job for JobFifo { unsafe fn execute(this: *const Self) { // We "execute" a queue by executing its first job, FIFO. (*this).inner.pop().expect("job in fifo queue").execute() } } rayon-core-1.6.0/src/join/mod.rs010066400247370024737000000162321353104652500147260ustar0000000000000000use job::StackJob; use latch::{LatchProbe, SpinLatch}; use log::Event::*; use registry::{self, WorkerThread}; use std::any::Any; use unwind; use FnContext; #[cfg(test)] mod test; /// Takes two closures and *potentially* runs them in parallel. 
It returns a pair of the results from those closures.
///
/// Conceptually, calling `join()` is similar to spawning two threads,
/// one executing each of the two closures. However, the
/// implementation is quite different and incurs very low
/// overhead. The underlying technique is called "work stealing": the
/// Rayon runtime uses a fixed pool of worker threads and attempts to
/// only execute code in parallel when there are idle CPUs to handle
/// it.
///
/// When `join` is called from outside the thread pool, the calling
/// thread will block while the closures execute in the pool. When
/// `join` is called within the pool, the calling thread still actively
/// participates in the thread pool. It will begin by executing closure
/// A (on the current thread). While it is doing that, it will advertise
/// closure B as being available for other threads to execute. Once closure A
/// has completed, the current thread will try to execute closure B;
/// if, however, closure B has been stolen, then it will look for other work
/// while waiting for the thief to fully execute closure B. (This is the
/// typical work-stealing strategy.)
///
/// # Examples
///
/// This example uses join to perform a quick-sort (note this is not a
/// particularly optimized implementation: if you **actually** want to
/// sort for real, you should prefer [the `par_sort` method] offered
/// by Rayon).
///
/// [the `par_sort` method]: ../rayon/slice/trait.ParallelSliceMut.html#method.par_sort
///
/// ```rust
/// # use rayon_core as rayon;
/// let mut v = vec![5, 1, 8, 22, 0, 44];
/// quick_sort(&mut v);
/// assert_eq!(v, vec![0, 1, 5, 8, 22, 44]);
///
/// fn quick_sort<T: PartialOrd + Send>(v: &mut [T]) {
///     if v.len() > 1 {
///         let mid = partition(v);
///         let (lo, hi) = v.split_at_mut(mid);
///         rayon::join(|| quick_sort(lo),
///                     || quick_sort(hi));
///     }
/// }
///
/// // Partition rearranges all items `<=` to the pivot
/// // item (arbitrarily selected to be the last item in the slice)
/// // to the first half of the slice. It then returns the
/// // "dividing point" where the pivot is placed.
/// fn partition<T: PartialOrd + Send>(v: &mut [T]) -> usize {
///     let pivot = v.len() - 1;
///     let mut i = 0;
///     for j in 0..pivot {
///         if v[j] <= v[pivot] {
///             v.swap(i, j);
///             i += 1;
///         }
///     }
///     v.swap(i, pivot);
///     i
/// }
/// ```
///
/// # Warning about blocking I/O
///
/// The assumption is that the closures given to `join()` are
/// CPU-bound tasks that do not perform I/O or other blocking
/// operations. If you do perform I/O, and that I/O should block
/// (e.g., waiting for a network request), the overall performance may
/// be poor. Moreover, if you cause one closure to be blocked waiting
/// on another (for example, using a channel), that could lead to a
/// deadlock.
///
/// # Panics
///
/// No matter what happens, both closures will always be executed. If
/// a single closure panics, whether it be the first or second
/// closure, that panic will be propagated and hence `join()` will
/// panic with the same panic value. If both closures panic, `join()`
/// will panic with the panic value from the first closure.
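///
/// The propagation rule can be seen directly; this small illustrative
/// case mirrors the crate's own `join` tests:
///
/// ```rust,should_panic
/// # use rayon_core as rayon;
/// // The second closure still runs to completion; then the first
/// // closure's panic is re-thrown by `join` itself.
/// rayon::join(|| panic!("Hello, world!"), || ());
/// ```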
pub fn join(oper_a: A, oper_b: B) -> (RA, RB) where A: FnOnce() -> RA + Send, B: FnOnce() -> RB + Send, RA: Send, RB: Send, { #[inline] fn call(f: impl FnOnce() -> R) -> impl FnOnce(FnContext) -> R { move |_| f() } join_context(call(oper_a), call(oper_b)) } /// Identical to `join`, except that the closures have a parameter /// that provides context for the way the closure has been called, /// especially indicating whether they're executing on a different /// thread than where `join_context` was called. This will occur if /// the second job is stolen by a different thread, or if /// `join_context` was called from outside the thread pool to begin /// with. pub fn join_context(oper_a: A, oper_b: B) -> (RA, RB) where A: FnOnce(FnContext) -> RA + Send, B: FnOnce(FnContext) -> RB + Send, RA: Send, RB: Send, { #[inline] fn call_a(f: impl FnOnce(FnContext) -> R, injected: bool) -> impl FnOnce() -> R { move || f(FnContext::new(injected)) } #[inline] fn call_b(f: impl FnOnce(FnContext) -> R) -> impl FnOnce(bool) -> R { move |migrated| f(FnContext::new(migrated)) } registry::in_worker(|worker_thread, injected| unsafe { log!(Join { worker: worker_thread.index() }); // Create virtual wrapper for task b; this all has to be // done here so that the stack frame can keep it all live // long enough. let job_b = StackJob::new(call_b(oper_b), SpinLatch::new()); let job_b_ref = job_b.as_job_ref(); worker_thread.push(job_b_ref); // Execute task a; hopefully b gets stolen in the meantime. let status_a = unwind::halt_unwinding(call_a(oper_a, injected)); let result_a = match status_a { Ok(v) => v, Err(err) => join_recover_from_panic(worker_thread, &job_b.latch, err), }; // Now that task A has finished, try to pop job B from the // local stack. It may already have been popped by job A; it // may also have been stolen. There may also be some tasks // pushed on top of it in the stack, and we will have to pop // those off to get to it. while !job_b.latch.probe() { if let Some(job) = worker_thread.take_local_job() { if job == job_b_ref { // Found it! Let's run it. // // Note that this could panic, but it's ok if we unwind here. log!(PoppedRhs { worker: worker_thread.index() }); let result_b = job_b.run_inline(injected); return (result_a, result_b); } else { log!(PoppedJob { worker: worker_thread.index() }); worker_thread.execute(job); } } else { // Local deque is empty. Time to steal from other // threads. log!(LostJob { worker: worker_thread.index() }); worker_thread.wait_until(&job_b.latch); debug_assert!(job_b.latch.probe()); break; } } (result_a, job_b.into_result()) }) } /// If job A panics, we still cannot return until we are sure that job /// B is complete. This is because it may contain references into the /// enclosing stack frame(s). #[cold] // cold path unsafe fn join_recover_from_panic( worker_thread: &WorkerThread, job_b_latch: &SpinLatch, err: Box, ) -> ! { worker_thread.wait_until(job_b_latch); unwind::resume_unwinding(err) } rayon-core-1.6.0/src/join/test.rs010066400247370024737000000063651352066016100151300ustar0000000000000000//! Tests for the join code. 
use join::*;
use rand::distributions::Standard;
use rand::{Rng, SeedableRng};
use rand_xorshift::XorShiftRng;
use unwind;
use ThreadPoolBuilder;

fn quick_sort<T: PartialOrd + Send>(v: &mut [T]) {
    if v.len() <= 1 {
        return;
    }

    let mid = partition(v);
    let (lo, hi) = v.split_at_mut(mid);
    join(|| quick_sort(lo), || quick_sort(hi));
}

fn partition<T: PartialOrd + Send>(v: &mut [T]) -> usize {
    let pivot = v.len() - 1;
    let mut i = 0;
    for j in 0..pivot {
        if v[j] <= v[pivot] {
            v.swap(i, j);
            i += 1;
        }
    }
    v.swap(i, pivot);
    i
}

fn seeded_rng() -> XorShiftRng {
    let mut seed = <XorShiftRng as SeedableRng>::Seed::default();
    (0..).zip(seed.as_mut()).for_each(|(i, x)| *x = i);
    XorShiftRng::from_seed(seed)
}

#[test]
fn sort() {
    let mut rng = seeded_rng();
    let mut data: Vec<u32> = rng.sample_iter(&Standard).take(6 * 1024).collect();
    let mut sorted_data = data.clone();
    sorted_data.sort();
    quick_sort(&mut data);
    assert_eq!(data, sorted_data);
}

#[test]
fn sort_in_pool() {
    let mut rng = seeded_rng();
    let mut data: Vec<u32> = rng.sample_iter(&Standard).take(12 * 1024).collect();

    let pool = ThreadPoolBuilder::new().build().unwrap();
    let mut sorted_data = data.clone();
    sorted_data.sort();
    pool.install(|| quick_sort(&mut data));
    assert_eq!(data, sorted_data);
}

#[test]
#[should_panic(expected = "Hello, world!")]
fn panic_propagate_a() {
    join(|| panic!("Hello, world!"), || ());
}

#[test]
#[should_panic(expected = "Hello, world!")]
fn panic_propagate_b() {
    join(|| (), || panic!("Hello, world!"));
}

#[test]
#[should_panic(expected = "Hello, world!")]
fn panic_propagate_both() {
    join(|| panic!("Hello, world!"), || panic!("Goodbye, world!"));
}

#[test]
fn panic_b_still_executes() {
    let mut x = false;
    match unwind::halt_unwinding(|| join(|| panic!("Hello, world!"), || x = true)) {
        Ok(_) => panic!("failed to propagate panic from closure A"),
        Err(_) => assert!(x, "closure b failed to execute"),
    }
}

#[test]
fn join_context_both() {
    // If we're not in a pool, both should be marked stolen as they're injected.
    let (a_migrated, b_migrated) = join_context(|a| a.migrated(), |b| b.migrated());
    assert!(a_migrated);
    assert!(b_migrated);
}

#[test]
fn join_context_neither() {
    // If we're already in a 1-thread pool, neither job should be stolen.
    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
    let (a_migrated, b_migrated) =
        pool.install(|| join_context(|a| a.migrated(), |b| b.migrated()));
    assert!(!a_migrated);
    assert!(!b_migrated);
}

#[test]
fn join_context_second() {
    use std::sync::Barrier;

    // If we're already in a 2-thread pool, the second job should be stolen.
    let barrier = Barrier::new(2);
    let pool = ThreadPoolBuilder::new().num_threads(2).build().unwrap();
    let (a_migrated, b_migrated) = pool.install(|| {
        join_context(
            |a| {
                barrier.wait();
                a.migrated()
            },
            |b| {
                barrier.wait();
                b.migrated()
            },
        )
    });
    assert!(!a_migrated);
    assert!(b_migrated);
}

rayon-core-1.6.0/src/latch.rs

use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{Condvar, Mutex};
use std::usize;

use sleep::Sleep;

/// We define various kinds of latches, which are all a primitive signaling
/// mechanism. A latch starts as false. Eventually someone calls `set()` and
/// it becomes true. You can test if it has been set by calling `probe()`.
///
/// Some kinds of latches, but not all, support a `wait()` operation
/// that will wait until the latch is set, blocking efficiently. That
/// is not part of the trait since it is not possible to do with all
/// latches.
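///
/// In miniature (a conceptual sketch using the `SpinLatch` defined below):
///
/// ```ignore
/// let latch = SpinLatch::new();
/// assert!(!latch.probe()); // starts false
/// latch.set();
/// assert!(latch.probe());  // true from now on
/// ```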
/// /// The intention is that `set()` is called once, but `probe()` may be /// called any number of times. Once `probe()` returns true, the memory /// effects that occurred before `set()` become visible. /// /// It'd probably be better to refactor the API into two paired types, /// but that's a bit of work, and this is not a public API. /// /// ## Memory ordering /// /// Latches need to guarantee two things: /// /// - Once `probe()` returns true, all memory effects from the `set()` /// are visible (in other words, the set should synchronize-with /// the probe). /// - Once `set()` occurs, the next `probe()` *will* observe it. This /// typically requires a seq-cst ordering. See [the "tickle-then-get-sleepy" scenario in the sleep /// README](/src/sleep/README.md#tickle-then-get-sleepy) for details. pub(super) trait Latch: LatchProbe { /// Set the latch, signalling others. fn set(&self); } pub(super) trait LatchProbe { /// Test if the latch is set. fn probe(&self) -> bool; } /// Spin latches are the simplest, most efficient kind, but they do /// not support a `wait()` operation. They just have a boolean flag /// that becomes true when `set()` is called. pub(super) struct SpinLatch { b: AtomicBool, } impl SpinLatch { #[inline] pub(super) fn new() -> SpinLatch { SpinLatch { b: AtomicBool::new(false), } } } impl LatchProbe for SpinLatch { #[inline] fn probe(&self) -> bool { self.b.load(Ordering::SeqCst) } } impl Latch for SpinLatch { #[inline] fn set(&self) { self.b.store(true, Ordering::SeqCst); } } /// A Latch starts as false and eventually becomes true. You can block /// until it becomes true. pub(super) struct LockLatch { m: Mutex, v: Condvar, } impl LockLatch { #[inline] pub(super) fn new() -> LockLatch { LockLatch { m: Mutex::new(false), v: Condvar::new(), } } /// Block until latch is set, then resets this lock latch so it can be reused again. pub(super) fn wait_and_reset(&self) { let mut guard = self.m.lock().unwrap(); while !*guard { guard = self.v.wait(guard).unwrap(); } *guard = false; } /// Block until latch is set. pub(super) fn wait(&self) { let mut guard = self.m.lock().unwrap(); while !*guard { guard = self.v.wait(guard).unwrap(); } } } impl LatchProbe for LockLatch { #[inline] fn probe(&self) -> bool { // Not particularly efficient, but we don't really use this operation let guard = self.m.lock().unwrap(); *guard } } impl Latch for LockLatch { #[inline] fn set(&self) { let mut guard = self.m.lock().unwrap(); *guard = true; self.v.notify_all(); } } /// Counting latches are used to implement scopes. They track a /// counter. Unlike other latches, calling `set()` does not /// necessarily make the latch be considered `set()`; instead, it just /// decrements the counter. The latch is only "set" (in the sense that /// `probe()` returns true) once the counter reaches zero. #[derive(Debug)] pub(super) struct CountLatch { counter: AtomicUsize, } impl CountLatch { #[inline] pub(super) fn new() -> CountLatch { CountLatch { counter: AtomicUsize::new(1), } } #[inline] pub(super) fn increment(&self) { debug_assert!(!self.probe()); self.counter.fetch_add(1, Ordering::Relaxed); } } impl LatchProbe for CountLatch { #[inline] fn probe(&self) -> bool { // Need to acquire any memory reads before latch was set: self.counter.load(Ordering::SeqCst) == 0 } } impl Latch for CountLatch { /// Set the latch to true, releasing all threads who are waiting. 
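    ///
    /// A sketch of the counting behavior described above:
    ///
    /// ```ignore
    /// let latch = CountLatch::new(); // counter starts at 1
    /// latch.increment();             // counter = 2; probe() is false
    /// latch.set();                   // counter = 1; probe() still false
    /// latch.set();                   // counter = 0; probe() now true
    /// ```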
#[inline] fn set(&self) { self.counter.fetch_sub(1, Ordering::SeqCst); } } /// A tickling latch wraps another latch type, and will also awaken a thread /// pool when it is set. This is useful for jobs injected between thread pools, /// so the source pool can continue processing its own work while waiting. pub(super) struct TickleLatch<'a, L: Latch> { inner: L, sleep: &'a Sleep, } impl<'a, L: Latch> TickleLatch<'a, L> { #[inline] pub(super) fn new(latch: L, sleep: &'a Sleep) -> Self { TickleLatch { inner: latch, sleep, } } } impl<'a, L: Latch> LatchProbe for TickleLatch<'a, L> { #[inline] fn probe(&self) -> bool { self.inner.probe() } } impl<'a, L: Latch> Latch for TickleLatch<'a, L> { #[inline] fn set(&self) { self.inner.set(); self.sleep.tickle(usize::MAX); } } impl<'a, L> LatchProbe for &'a L where L: LatchProbe, { fn probe(&self) -> bool { L::probe(self) } } impl<'a, L> Latch for &'a L where L: Latch, { fn set(&self) { L::set(self); } } rayon-core-1.6.0/src/lib.rs010066400247370024737000000636551353200457200137660ustar0000000000000000//! //! [Under construction](https://github.com/rayon-rs/rayon/issues/231) //! //! ## Restricting multiple versions //! //! In order to ensure proper coordination between threadpools, and especially //! to make sure there's only one global threadpool, `rayon-core` is actively //! restricted from building multiple versions of itself into a single target. //! You may see a build error like this in violation: //! //! ```text //! error: native library `rayon-core` is being linked to by more //! than one package, and can only be linked to by one package //! ``` //! //! While we strive to keep `rayon-core` semver-compatible, it's still //! possible to arrive at this situation if different crates have overly //! restrictive tilde or inequality requirements for `rayon-core`. The //! conflicting requirements will need to be resolved before the build will //! succeed. #![doc(html_root_url = "https://docs.rs/rayon-core/1.6")] #![deny(missing_debug_implementations)] #![deny(missing_docs)] #![deny(unreachable_pub)] use std::any::Any; use std::env; use std::error::Error; use std::fmt; use std::io; use std::marker::PhantomData; use std::str::FromStr; extern crate crossbeam_deque; extern crate crossbeam_queue; extern crate crossbeam_utils; #[cfg(any(debug_assertions, rayon_unstable))] #[macro_use] extern crate lazy_static; extern crate num_cpus; #[cfg(test)] extern crate rand; #[cfg(test)] extern crate rand_xorshift; #[macro_use] mod log; #[macro_use] mod private; mod job; mod join; mod latch; mod registry; mod scope; mod sleep; mod spawn; mod thread_pool; mod unwind; mod util; mod compile_fail; mod test; #[cfg(rayon_unstable)] pub mod internal; pub use join::{join, join_context}; pub use registry::ThreadBuilder; pub use scope::{scope, Scope}; pub use scope::{scope_fifo, ScopeFifo}; pub use spawn::{spawn, spawn_fifo}; pub use thread_pool::current_thread_has_pending_tasks; pub use thread_pool::current_thread_index; pub use thread_pool::ThreadPool; use registry::{CustomSpawn, DefaultSpawn, ThreadSpawn}; /// Returns the number of threads in the current registry. If this /// code is executing within a Rayon thread-pool, then this will be /// the number of threads for the thread-pool of the current /// thread. Otherwise, it will be the number of threads for the global /// thread-pool. /// /// This can be useful when trying to judge how many times to split /// parallel work (the parallel iterator traits use this value /// internally for this purpose). 
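///
/// A quick illustrative check:
///
/// ```rust
/// # use rayon_core as rayon;
/// // Outside any custom pool, this reports the global pool's size.
/// assert!(rayon::current_num_threads() >= 1);
/// ```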
/// # Future compatibility note
///
/// Note that unless this thread-pool was created with a
/// builder that specifies the number of threads, then this
/// number may vary over time in future versions (see [the
/// `num_threads()` method for details][snt]).
///
/// [snt]: struct.ThreadPoolBuilder.html#method.num_threads
pub fn current_num_threads() -> usize {
    ::registry::Registry::current_num_threads()
}

/// Error when initializing a thread pool.
#[derive(Debug)]
pub struct ThreadPoolBuildError {
    kind: ErrorKind,
}

#[derive(Debug)]
enum ErrorKind {
    GlobalPoolAlreadyInitialized,
    IOError(io::Error),
}

/// Used to create a new [`ThreadPool`] or to configure the global rayon thread pool.
///
/// ## Creating a ThreadPool
///
/// The following creates a thread pool with 22 threads.
///
/// ```rust
/// # use rayon_core as rayon;
/// let pool = rayon::ThreadPoolBuilder::new().num_threads(22).build().unwrap();
/// ```
///
/// To instead configure the global thread pool, use [`build_global()`]:
///
/// ```rust
/// # use rayon_core as rayon;
/// rayon::ThreadPoolBuilder::new().num_threads(22).build_global().unwrap();
/// ```
///
/// [`ThreadPool`]: struct.ThreadPool.html
/// [`build_global()`]: struct.ThreadPoolBuilder.html#method.build_global
pub struct ThreadPoolBuilder<S = DefaultSpawn> {
    /// The number of threads in the rayon thread pool.
    /// If zero will use the RAYON_NUM_THREADS environment variable.
    /// If RAYON_NUM_THREADS is invalid or zero will use the default.
    num_threads: usize,

    /// Custom closure, if any, to handle a panic that we cannot propagate
    /// anywhere else.
    panic_handler: Option<Box<PanicHandler>>,

    /// Closure to compute the name of a thread.
    get_thread_name: Option<Box<dyn FnMut(usize) -> String>>,

    /// The stack size for the created worker threads.
    stack_size: Option<usize>,

    /// Closure invoked on worker thread start.
    start_handler: Option<Box<StartHandler>>,

    /// Closure invoked on worker thread exit.
    exit_handler: Option<Box<ExitHandler>>,

    /// Closure invoked to spawn threads.
    spawn_handler: S,

    /// If false, worker threads will execute spawned jobs in a
    /// "depth-first" fashion. If true, they will do so in a
    /// "breadth-first" fashion. Depth-first is the default.
    breadth_first: bool,
}

/// Contains the rayon thread pool configuration. Use [`ThreadPoolBuilder`] instead.
///
/// [`ThreadPoolBuilder`]: struct.ThreadPoolBuilder.html
#[deprecated(note = "Use `ThreadPoolBuilder`")]
pub struct Configuration {
    builder: ThreadPoolBuilder,
}

/// The type for a panic handling closure. Note that this same closure
/// may be invoked multiple times in parallel.
type PanicHandler = dyn Fn(Box<dyn Any + Send>) + Send + Sync;

/// The type for a closure that gets invoked when a thread starts. The
/// closure is passed the index of the thread on which it is invoked.
/// Note that this same closure may be invoked multiple times in parallel.
type StartHandler = dyn Fn(usize) + Send + Sync;

/// The type for a closure that gets invoked when a thread exits. The
/// closure is passed the index of the thread on which it is invoked.
/// Note that this same closure may be invoked multiple times in parallel.
type ExitHandler = dyn Fn(usize) + Send + Sync;

// NB: We can't `#[derive(Default)]` because `S` is left ambiguous.
impl Default for ThreadPoolBuilder {
    fn default() -> Self {
        ThreadPoolBuilder {
            num_threads: 0,
            panic_handler: None,
            get_thread_name: None,
            stack_size: None,
            start_handler: None,
            exit_handler: None,
            spawn_handler: DefaultSpawn,
            breadth_first: false,
        }
    }
}

impl ThreadPoolBuilder {
    /// Creates and returns a valid rayon thread pool builder, but does not initialize it.
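    ///
    /// For example, a minimal sketch chaining methods defined on this builder:
    ///
    /// ```rust
    /// # use rayon_core as rayon;
    /// let pool = rayon::ThreadPoolBuilder::new()
    ///     .num_threads(2)
    ///     .thread_name(|i| format!("worker-{}", i))
    ///     .build()
    ///     .unwrap();
    /// assert_eq!(pool.current_num_threads(), 2);
    /// ```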
pub fn new() -> Self { Self::default() } } /// Note: the `S: ThreadSpawn` constraint is an internal implementation detail for the /// default spawn and those set by [`spawn_handler`](#method.spawn_handler). impl ThreadPoolBuilder where S: ThreadSpawn, { /// Create a new `ThreadPool` initialized using this configuration. pub fn build(self) -> Result { ThreadPool::build(self) } /// Initializes the global thread pool. This initialization is /// **optional**. If you do not call this function, the thread pool /// will be automatically initialized with the default /// configuration. Calling `build_global` is not recommended, except /// in two scenarios: /// /// - You wish to change the default configuration. /// - You are running a benchmark, in which case initializing may /// yield slightly more consistent results, since the worker threads /// will already be ready to go even in the first iteration. But /// this cost is minimal. /// /// Initialization of the global thread pool happens exactly /// once. Once started, the configuration cannot be /// changed. Therefore, if you call `build_global` a second time, it /// will return an error. An `Ok` result indicates that this /// is the first initialization of the thread pool. pub fn build_global(self) -> Result<(), ThreadPoolBuildError> { let registry = registry::init_global_registry(self)?; registry.wait_until_primed(); Ok(()) } } impl ThreadPoolBuilder { /// Create a scoped `ThreadPool` initialized using this configuration. /// /// This is a convenience function for building a pool using [`crossbeam::scope`] /// to spawn threads in a [`spawn_handler`](#method.spawn_handler). /// The threads in this pool will start by calling `wrapper`, which should /// do initialization and continue by calling `ThreadBuilder::run()`. /// /// [`crossbeam::scope`]: https://docs.rs/crossbeam/0.7/crossbeam/fn.scope.html /// /// # Examples /// /// A scoped pool may be useful in combination with scoped thread-local variables. /// /// ``` /// #[macro_use] /// extern crate scoped_tls; /// # use rayon_core as rayon; /// /// scoped_thread_local!(static POOL_DATA: Vec); /// /// fn main() -> Result<(), rayon::ThreadPoolBuildError> { /// let pool_data = vec![1, 2, 3]; /// /// // We haven't assigned any TLS data yet. /// assert!(!POOL_DATA.is_set()); /// /// rayon::ThreadPoolBuilder::new() /// .build_scoped( /// // Borrow `pool_data` in TLS for each thread. /// |thread| POOL_DATA.set(&pool_data, || thread.run()), /// // Do some work that needs the TLS data. /// |pool| pool.install(|| assert!(POOL_DATA.is_set())), /// )?; /// /// // Once we've returned, `pool_data` is no longer borrowed. /// drop(pool_data); /// Ok(()) /// } /// ``` pub fn build_scoped(self, wrapper: W, with_pool: F) -> Result where W: Fn(ThreadBuilder) + Sync, // expected to call `run()` F: FnOnce(&ThreadPool) -> R, { let result = crossbeam_utils::thread::scope(|scope| { let wrapper = &wrapper; let pool = self .spawn_handler(|thread| { let mut builder = scope.builder(); if let Some(name) = thread.name() { builder = builder.name(name.to_string()); } if let Some(size) = thread.stack_size() { builder = builder.stack_size(size); } builder.spawn(move |_| wrapper(thread))?; Ok(()) }) .build()?; Ok(with_pool(&pool)) }); match result { Ok(result) => result, Err(err) => unwind::resume_unwinding(err), } } } impl ThreadPoolBuilder { /// Set a custom function for spawning threads. /// /// Note that the threads will not exit until after the pool is dropped. 
It /// is up to the caller to wait for thread termination if that is important /// for any invariants. For instance, threads created in [`crossbeam::scope`] /// will be joined before that scope returns, and this will block indefinitely /// if the pool is leaked. Furthermore, the global thread pool doesn't terminate /// until the entire process exits! /// /// [`crossbeam::scope`]: https://docs.rs/crossbeam/0.7/crossbeam/fn.scope.html /// /// # Examples /// /// A minimal spawn handler just needs to call `run()` from an independent thread. /// /// ``` /// # use rayon_core as rayon; /// fn main() -> Result<(), rayon::ThreadPoolBuildError> { /// let pool = rayon::ThreadPoolBuilder::new() /// .spawn_handler(|thread| { /// std::thread::spawn(|| thread.run()); /// Ok(()) /// }) /// .build()?; /// /// pool.install(|| println!("Hello from my custom thread!")); /// Ok(()) /// } /// ``` /// /// The default spawn handler sets the name and stack size if given, and propagates /// any errors from the thread builder. /// /// ``` /// # use rayon_core as rayon; /// fn main() -> Result<(), rayon::ThreadPoolBuildError> { /// let pool = rayon::ThreadPoolBuilder::new() /// .spawn_handler(|thread| { /// let mut b = std::thread::Builder::new(); /// if let Some(name) = thread.name() { /// b = b.name(name.to_owned()); /// } /// if let Some(stack_size) = thread.stack_size() { /// b = b.stack_size(stack_size); /// } /// b.spawn(|| thread.run())?; /// Ok(()) /// }) /// .build()?; /// /// pool.install(|| println!("Hello from my fully custom thread!")); /// Ok(()) /// } /// ``` pub fn spawn_handler(self, spawn: F) -> ThreadPoolBuilder> where F: FnMut(ThreadBuilder) -> io::Result<()>, { ThreadPoolBuilder { spawn_handler: CustomSpawn::new(spawn), // ..self num_threads: self.num_threads, panic_handler: self.panic_handler, get_thread_name: self.get_thread_name, stack_size: self.stack_size, start_handler: self.start_handler, exit_handler: self.exit_handler, breadth_first: self.breadth_first, } } /// Returns a reference to the current spawn handler. fn get_spawn_handler(&mut self) -> &mut S { &mut self.spawn_handler } /// Get the number of threads that will be used for the thread /// pool. See `num_threads()` for more information. fn get_num_threads(&self) -> usize { if self.num_threads > 0 { self.num_threads } else { match env::var("RAYON_NUM_THREADS") .ok() .and_then(|s| usize::from_str(&s).ok()) { Some(x) if x > 0 => return x, Some(x) if x == 0 => return num_cpus::get(), _ => {} } // Support for deprecated `RAYON_RS_NUM_CPUS`. match env::var("RAYON_RS_NUM_CPUS") .ok() .and_then(|s| usize::from_str(&s).ok()) { Some(x) if x > 0 => x, _ => num_cpus::get(), } } } /// Get the thread name for the thread with the given index. fn get_thread_name(&mut self, index: usize) -> Option { let f = self.get_thread_name.as_mut()?; Some(f(index)) } /// Set a closure which takes a thread index and returns /// the thread's name. pub fn thread_name(mut self, closure: F) -> Self where F: FnMut(usize) -> String + 'static, { self.get_thread_name = Some(Box::new(closure)); self } /// Set the number of threads to be used in the rayon threadpool. /// /// If you specify a non-zero number of threads using this /// function, then the resulting thread-pools are guaranteed to /// start at most this number of threads. /// /// If `num_threads` is 0, or you do not call this function, then /// the Rayon runtime will select the number of threads /// automatically. 
    /// At present, this is based on the
    /// `RAYON_NUM_THREADS` environment variable (if set),
    /// or the number of logical CPUs (otherwise).
    /// In the future, however, the default behavior may
    /// change to dynamically add or remove threads as needed.
    ///
    /// **Future compatibility warning:** Given the default behavior
    /// may change in the future, if you wish to rely on a fixed
    /// number of threads, you should use this function to specify
    /// that number. To reproduce the current default behavior, you
    /// may wish to use the [`num_cpus`
    /// crate](https://crates.io/crates/num_cpus) to query the number
    /// of CPUs dynamically.
    ///
    /// **Old environment variable:** `RAYON_NUM_THREADS` is a one-to-one
    /// replacement of the now deprecated `RAYON_RS_NUM_CPUS` environment
    /// variable. If both variables are specified, `RAYON_NUM_THREADS` will
    /// be preferred.
    pub fn num_threads(mut self, num_threads: usize) -> Self {
        self.num_threads = num_threads;
        self
    }

    /// Takes the current panic handler, leaving `None` in its place.
    fn take_panic_handler(&mut self) -> Option<Box<PanicHandler>> {
        self.panic_handler.take()
    }

    /// Normally, whenever Rayon catches a panic, it tries to
    /// propagate it to someplace sensible, to try and reflect the
    /// semantics of sequential execution. But in some cases,
    /// particularly with the `spawn()` APIs, there is no
    /// obvious place where we should propagate the panic to.
    /// In that case, this panic handler is invoked.
    ///
    /// If no panic handler is set, the default is to abort the
    /// process, under the principle that panics should not go
    /// unobserved.
    ///
    /// If the panic handler itself panics, this will abort the
    /// process. To prevent this, wrap the body of your panic handler
    /// in a call to `std::panic::catch_unwind()`.
    pub fn panic_handler<H>(mut self, panic_handler: H) -> Self
    where
        H: Fn(Box<dyn Any + Send>) + Send + Sync + 'static,
    {
        self.panic_handler = Some(Box::new(panic_handler));
        self
    }

    /// Get the stack size of the worker threads.
    fn get_stack_size(&self) -> Option<usize> {
        self.stack_size
    }

    /// Set the stack size of the worker threads.
    pub fn stack_size(mut self, stack_size: usize) -> Self {
        self.stack_size = Some(stack_size);
        self
    }

    /// **(DEPRECATED)** Suggest to worker threads that they execute
    /// spawned jobs in a "breadth-first" fashion.
    ///
    /// Typically, when a worker thread is idle or blocked, it will
    /// attempt to execute the job from the *top* of its local deque of
    /// work (i.e., the job most recently spawned). If this flag is set
    /// to true, however, workers will prefer to execute in a
    /// *breadth-first* fashion -- that is, they will search for jobs at
    /// the *bottom* of their local deque. (At present, workers *always*
    /// steal from the bottom of other workers' deques, regardless of
    /// the setting of this flag.)
    ///
    /// If you think of the tasks as a tree, where a parent task
    /// spawns its children in the tree, then this flag loosely
    /// corresponds to doing a breadth-first traversal of the tree,
    /// whereas the default would be to do a depth-first traversal.
    ///
    /// **Note that this is an "execution hint".** Rayon's task
    /// execution is highly dynamic and the precise order in which
    /// independent tasks are executed is not intended to be
    /// guaranteed.
    ///
    /// This `breadth_first()` method is now deprecated per [RFC #1],
    /// and in the future its effect may be removed. Consider using
    /// [`scope_fifo()`] for a similar effect.
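    ///
    /// For instance, a pool-wide `breadth_first()` hint can often be
    /// replaced with a local FIFO scope (a minimal sketch; spawn order
    /// is a scheduling priority, not a guarantee, once other threads
    /// steal):
    ///
    /// ```
    /// # use rayon_core as rayon;
    /// rayon::scope_fifo(|s| {
    ///     s.spawn_fifo(|_| println!("spawned first"));
    ///     s.spawn_fifo(|_| println!("spawned second"));
    /// });
    /// ```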
    ///
    /// [RFC #1]: https://github.com/rayon-rs/rfcs/blob/master/accepted/rfc0001-scope-scheduling.md
    /// [`scope_fifo()`]: fn.scope_fifo.html
    #[deprecated(note = "use `scope_fifo` and `spawn_fifo` for similar effect")]
    pub fn breadth_first(mut self) -> Self {
        self.breadth_first = true;
        self
    }

    fn get_breadth_first(&self) -> bool {
        self.breadth_first
    }

    /// Takes the current thread start callback, leaving `None`.
    fn take_start_handler(&mut self) -> Option<Box<StartHandler>> {
        self.start_handler.take()
    }

    /// Set a callback to be invoked on thread start.
    ///
    /// The closure is passed the index of the thread on which it is invoked.
    /// Note that this same closure may be invoked multiple times in parallel.
    /// If this closure panics, the panic will be passed to the panic handler.
    /// If that handler returns, then startup will continue normally.
    pub fn start_handler<H>(mut self, start_handler: H) -> Self
    where
        H: Fn(usize) + Send + Sync + 'static,
    {
        self.start_handler = Some(Box::new(start_handler));
        self
    }

    /// Takes the current thread exit callback, leaving `None`.
    fn take_exit_handler(&mut self) -> Option<Box<ExitHandler>> {
        self.exit_handler.take()
    }

    /// Set a callback to be invoked on thread exit.
    ///
    /// The closure is passed the index of the thread on which it is invoked.
    /// Note that this same closure may be invoked multiple times in parallel.
    /// If this closure panics, the panic will be passed to the panic handler.
    /// If that handler returns, then the thread will exit normally.
    pub fn exit_handler<H>(mut self, exit_handler: H) -> Self
    where
        H: Fn(usize) + Send + Sync + 'static,
    {
        self.exit_handler = Some(Box::new(exit_handler));
        self
    }
}

#[allow(deprecated)]
impl Configuration {
    /// Creates and returns a valid rayon thread pool configuration, but does not initialize it.
    pub fn new() -> Configuration {
        Configuration {
            builder: ThreadPoolBuilder::new(),
        }
    }

    /// Deprecated in favor of `ThreadPoolBuilder::build`.
    pub fn build(self) -> Result<ThreadPool, Box<Error>> {
        self.builder.build().map_err(Box::from)
    }

    /// Deprecated in favor of `ThreadPoolBuilder::thread_name`.
    pub fn thread_name<F>(mut self, closure: F) -> Self
    where
        F: FnMut(usize) -> String + 'static,
    {
        self.builder = self.builder.thread_name(closure);
        self
    }

    /// Deprecated in favor of `ThreadPoolBuilder::num_threads`.
    pub fn num_threads(mut self, num_threads: usize) -> Configuration {
        self.builder = self.builder.num_threads(num_threads);
        self
    }

    /// Deprecated in favor of `ThreadPoolBuilder::panic_handler`.
    pub fn panic_handler<H>(mut self, panic_handler: H) -> Configuration
    where
        H: Fn(Box<dyn Any + Send>) + Send + Sync + 'static,
    {
        self.builder = self.builder.panic_handler(panic_handler);
        self
    }

    /// Deprecated in favor of `ThreadPoolBuilder::stack_size`.
    pub fn stack_size(mut self, stack_size: usize) -> Self {
        self.builder = self.builder.stack_size(stack_size);
        self
    }

    /// Deprecated in favor of `ThreadPoolBuilder::breadth_first`.
    pub fn breadth_first(mut self) -> Self {
        self.builder = self.builder.breadth_first();
        self
    }

    /// Deprecated in favor of `ThreadPoolBuilder::start_handler`.
    pub fn start_handler<H>(mut self, start_handler: H) -> Configuration
    where
        H: Fn(usize) + Send + Sync + 'static,
    {
        self.builder = self.builder.start_handler(start_handler);
        self
    }

    /// Deprecated in favor of `ThreadPoolBuilder::exit_handler`.
    pub fn exit_handler<H>(mut self, exit_handler: H) -> Configuration
    where
        H: Fn(usize) + Send + Sync + 'static,
    {
        self.builder = self.builder.exit_handler(exit_handler);
        self
    }

    /// Returns a `ThreadPoolBuilder` with identical parameters.
fn into_builder(self) -> ThreadPoolBuilder { self.builder } } impl ThreadPoolBuildError { fn new(kind: ErrorKind) -> ThreadPoolBuildError { ThreadPoolBuildError { kind } } } impl Error for ThreadPoolBuildError { fn description(&self) -> &str { match self.kind { ErrorKind::GlobalPoolAlreadyInitialized => { "The global thread pool has already been initialized." } ErrorKind::IOError(ref e) => e.description(), } } } impl fmt::Display for ThreadPoolBuildError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self.kind { ErrorKind::IOError(ref e) => e.fmt(f), _ => self.description().fmt(f), } } } /// Deprecated in favor of `ThreadPoolBuilder::build_global`. #[deprecated(note = "use `ThreadPoolBuilder::build_global`")] #[allow(deprecated)] pub fn initialize(config: Configuration) -> Result<(), Box> { config.into_builder().build_global().map_err(Box::from) } impl fmt::Debug for ThreadPoolBuilder { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let ThreadPoolBuilder { ref num_threads, ref get_thread_name, ref panic_handler, ref stack_size, ref start_handler, ref exit_handler, spawn_handler: _, ref breadth_first, } = *self; // Just print `Some()` or `None` to the debug // output. struct ClosurePlaceholder; impl fmt::Debug for ClosurePlaceholder { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str("") } } let get_thread_name = get_thread_name.as_ref().map(|_| ClosurePlaceholder); let panic_handler = panic_handler.as_ref().map(|_| ClosurePlaceholder); let start_handler = start_handler.as_ref().map(|_| ClosurePlaceholder); let exit_handler = exit_handler.as_ref().map(|_| ClosurePlaceholder); f.debug_struct("ThreadPoolBuilder") .field("num_threads", num_threads) .field("get_thread_name", &get_thread_name) .field("panic_handler", &panic_handler) .field("stack_size", &stack_size) .field("start_handler", &start_handler) .field("exit_handler", &exit_handler) .field("breadth_first", &breadth_first) .finish() } } #[allow(deprecated)] impl Default for Configuration { fn default() -> Self { Configuration { builder: Default::default(), } } } #[allow(deprecated)] impl fmt::Debug for Configuration { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.builder.fmt(f) } } /// Provides the calling context to a closure called by `join_context`. #[derive(Debug)] pub struct FnContext { migrated: bool, /// disable `Send` and `Sync`, just for a little future-proofing. _marker: PhantomData<*mut ()>, } impl FnContext { #[inline] fn new(migrated: bool) -> Self { FnContext { migrated, _marker: PhantomData, } } } impl FnContext { /// Returns `true` if the closure was called from a different thread /// than it was provided from. #[inline] pub fn migrated(&self) -> bool { self.migrated } } rayon-core-1.6.0/src/log.rs010066400247370024737000000045461352066016100137720ustar0000000000000000//! Debug Logging //! //! To use in a debug build, set the env var `RAYON_LOG=1`. In a //! release build, logs are compiled out. You will have to change //! `DUMP_LOGS` to be `true`. //! //! **Old environment variable:** `RAYON_LOG` is a one-to-one //! replacement of the now deprecated `RAYON_RS_LOG` environment //! variable, which is still supported for backwards compatibility. 
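//!
//! For example, events are recorded internally with the `log!` macro
//! defined below (a sketch of crate-internal usage taken from
//! `registry.rs`; this is not a public API):
//!
//! ```ignore
//! log!(StoleWork { worker: self.index, victim: victim_index });
//! ```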
#[cfg(debug_assertions)] use std::env; #[cfg_attr(debug_assertions, derive(Debug))] #[cfg_attr(not(debug_assertions), allow(dead_code))] pub(super) enum Event { Tickle { worker: usize, old_state: usize, }, GetSleepy { worker: usize, state: usize, }, GotSleepy { worker: usize, old_state: usize, new_state: usize, }, GotAwoken { worker: usize, }, FellAsleep { worker: usize, }, GotInterrupted { worker: usize, }, FoundWork { worker: usize, yields: usize, }, DidNotFindWork { worker: usize, yields: usize, }, StoleWork { worker: usize, victim: usize, }, UninjectedWork { worker: usize, }, WaitUntil { worker: usize, }, LatchSet { worker: usize, }, InjectJobs { count: usize, }, Join { worker: usize, }, PoppedJob { worker: usize, }, PoppedRhs { worker: usize, }, LostJob { worker: usize, }, JobCompletedOk { owner_thread: usize, }, JobPanickedErrorStored { owner_thread: usize, }, JobPanickedErrorNotStored { owner_thread: usize, }, ScopeCompletePanicked { owner_thread: usize, }, ScopeCompleteNoPanic { owner_thread: usize, }, } #[cfg(debug_assertions)] lazy_static! { pub(super) static ref LOG_ENV: bool = env::var("RAYON_LOG").is_ok() || env::var("RAYON_RS_LOG").is_ok(); } #[cfg(debug_assertions)] macro_rules! log { ($event:expr) => { if *$crate::log::LOG_ENV { eprintln!("{:?}", $event); } }; } #[cfg(not(debug_assertions))] macro_rules! log { ($event:expr) => { if false { // Expand `$event` so it still appears used, but without // any of the formatting code to be optimized away. $event; } }; } rayon-core-1.6.0/src/private.rs010066400247370024737000000015451352066016100146570ustar0000000000000000//! The public parts of this private module are used to create traits //! that cannot be implemented outside of our own crate. This way we //! can feel free to extend those traits without worrying about it //! being a breaking change for other implementations. /// If this type is pub but not publicly reachable, third parties /// can't name it and can't implement traits using it. #[allow(missing_debug_implementations)] pub struct PrivateMarker; macro_rules! private_decl { () => { /// This trait is private; this method exists to make it /// impossible to implement outside the crate. #[doc(hidden)] fn __rayon_private__(&self) -> ::private::PrivateMarker; } } macro_rules! private_impl { () => { fn __rayon_private__(&self) -> ::private::PrivateMarker { ::private::PrivateMarker } } } rayon-core-1.6.0/src/registry.rs010066400247370024737000000731661353104652500150710ustar0000000000000000use crossbeam_deque::{Steal, Stealer, Worker}; use crossbeam_queue::SegQueue; #[cfg(rayon_unstable)] use internal::task::Task; #[cfg(rayon_unstable)] use job::Job; use job::{JobFifo, JobRef, StackJob}; use latch::{CountLatch, Latch, LatchProbe, LockLatch, SpinLatch, TickleLatch}; use log::Event::*; use sleep::Sleep; use std::any::Any; use std::cell::Cell; use std::collections::hash_map::DefaultHasher; use std::fmt; use std::hash::Hasher; use std::io; use std::mem; use std::ptr; #[allow(deprecated)] use std::sync::atomic::ATOMIC_USIZE_INIT; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Once}; use std::thread; use std::usize; use unwind; use util::leak; use {ErrorKind, ExitHandler, PanicHandler, StartHandler, ThreadPoolBuildError, ThreadPoolBuilder}; /// Thread builder used for customization via /// [`ThreadPoolBuilder::spawn_handler`](struct.ThreadPoolBuilder.html#method.spawn_handler). 
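///
/// For example, a spawn handler receives one `ThreadBuilder` per worker
/// and must arrange for [`run()`](#method.run) to be called on it (a
/// minimal sketch, mirroring the `spawn_handler` documentation):
///
/// ```
/// # use rayon_core as rayon;
/// # fn main() -> Result<(), rayon::ThreadPoolBuildError> {
/// let pool = rayon::ThreadPoolBuilder::new()
///     .spawn_handler(|thread| {
///         // `thread` is a `ThreadBuilder`; `run()` executes the worker's main loop.
///         std::thread::spawn(|| thread.run());
///         Ok(())
///     })
///     .build()?;
/// # Ok(())
/// # }
/// ```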
pub struct ThreadBuilder { name: Option, stack_size: Option, worker: Worker, registry: Arc, index: usize, } impl ThreadBuilder { /// Get the index of this thread in the pool, within `0..num_threads`. pub fn index(&self) -> usize { self.index } /// Get the string that was specified by `ThreadPoolBuilder::name()`. pub fn name(&self) -> Option<&str> { self.name.as_ref().map(String::as_str) } /// Get the value that was specified by `ThreadPoolBuilder::stack_size()`. pub fn stack_size(&self) -> Option { self.stack_size } /// Execute the main loop for this thread. This will not return until the /// thread pool is dropped. pub fn run(self) { unsafe { main_loop(self.worker, self.registry, self.index) } } } impl fmt::Debug for ThreadBuilder { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("ThreadBuilder") .field("pool", &self.registry.id()) .field("index", &self.index) .field("name", &self.name) .field("stack_size", &self.stack_size) .finish() } } /// Generalized trait for spawning a thread in the `Registry`. /// /// This trait is pub-in-private -- E0445 forces us to make it public, /// but we don't actually want to expose these details in the API. pub trait ThreadSpawn { private_decl! {} /// Spawn a thread with the `ThreadBuilder` parameters, and then /// call `ThreadBuilder::run()`. fn spawn(&mut self, ThreadBuilder) -> io::Result<()>; } /// Spawns a thread in the "normal" way with `std::thread::Builder`. /// /// This type is pub-in-private -- E0445 forces us to make it public, /// but we don't actually want to expose these details in the API. #[derive(Debug, Default)] pub struct DefaultSpawn; impl ThreadSpawn for DefaultSpawn { private_impl! {} fn spawn(&mut self, thread: ThreadBuilder) -> io::Result<()> { let mut b = thread::Builder::new(); if let Some(name) = thread.name() { b = b.name(name.to_owned()); } if let Some(stack_size) = thread.stack_size() { b = b.stack_size(stack_size); } b.spawn(|| thread.run())?; Ok(()) } } /// Spawns a thread with a user's custom callback. /// /// This type is pub-in-private -- E0445 forces us to make it public, /// but we don't actually want to expose these details in the API. #[derive(Debug)] pub struct CustomSpawn(F); impl CustomSpawn where F: FnMut(ThreadBuilder) -> io::Result<()>, { pub(super) fn new(spawn: F) -> Self { CustomSpawn(spawn) } } impl ThreadSpawn for CustomSpawn where F: FnMut(ThreadBuilder) -> io::Result<()>, { private_impl! {} #[inline] fn spawn(&mut self, thread: ThreadBuilder) -> io::Result<()> { (self.0)(thread) } } pub(super) struct Registry { thread_infos: Vec, sleep: Sleep, injected_jobs: SegQueue, panic_handler: Option>, start_handler: Option>, exit_handler: Option>, // When this latch reaches 0, it means that all work on this // registry must be complete. This is ensured in the following ways: // // - if this is the global registry, there is a ref-count that never // gets released. // - if this is a user-created thread-pool, then so long as the thread-pool // exists, it holds a reference. // - when we inject a "blocking job" into the registry with `ThreadPool::install()`, // no adjustment is needed; the `ThreadPool` holds the reference, and since we won't // return until the blocking job is complete, that ref will continue to be held. // - when `join()` or `scope()` is invoked, similarly, no adjustments are needed. // These are always owned by some other job (e.g., one injected by `ThreadPool::install()`) // and that job will keep the pool alive. 
terminate_latch: CountLatch, } /// //////////////////////////////////////////////////////////////////////// /// Initialization static mut THE_REGISTRY: Option<&'static Arc> = None; static THE_REGISTRY_SET: Once = Once::new(); /// Starts the worker threads (if that has not already happened). If /// initialization has not already occurred, use the default /// configuration. fn global_registry() -> &'static Arc { set_global_registry(|| Registry::new(ThreadPoolBuilder::new())) .or_else(|err| unsafe { THE_REGISTRY.ok_or(err) }) .expect("The global thread pool has not been initialized.") } /// Starts the worker threads (if that has not already happened) with /// the given builder. pub(super) fn init_global_registry( builder: ThreadPoolBuilder, ) -> Result<&'static Arc, ThreadPoolBuildError> where S: ThreadSpawn, { set_global_registry(|| Registry::new(builder)) } /// Starts the worker threads (if that has not already happened) /// by creating a registry with the given callback. fn set_global_registry(registry: F) -> Result<&'static Arc, ThreadPoolBuildError> where F: FnOnce() -> Result, ThreadPoolBuildError>, { let mut result = Err(ThreadPoolBuildError::new( ErrorKind::GlobalPoolAlreadyInitialized, )); THE_REGISTRY_SET.call_once(|| { result = registry().map(|registry| { let registry = leak(registry); unsafe { THE_REGISTRY = Some(registry); } registry }); }); result } struct Terminator<'a>(&'a Arc); impl<'a> Drop for Terminator<'a> { fn drop(&mut self) { self.0.terminate() } } impl Registry { pub(super) fn new( mut builder: ThreadPoolBuilder, ) -> Result, ThreadPoolBuildError> where S: ThreadSpawn, { let n_threads = builder.get_num_threads(); let breadth_first = builder.get_breadth_first(); let (workers, stealers): (Vec<_>, Vec<_>) = (0..n_threads) .map(|_| { let worker = if breadth_first { Worker::new_fifo() } else { Worker::new_lifo() }; let stealer = worker.stealer(); (worker, stealer) }) .unzip(); let registry = Arc::new(Registry { thread_infos: stealers.into_iter().map(ThreadInfo::new).collect(), sleep: Sleep::new(), injected_jobs: SegQueue::new(), terminate_latch: CountLatch::new(), panic_handler: builder.take_panic_handler(), start_handler: builder.take_start_handler(), exit_handler: builder.take_exit_handler(), }); // If we return early or panic, make sure to terminate existing threads. let t1000 = Terminator(®istry); for (index, worker) in workers.into_iter().enumerate() { let thread = ThreadBuilder { name: builder.get_thread_name(index), stack_size: builder.get_stack_size(), registry: registry.clone(), worker, index, }; if let Err(e) = builder.get_spawn_handler().spawn(thread) { return Err(ThreadPoolBuildError::new(ErrorKind::IOError(e))); } } // Returning normally now, without termination. mem::forget(t1000); Ok(registry.clone()) } #[cfg(rayon_unstable)] pub(super) fn global() -> Arc { global_registry().clone() } pub(super) fn current() -> Arc { unsafe { let worker_thread = WorkerThread::current(); if worker_thread.is_null() { global_registry().clone() } else { (*worker_thread).registry.clone() } } } /// Returns the number of threads in the current registry. This /// is better than `Registry::current().num_threads()` because it /// avoids incrementing the `Arc`. pub(super) fn current_num_threads() -> usize { unsafe { let worker_thread = WorkerThread::current(); if worker_thread.is_null() { global_registry().num_threads() } else { (*worker_thread).registry.num_threads() } } } /// Returns the current `WorkerThread` if it's part of this `Registry`. 
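    ///
    /// For example, `ScopeFifo::spawn_fifo` uses this to choose between a
    /// worker-local push and global injection (a sketch of the internal
    /// call site in `scope/mod.rs`, slightly compressed):
    ///
    /// ```ignore
    /// match self.base.registry.current_thread() {
    ///     Some(worker) => worker.push(fifo.push(job_ref)),
    ///     None => self.base.registry.inject(&[job_ref]),
    /// }
    /// ```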
    pub(super) fn current_thread(&self) -> Option<&WorkerThread> {
        unsafe {
            let worker = WorkerThread::current().as_ref()?;
            if worker.registry().id() == self.id() {
                Some(worker)
            } else {
                None
            }
        }
    }

    /// Returns an opaque identifier for this registry.
    pub(super) fn id(&self) -> RegistryId {
        // We can rely on `self` not to change since we only ever create
        // registries that are boxed up in an `Arc` (see `new()` above).
        RegistryId {
            addr: self as *const Self as usize,
        }
    }

    pub(super) fn num_threads(&self) -> usize {
        self.thread_infos.len()
    }

    pub(super) fn handle_panic(&self, err: Box<dyn Any + Send>) {
        match self.panic_handler {
            Some(ref handler) => {
                // If the customizable panic handler itself panics,
                // then we abort.
                let abort_guard = unwind::AbortIfPanic;
                handler(err);
                mem::forget(abort_guard);
            }
            None => {
                // Default panic handler aborts.
                let _ = unwind::AbortIfPanic; // let this drop.
            }
        }
    }

    /// Waits for the worker threads to get up and running. This is
    /// meant to be used for benchmarking purposes, primarily, so that
    /// you can get more consistent numbers by having everything
    /// "ready to go".
    pub(super) fn wait_until_primed(&self) {
        for info in &self.thread_infos {
            info.primed.wait();
        }
    }

    /// Waits for the worker threads to stop. This is used for testing
    /// -- so we can check that termination actually works.
    #[cfg(test)]
    pub(super) fn wait_until_stopped(&self) {
        for info in &self.thread_infos {
            info.stopped.wait();
        }
    }

    /// ////////////////////////////////////////////////////////////////////////
    /// MAIN LOOP
    ///
    /// So long as all of the worker threads are hanging out in their
    /// top-level loop, there is no work to be done.

    /// Push a job into the given `registry`. If we are running on a
    /// worker thread for the registry, this will push onto the
    /// deque. Else, it will inject from the outside (which is slower).
    pub(super) fn inject_or_push(&self, job_ref: JobRef) {
        let worker_thread = WorkerThread::current();
        unsafe {
            if !worker_thread.is_null() && (*worker_thread).registry().id() == self.id() {
                (*worker_thread).push(job_ref);
            } else {
                self.inject(&[job_ref]);
            }
        }
    }

    /// Unsafe: the caller must guarantee that `task` will stay valid
    /// until it executes.
    #[cfg(rayon_unstable)]
    pub(super) unsafe fn submit_task<T>(&self, task: Arc<T>)
    where
        T: Task,
    {
        let task_job = TaskJob::new(task);
        let task_job_ref = TaskJob::into_job_ref(task_job);
        return self.inject_or_push(task_job_ref);

        /// A little newtype wrapper for `T`, just because I did not
        /// want to implement `Job` for all `T: Task`.
        struct TaskJob<T: Task> {
            _data: T,
        }

        impl<T: Task> TaskJob<T> {
            fn new(arc: Arc<T>) -> Arc<Self> {
                // `TaskJob<T>` has the same layout as `T`, so we can safely
                // transmute this `T` into a `TaskJob<T>`. This lets us write our
                // impls of `Job` for `TaskJob<T>`, making them more restricted.
                // Since `Job` is a private trait, this is not strictly necessary,
                // I don't think, but makes me feel better.
                unsafe { mem::transmute(arc) }
            }

            fn into_task(this: Arc<TaskJob<T>>) -> Arc<T> {
                // Same logic as `new()`
                unsafe { mem::transmute(this) }
            }

            unsafe fn into_job_ref(this: Arc<Self>) -> JobRef {
                let this: *const Self = mem::transmute(this);
                JobRef::new(this)
            }
        }

        impl<T: Task> Job for TaskJob<T> {
            unsafe fn execute(this: *const Self) {
                let this: Arc<Self> = mem::transmute(this);
                let task: Arc<T> = TaskJob::into_task(this);
                Task::execute(task);
            }
        }
    }

    /// Push a job into the "external jobs" queue; it will be taken by
    /// whatever worker has nothing to do. Use this if you know that
    /// you are not on a worker of this registry.
pub(super) fn inject(&self, injected_jobs: &[JobRef]) { log!(InjectJobs { count: injected_jobs.len() }); // It should not be possible for `state.terminate` to be true // here. It is only set to true when the user creates (and // drops) a `ThreadPool`; and, in that case, they cannot be // calling `inject()` later, since they dropped their // `ThreadPool`. assert!( !self.terminate_latch.probe(), "inject() sees state.terminate as true" ); for &job_ref in injected_jobs { self.injected_jobs.push(job_ref); } self.sleep.tickle(usize::MAX); } fn pop_injected_job(&self, worker_index: usize) -> Option { let job = self.injected_jobs.pop().ok(); if job.is_some() { log!(UninjectedWork { worker: worker_index }); } job } /// If already in a worker-thread of this registry, just execute `op`. /// Otherwise, inject `op` in this thread-pool. Either way, block until `op` /// completes and return its return value. If `op` panics, that panic will /// be propagated as well. The second argument indicates `true` if injection /// was performed, `false` if executed directly. pub(super) fn in_worker(&self, op: OP) -> R where OP: FnOnce(&WorkerThread, bool) -> R + Send, R: Send, { unsafe { let worker_thread = WorkerThread::current(); if worker_thread.is_null() { self.in_worker_cold(op) } else if (*worker_thread).registry().id() != self.id() { self.in_worker_cross(&*worker_thread, op) } else { // Perfectly valid to give them a `&T`: this is the // current thread, so we know the data structure won't be // invalidated until we return. op(&*worker_thread, false) } } } #[cold] unsafe fn in_worker_cold(&self, op: OP) -> R where OP: FnOnce(&WorkerThread, bool) -> R + Send, R: Send, { thread_local!(static LOCK_LATCH: LockLatch = LockLatch::new()); LOCK_LATCH.with(|l| { // This thread isn't a member of *any* thread pool, so just block. debug_assert!(WorkerThread::current().is_null()); let job = StackJob::new( |injected| { let worker_thread = WorkerThread::current(); assert!(injected && !worker_thread.is_null()); op(&*worker_thread, true) }, l, ); self.inject(&[job.as_job_ref()]); job.latch.wait_and_reset(); // Make sure we can use the same latch again next time. job.into_result() }) } #[cold] unsafe fn in_worker_cross(&self, current_thread: &WorkerThread, op: OP) -> R where OP: FnOnce(&WorkerThread, bool) -> R + Send, R: Send, { // This thread is a member of a different pool, so let it process // other work while waiting for this `op` to complete. debug_assert!(current_thread.registry().id() != self.id()); let latch = TickleLatch::new(SpinLatch::new(), ¤t_thread.registry().sleep); let job = StackJob::new( |injected| { let worker_thread = WorkerThread::current(); assert!(injected && !worker_thread.is_null()); op(&*worker_thread, true) }, latch, ); self.inject(&[job.as_job_ref()]); current_thread.wait_until(&job.latch); job.into_result() } /// Increment the terminate counter. This increment should be /// balanced by a call to `terminate`, which will decrement. This /// is used when spawning asynchronous work, which needs to /// prevent the registry from terminating so long as it is active. /// /// Note that blocking functions such as `join` and `scope` do not /// need to concern themselves with this fn; their context is /// responsible for ensuring the current thread-pool will not /// terminate until they return. /// /// The global thread-pool always has an outstanding reference /// (the initial one). 
Custom thread-pools have one outstanding /// reference that is dropped when the `ThreadPool` is dropped: /// since installing the thread-pool blocks until any joins/scopes /// complete, this ensures that joins/scopes are covered. /// /// The exception is `::spawn()`, which can create a job outside /// of any blocking scope. In that case, the job itself holds a /// terminate count and is responsible for invoking `terminate()` /// when finished. pub(super) fn increment_terminate_count(&self) { self.terminate_latch.increment(); } /// Signals that the thread-pool which owns this registry has been /// dropped. The worker threads will gradually terminate, once any /// extant work is completed. pub(super) fn terminate(&self) { self.terminate_latch.set(); self.sleep.tickle(usize::MAX); } } #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] pub(super) struct RegistryId { addr: usize, } struct ThreadInfo { /// Latch set once thread has started and we are entering into the /// main loop. Used to wait for worker threads to become primed, /// primarily of interest for benchmarking. primed: LockLatch, /// Latch is set once worker thread has completed. Used to wait /// until workers have stopped; only used for tests. stopped: LockLatch, /// the "stealer" half of the worker's deque stealer: Stealer, } impl ThreadInfo { fn new(stealer: Stealer) -> ThreadInfo { ThreadInfo { primed: LockLatch::new(), stopped: LockLatch::new(), stealer, } } } /// //////////////////////////////////////////////////////////////////////// /// WorkerThread identifiers pub(super) struct WorkerThread { /// the "worker" half of our local deque worker: Worker, /// local queue used for `spawn_fifo` indirection fifo: JobFifo, index: usize, /// A weak random number generator. rng: XorShift64Star, registry: Arc, } // This is a bit sketchy, but basically: the WorkerThread is // allocated on the stack of the worker on entry and stored into this // thread local variable. So it will remain valid at least until the // worker is fully unwound. Using an unsafe pointer avoids the need // for a RefCell etc. thread_local! { static WORKER_THREAD_STATE: Cell<*const WorkerThread> = Cell::new(ptr::null()); } impl Drop for WorkerThread { fn drop(&mut self) { // Undo `set_current` WORKER_THREAD_STATE.with(|t| { assert!(t.get().eq(&(self as *const _))); t.set(ptr::null()); }); } } impl WorkerThread { /// Gets the `WorkerThread` index for the current thread; returns /// NULL if this is not a worker thread. This pointer is valid /// anywhere on the current thread. #[inline] pub(super) fn current() -> *const WorkerThread { WORKER_THREAD_STATE.with(Cell::get) } /// Sets `self` as the worker thread index for the current thread. /// This is done during worker thread startup. unsafe fn set_current(thread: *const WorkerThread) { WORKER_THREAD_STATE.with(|t| { assert!(t.get().is_null()); t.set(thread); }); } /// Returns the registry that owns this worker thread. pub(super) fn registry(&self) -> &Arc { &self.registry } /// Our index amongst the worker threads (ranges from `0..self.num_threads()`). 
    #[inline]
    pub(super) fn index(&self) -> usize {
        self.index
    }

    #[inline]
    pub(super) unsafe fn push(&self, job: JobRef) {
        self.worker.push(job);
        self.registry.sleep.tickle(self.index);
    }

    #[inline]
    pub(super) unsafe fn push_fifo(&self, job: JobRef) {
        self.push(self.fifo.push(job));
    }

    #[inline]
    pub(super) fn local_deque_is_empty(&self) -> bool {
        self.worker.is_empty()
    }

    /// Attempts to obtain a "local" job -- typically this means
    /// popping from the top of the stack, though if we are configured
    /// for breadth-first execution, it would mean dequeuing from the
    /// bottom.
    #[inline]
    pub(super) unsafe fn take_local_job(&self) -> Option<JobRef> {
        self.worker.pop()
    }

    /// Wait until the latch is set. Try to keep busy by popping and
    /// stealing tasks as necessary.
    #[inline]
    pub(super) unsafe fn wait_until<L: LatchProbe + ?Sized>(&self, latch: &L) {
        log!(WaitUntil { worker: self.index });
        if !latch.probe() {
            self.wait_until_cold(latch);
        }
    }

    #[cold]
    unsafe fn wait_until_cold<L: LatchProbe + ?Sized>(&self, latch: &L) {
        // the code below should swallow all panics and hence never
        // unwind; but if something goes wrong, we want to abort,
        // because otherwise other code in rayon may assume that the
        // latch has been signaled, and that can lead to random memory
        // accesses, which would be *very bad*
        let abort_guard = unwind::AbortIfPanic;

        let mut yields = 0;
        while !latch.probe() {
            // Try to find some work to do. We give preference first
            // to things in our local deque, then in other workers'
            // deques, and finally to injected jobs from the
            // outside. The idea is to finish what we started before
            // we take on something new.
            if let Some(job) = self
                .take_local_job()
                .or_else(|| self.steal())
                .or_else(|| self.registry.pop_injected_job(self.index))
            {
                yields = self.registry.sleep.work_found(self.index, yields);
                self.execute(job);
            } else {
                yields = self.registry.sleep.no_work_found(self.index, yields);
            }
        }

        // If we were sleepy, we are not anymore. We "found work" --
        // whatever the surrounding thread was doing before it had to
        // wait.
        self.registry.sleep.work_found(self.index, yields);

        log!(LatchSet { worker: self.index });
        mem::forget(abort_guard); // successful execution, do not abort
    }

    pub(super) unsafe fn execute(&self, job: JobRef) {
        job.execute();

        // Subtle: executing this job will have `set()` some of its
        // latches. This may mean that a sleepy (or sleeping) worker
        // can now make progress. So we have to tickle them to let
        // them know.
        self.registry.sleep.tickle(self.index);
    }

    /// Try to steal a single job and return it.
    ///
    /// This should only be done as a last resort, when there is no
    /// local work to do.
unsafe fn steal(&self) -> Option { // we only steal when we don't have any work to do locally debug_assert!(self.local_deque_is_empty()); // otherwise, try to steal let num_threads = self.registry.thread_infos.len(); if num_threads <= 1 { return None; } let start = self.rng.next_usize(num_threads); (start..num_threads) .chain(0..start) .filter(|&i| i != self.index) .filter_map(|victim_index| { let victim = &self.registry.thread_infos[victim_index]; loop { match victim.stealer.steal() { Steal::Empty => return None, Steal::Success(d) => { log!(StoleWork { worker: self.index, victim: victim_index }); return Some(d); } Steal::Retry => {} } } }) .next() } } /// //////////////////////////////////////////////////////////////////////// unsafe fn main_loop(worker: Worker, registry: Arc, index: usize) { let worker_thread = &WorkerThread { worker, fifo: JobFifo::new(), index, rng: XorShift64Star::new(), registry: registry.clone(), }; WorkerThread::set_current(worker_thread); // let registry know we are ready to do work registry.thread_infos[index].primed.set(); // Worker threads should not panic. If they do, just abort, as the // internal state of the threadpool is corrupted. Note that if // **user code** panics, we should catch that and redirect. let abort_guard = unwind::AbortIfPanic; // Inform a user callback that we started a thread. if let Some(ref handler) = registry.start_handler { let registry = registry.clone(); match unwind::halt_unwinding(|| handler(index)) { Ok(()) => {} Err(err) => { registry.handle_panic(err); } } } worker_thread.wait_until(®istry.terminate_latch); // Should not be any work left in our queue. debug_assert!(worker_thread.take_local_job().is_none()); // let registry know we are done registry.thread_infos[index].stopped.set(); // Normal termination, do not abort. mem::forget(abort_guard); // Inform a user callback that we exited a thread. if let Some(ref handler) = registry.exit_handler { let registry = registry.clone(); match unwind::halt_unwinding(|| handler(index)) { Ok(()) => {} Err(err) => { registry.handle_panic(err); } } // We're already exiting the thread, there's nothing else to do. } } /// If already in a worker-thread, just execute `op`. Otherwise, /// execute `op` in the default thread-pool. Either way, block until /// `op` completes and return its return value. If `op` panics, that /// panic will be propagated as well. The second argument indicates /// `true` if injection was performed, `false` if executed directly. pub(super) fn in_worker(op: OP) -> R where OP: FnOnce(&WorkerThread, bool) -> R + Send, R: Send, { unsafe { let owner_thread = WorkerThread::current(); if !owner_thread.is_null() { // Perfectly valid to give them a `&T`: this is the // current thread, so we know the data structure won't be // invalidated until we return. op(&*owner_thread, false) } else { global_registry().in_worker_cold(op) } } } /// [xorshift*] is a fast pseudorandom number generator which will /// even tolerate weak seeding, as long as it's not zero. /// /// [xorshift*]: https://en.wikipedia.org/wiki/Xorshift#xorshift* struct XorShift64Star { state: Cell, } impl XorShift64Star { fn new() -> Self { // Any non-zero seed will do -- this uses the hash of a global counter. 
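        // (A note on the invariant being maintained here: the hash of a
        // fresh counter value is effectively never zero, but the loop
        // below retries anyway, since xorshift* requires a strictly
        // non-zero state -- see the `debug_assert_ne!` in `next()`.)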
let mut seed = 0; while seed == 0 { let mut hasher = DefaultHasher::new(); #[allow(deprecated)] static COUNTER: AtomicUsize = ATOMIC_USIZE_INIT; hasher.write_usize(COUNTER.fetch_add(1, Ordering::Relaxed)); seed = hasher.finish(); } XorShift64Star { state: Cell::new(seed), } } fn next(&self) -> u64 { let mut x = self.state.get(); debug_assert_ne!(x, 0); x ^= x >> 12; x ^= x << 25; x ^= x >> 27; self.state.set(x); x.wrapping_mul(0x2545_f491_4f6c_dd1d) } /// Return a value from `0..n`. fn next_usize(&self, n: usize) -> usize { (self.next() % n as u64) as usize } } rayon-core-1.6.0/src/scope/internal.rs010066400247370024737000000031401352066016100161230ustar0000000000000000#![cfg(rayon_unstable)] use super::{Scope, ScopeBase}; use internal::task::{ScopeHandle, Task, ToScopeHandle}; use std::any::Any; use std::mem; use std::sync::Arc; impl<'scope> ToScopeHandle<'scope> for Scope<'scope> { type ScopeHandle = LocalScopeHandle<'scope>; fn to_scope_handle(&self) -> Self::ScopeHandle { unsafe { LocalScopeHandle::new(self) } } } #[derive(Debug)] pub struct LocalScopeHandle<'scope> { scope: *const ScopeBase<'scope>, } impl<'scope> LocalScopeHandle<'scope> { /// Caller guarantees that `*scope` will remain valid /// until the scope completes. Since we acquire a ref, /// that means it will remain valid until we release it. unsafe fn new(scope: &Scope<'scope>) -> Self { scope.base.increment(); LocalScopeHandle { scope: &scope.base } } } impl<'scope> Drop for LocalScopeHandle<'scope> { fn drop(&mut self) { unsafe { if !self.scope.is_null() { (*self.scope).job_completed_ok(); } } } } /// We assert that the `Self` type remains valid until a /// method is called, and that `'scope` will not end until /// that point. unsafe impl<'scope> ScopeHandle<'scope> for LocalScopeHandle<'scope> { unsafe fn spawn_task(&self, task: Arc) { let scope = &*self.scope; scope.registry.submit_task(task); } fn ok(self) { mem::drop(self); } fn panicked(self, err: Box) { unsafe { (*self.scope).job_panicked(err); mem::forget(self); // no need to run dtor now } } } rayon-core-1.6.0/src/scope/mod.rs010066400247370024737000000560231353104652500151020ustar0000000000000000//! Methods for custom fork-join scopes, created by the [`scope()`] //! function. These are a more flexible alternative to [`join()`]. //! //! [`scope()`]: fn.scope.html //! [`join()`]: ../join/join.fn.html use job::{HeapJob, JobFifo}; use latch::{CountLatch, Latch}; use log::Event::*; use registry::{in_worker, Registry, WorkerThread}; use std::any::Any; use std::fmt; use std::marker::PhantomData; use std::mem; use std::ptr; use std::sync::atomic::{AtomicPtr, Ordering}; use std::sync::Arc; use unwind; mod internal; #[cfg(test)] mod test; /// Represents a fork-join scope which can be used to spawn any number of tasks. /// See [`scope()`] for more information. /// ///[`scope()`]: fn.scope.html pub struct Scope<'scope> { base: ScopeBase<'scope>, } /// Represents a fork-join scope which can be used to spawn any number of tasks. /// Those spawned from the same thread are prioritized in relative FIFO order. /// See [`scope_fifo()`] for more information. /// ///[`scope_fifo()`]: fn.scope_fifo.html pub struct ScopeFifo<'scope> { base: ScopeBase<'scope>, fifos: Vec, } struct ScopeBase<'scope> { /// thread where `scope()` was executed (note that individual jobs /// may be executing on different worker threads, though they /// should always be within the same pool of threads) owner_thread_index: usize, /// thread registry where `scope()` was executed. 
registry: Arc, /// if some job panicked, the error is stored here; it will be /// propagated to the one who created the scope panic: AtomicPtr>, /// latch to set when the counter drops to zero (and hence this scope is complete) job_completed_latch: CountLatch, /// You can think of a scope as containing a list of closures to execute, /// all of which outlive `'scope`. They're not actually required to be /// `Sync`, but it's still safe to let the `Scope` implement `Sync` because /// the closures are only *moved* across threads to be executed. marker: PhantomData) + Send + Sync + 'scope>>, } /// Create a "fork-join" scope `s` and invokes the closure with a /// reference to `s`. This closure can then spawn asynchronous tasks /// into `s`. Those tasks may run asynchronously with respect to the /// closure; they may themselves spawn additional tasks into `s`. When /// the closure returns, it will block until all tasks that have been /// spawned into `s` complete. /// /// `scope()` is a more flexible building block compared to `join()`, /// since a loop can be used to spawn any number of tasks without /// recursing. However, that flexibility comes at a performance price: /// tasks spawned using `scope()` must be allocated onto the heap, /// whereas `join()` can make exclusive use of the stack. **Prefer /// `join()` (or, even better, parallel iterators) where possible.** /// /// # Example /// /// The Rayon `join()` function launches two closures and waits for them /// to stop. One could implement `join()` using a scope like so, although /// it would be less efficient than the real implementation: /// /// ```rust /// # use rayon_core as rayon; /// pub fn join(oper_a: A, oper_b: B) -> (RA, RB) /// where A: FnOnce() -> RA + Send, /// B: FnOnce() -> RB + Send, /// RA: Send, /// RB: Send, /// { /// let mut result_a: Option = None; /// let mut result_b: Option = None; /// rayon::scope(|s| { /// s.spawn(|_| result_a = Some(oper_a())); /// s.spawn(|_| result_b = Some(oper_b())); /// }); /// (result_a.unwrap(), result_b.unwrap()) /// } /// ``` /// /// # A note on threading /// /// The closure given to `scope()` executes in the Rayon thread-pool, /// as do those given to `spawn()`. This means that you can't access /// thread-local variables (well, you can, but they may have /// unexpected values). /// /// # Task execution /// /// Task execution potentially starts as soon as `spawn()` is called. /// The task will end sometime before `scope()` returns. Note that the /// *closure* given to scope may return much earlier. 
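/// For instance, `scope()` forwards whatever value the closure returns
/// (a small sketch):
///
/// ```
/// # use rayon_core as rayon;
/// let x = rayon::scope(|_| 22);
/// assert_eq!(x, 22);
/// ```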
/// In general
/// the lifetime of a scope created like `scope(body)` goes something like this:
///
/// - Scope begins when `scope(body)` is called
/// - Scope body `body()` is invoked
///     - Scope tasks may be spawned
/// - Scope body returns
/// - Scope tasks execute, possibly spawning more tasks
/// - Once all tasks are done, scope ends and `scope()` returns
///
/// To see how and when tasks are joined, consider this example:
///
/// ```rust
/// # use rayon_core as rayon;
/// // point start
/// rayon::scope(|s| {
///     s.spawn(|s| { // task s.1
///         s.spawn(|s| { // task s.1.1
///             rayon::scope(|t| {
///                 t.spawn(|_| ()); // task t.1
///                 t.spawn(|_| ()); // task t.2
///             });
///         });
///     });
///     s.spawn(|s| { // task s.2
///     });
///     // point mid
/// });
/// // point end
/// ```
///
/// The various tasks that are run will execute roughly like so:
///
/// ```notrust
/// |     (start)
/// |
/// |     (scope `s` created)
/// +-----------------------------------------------+ (task s.2)
/// +-------+ (task s.1)                             |
/// |       |                                        |
/// |       +---+ (task s.1.1)                       |
/// |       |   |                                    |
/// |       |   | (scope `t` created)                |
/// |       |   +----------------+ (task t.2)        |
/// |       |   +---+ (task t.1) |                   |
/// | (mid) |   |   |            |                   |
/// :       |   + <-+------------+ (scope `t` ends)  |
/// :       |   |                                    |
/// |<------+---+-----------------------------------+ (scope `s` ends)
/// |
/// | (end)
/// ```
///
/// The point here is that everything spawned into scope `s` will
/// terminate (at latest) at the same point -- right before the
/// original call to `rayon::scope` returns. This includes new
/// subtasks created by other subtasks (e.g., task `s.1.1`). If a new
/// scope is created (such as `t`), the things spawned into that scope
/// will be joined before that scope returns, which in turn occurs
/// before the creating task (task `s.1.1` in this case) finishes.
///
/// There is no guaranteed order of execution for spawns in a scope,
/// given that other threads may steal tasks at any time. However, they
/// are generally prioritized in a LIFO order on the thread from which
/// they were spawned. So in this example, absent any stealing, we can
/// expect `s.2` to execute before `s.1`, and `t.2` before `t.1`. Other
/// threads always steal from the other end of the deque, like FIFO
/// order. The idea is that "recent" tasks are most likely to be fresh
/// in the local CPU's cache, while other threads can steal older
/// "stale" tasks. For an alternate approach, consider
/// [`scope_fifo()`] instead.
///
/// [`scope_fifo()`]: fn.scope_fifo.html
///
/// # Accessing stack data
///
/// In general, spawned tasks may access stack data in place that
/// outlives the scope itself. Other data must be fully owned by the
/// spawned task.
///
/// ```rust
/// # use rayon_core as rayon;
/// let ok: Vec<i32> = vec![1, 2, 3];
/// rayon::scope(|s| {
///     let bad: Vec<i32> = vec![4, 5, 6];
///     s.spawn(|_| {
///         // We can access `ok` because it outlives the scope `s`.
///         println!("ok: {:?}", ok);
///
///         // If we just try to use `bad` here, the closure will borrow `bad`
///         // (because we are just printing it out, and that only requires a
///         // borrow), which will result in a compilation error. Read on
///         // for options.
///         // println!("bad: {:?}", bad);
///     });
/// });
/// ```
///
/// As the comments in the example above suggest, to reference `bad` we must
/// take ownership of it. One way to do this is to detach the closure
/// from the surrounding stack frame, using the `move` keyword.
/// This
/// will cause it to take ownership of *all* the variables it touches,
/// in this case including both `ok` *and* `bad`:
///
/// ```rust
/// # use rayon_core as rayon;
/// let ok: Vec<i32> = vec![1, 2, 3];
/// rayon::scope(|s| {
///     let bad: Vec<i32> = vec![4, 5, 6];
///     s.spawn(move |_| {
///         println!("ok: {:?}", ok);
///         println!("bad: {:?}", bad);
///     });
///
///     // That closure is fine, but now we can't use `ok` anywhere else,
///     // since it is owned by the previous task:
///     // s.spawn(|_| println!("ok: {:?}", ok));
/// });
/// ```
///
/// While this works, it could be a problem if we want to use `ok` elsewhere.
/// There are two choices. We can keep the closure as a `move` closure, but
/// instead of referencing the variable `ok`, we create a shadowed variable that
/// is a borrow of `ok` and capture *that*:
///
/// ```rust
/// # use rayon_core as rayon;
/// let ok: Vec<i32> = vec![1, 2, 3];
/// rayon::scope(|s| {
///     let bad: Vec<i32> = vec![4, 5, 6];
///     let ok: &Vec<i32> = &ok; // shadow the original `ok`
///     s.spawn(move |_| {
///         println!("ok: {:?}", ok); // captures the shadowed version
///         println!("bad: {:?}", bad);
///     });
///
///     // Now we too can use the shadowed `ok`, since `&Vec<i32>` references
///     // can be shared freely. Note that we need a `move` closure here though,
///     // because otherwise we'd be trying to borrow the shadowed `ok`,
///     // and that doesn't outlive `scope`.
///     s.spawn(move |_| println!("ok: {:?}", ok));
/// });
/// ```
///
/// Another option is not to use the `move` keyword but instead to take ownership
/// of individual variables:
///
/// ```rust
/// # use rayon_core as rayon;
/// let ok: Vec<i32> = vec![1, 2, 3];
/// rayon::scope(|s| {
///     let bad: Vec<i32> = vec![4, 5, 6];
///     s.spawn(|_| {
///         // Transfer ownership of `bad` into a local variable (also named `bad`).
///         // This will force the closure to take ownership of `bad` from the environment.
///         let bad = bad;
///         println!("ok: {:?}", ok); // `ok` is only borrowed.
///         println!("bad: {:?}", bad); // refers to our local variable, above.
///     });
///
///     s.spawn(|_| println!("ok: {:?}", ok)); // we too can borrow `ok`
/// });
/// ```
///
/// # Panics
///
/// If a panic occurs, either in the closure given to `scope()` or in
/// any of the spawned jobs, that panic will be propagated and the
/// call to `scope()` will panic. If multiple panics occur, it is
/// non-deterministic which of their panic values will propagate.
/// Regardless, once a task is spawned using `scope.spawn()`, it will
/// execute, even if the spawning task should later panic. `scope()`
/// returns once all spawned jobs have completed, and any panics are
/// propagated at that point.
pub fn scope<'scope, OP, R>(op: OP) -> R
where
    OP: for<'s> FnOnce(&'s Scope<'scope>) -> R + 'scope + Send,
    R: Send,
{
    in_worker(|owner_thread, _| {
        let scope = Scope::<'scope>::new(owner_thread);
        unsafe { scope.base.complete(owner_thread, || op(&scope)) }
    })
}

/// Create a "fork-join" scope `s` with FIFO order, and invokes the
/// closure with a reference to `s`. This closure can then spawn
/// asynchronous tasks into `s`. Those tasks may run asynchronously with
/// respect to the closure; they may themselves spawn additional tasks
/// into `s`. When the closure returns, it will block until all tasks
/// that have been spawned into `s` complete.
///
/// # Task execution
///
/// Tasks in a `scope_fifo()` run similarly to [`scope()`], but there's a
/// difference in the order of execution.
/// Consider a similar example:
///
/// [`scope()`]: fn.scope.html
///
/// ```rust
/// # use rayon_core as rayon;
/// // point start
/// rayon::scope_fifo(|s| {
///     s.spawn_fifo(|s| { // task s.1
///         s.spawn_fifo(|s| { // task s.1.1
///             rayon::scope_fifo(|t| {
///                 t.spawn_fifo(|_| ()); // task t.1
///                 t.spawn_fifo(|_| ()); // task t.2
///             });
///         });
///     });
///     s.spawn_fifo(|s| { // task s.2
///     });
///     // point mid
/// });
/// // point end
/// ```
///
/// The various tasks that are run will execute roughly like so:
///
/// ```notrust
/// |     (start)
/// |
/// |     (FIFO scope `s` created)
/// +--------------------+ (task s.1)
/// +-------+ (task s.2) |
/// |       |            +---+ (task s.1.1)
/// |       |            |   |
/// |       |            |   | (FIFO scope `t` created)
/// |       |            |   +----------------+ (task t.1)
/// |       |            |   +---+ (task t.2) |
/// | (mid) |            |   |   |            |
/// :       |            |   + <-+------------+ (scope `t` ends)
/// :       |            |   |
/// |<------+------------+---+ (scope `s` ends)
/// |
/// | (end)
/// ```
///
/// Under `scope_fifo()`, the spawns are prioritized in a FIFO order on
/// the thread from which they were spawned, as opposed to `scope()`'s
/// LIFO. So in this example, we can expect `s.1` to execute before
/// `s.2`, and `t.1` before `t.2`. Other threads also steal tasks in
/// FIFO order, as usual. Overall, this has roughly the same order as
/// the now-deprecated [`breadth_first`] option, except the effect is
/// isolated to a particular scope. If spawns are intermingled from any
/// combination of `scope()` and `scope_fifo()`, or from different
/// threads, their order is only specified with respect to spawns in the
/// same scope and thread.
///
/// For more details on this design, see Rayon [RFC #1].
///
/// [`breadth_first`]: struct.ThreadPoolBuilder.html#method.breadth_first
/// [RFC #1]: https://github.com/rayon-rs/rfcs/blob/master/accepted/rfc0001-scope-scheduling.md
///
/// # Panics
///
/// If a panic occurs, either in the closure given to `scope_fifo()` or
/// in any of the spawned jobs, that panic will be propagated and the
/// call to `scope_fifo()` will panic. If multiple panics occur, it is
/// non-deterministic which of their panic values will propagate.
/// Regardless, once a task is spawned using `scope.spawn_fifo()`, it
/// will execute, even if the spawning task should later panic.
/// `scope_fifo()` returns once all spawned jobs have completed, and any
/// panics are propagated at that point.
pub fn scope_fifo<'scope, OP, R>(op: OP) -> R
where
    OP: for<'s> FnOnce(&'s ScopeFifo<'scope>) -> R + 'scope + Send,
    R: Send,
{
    in_worker(|owner_thread, _| {
        let scope = ScopeFifo::<'scope>::new(owner_thread);
        unsafe { scope.base.complete(owner_thread, || op(&scope)) }
    })
}

impl<'scope> Scope<'scope> {
    fn new(owner_thread: &WorkerThread) -> Self {
        Scope {
            base: ScopeBase::new(owner_thread),
        }
    }

    /// Spawns a job into the fork-join scope `self`. This job will
    /// execute sometime before the fork-join scope completes. The
    /// job is specified as a closure, and this closure receives its
    /// own reference to the scope `self` as argument. This can be
    /// used to inject new jobs into `self`.
    ///
    /// # Returns
    ///
    /// Nothing. The spawned closures cannot pass back values to the
    /// caller directly, though they can write to local variables on
    /// the stack (if those variables outlive the scope) or
    /// communicate through shared channels.
    ///
    /// (The intention is to eventually integrate with Rust futures to
    /// support spawns of functions that compute a value.)
/// /// # Examples /// /// ```rust /// # use rayon_core as rayon; /// let mut value_a = None; /// let mut value_b = None; /// let mut value_c = None; /// rayon::scope(|s| { /// s.spawn(|s1| { /// // ^ this is the same scope as `s`; this handle `s1` /// // is intended for use by the spawned task, /// // since scope handles cannot cross thread boundaries. /// /// value_a = Some(22); /// /// // the scope `s` will not end until all these tasks are done /// s1.spawn(|_| { /// value_b = Some(44); /// }); /// }); /// /// s.spawn(|_| { /// value_c = Some(66); /// }); /// }); /// assert_eq!(value_a, Some(22)); /// assert_eq!(value_b, Some(44)); /// assert_eq!(value_c, Some(66)); /// ``` /// /// # See also /// /// The [`scope` function] has more extensive documentation about /// task spawning. /// /// [`scope` function]: fn.scope.html pub fn spawn(&self, body: BODY) where BODY: FnOnce(&Scope<'scope>) + Send + 'scope, { self.base.increment(); unsafe { let job_ref = Box::new(HeapJob::new(move || { self.base.execute_job(move || body(self)) })) .as_job_ref(); // Since `Scope` implements `Sync`, we can't be sure that we're still in a // thread of this pool, so we can't just push to the local worker thread. self.base.registry.inject_or_push(job_ref); } } } impl<'scope> ScopeFifo<'scope> { fn new(owner_thread: &WorkerThread) -> Self { let num_threads = owner_thread.registry().num_threads(); ScopeFifo { base: ScopeBase::new(owner_thread), fifos: (0..num_threads).map(|_| JobFifo::new()).collect(), } } /// Spawns a job into the fork-join scope `self`. This job will /// execute sometime before the fork-join scope completes. The /// job is specified as a closure, and this closure receives its /// own reference to the scope `self` as argument. This can be /// used to inject new jobs into `self`. /// /// # See also /// /// This method is akin to [`Scope::spawn()`], but with a FIFO /// priority. The [`scope_fifo` function] has more details about /// this distinction. /// /// [`Scope::spawn()`]: struct.Scope.html#method.spawn /// [`scope_fifo` function]: fn.scope.html pub fn spawn_fifo(&self, body: BODY) where BODY: FnOnce(&ScopeFifo<'scope>) + Send + 'scope, { self.base.increment(); unsafe { let job_ref = Box::new(HeapJob::new(move || { self.base.execute_job(move || body(self)) })) .as_job_ref(); // If we're in the pool, use our scope's private fifo for this thread to execute // in a locally-FIFO order. Otherwise, just use the pool's global injector. match self.base.registry.current_thread() { Some(worker) => { let fifo = &self.fifos[worker.index()]; worker.push(fifo.push(job_ref)); } None => self.base.registry.inject(&[job_ref]), } } } } impl<'scope> ScopeBase<'scope> { /// Create the base of a new scope for the given worker thread fn new(owner_thread: &WorkerThread) -> Self { ScopeBase { owner_thread_index: owner_thread.index(), registry: owner_thread.registry().clone(), panic: AtomicPtr::new(ptr::null_mut()), job_completed_latch: CountLatch::new(), marker: PhantomData, } } fn increment(&self) { self.job_completed_latch.increment(); } /// Executes `func` as a job, either aborting or executing as /// appropriate. /// /// Unsafe because it must be executed on a worker thread. 
unsafe fn complete(&self, owner_thread: &WorkerThread, func: FUNC) -> R where FUNC: FnOnce() -> R, { let result = self.execute_job_closure(func); self.steal_till_jobs_complete(owner_thread); result.unwrap() // only None if `op` panicked, and that would have been propagated } /// Executes `func` as a job, either aborting or executing as /// appropriate. /// /// Unsafe because it must be executed on a worker thread. unsafe fn execute_job(&self, func: FUNC) where FUNC: FnOnce(), { let _: Option<()> = self.execute_job_closure(func); } /// Executes `func` as a job in scope. Adjusts the "job completed" /// counters and also catches any panic and stores it into /// `scope`. /// /// Unsafe because this must be executed on a worker thread. unsafe fn execute_job_closure(&self, func: FUNC) -> Option where FUNC: FnOnce() -> R, { match unwind::halt_unwinding(func) { Ok(r) => { self.job_completed_ok(); Some(r) } Err(err) => { self.job_panicked(err); None } } } unsafe fn job_panicked(&self, err: Box) { // capture the first error we see, free the rest let nil = ptr::null_mut(); let mut err = Box::new(err); // box up the fat ptr if self .panic .compare_exchange(nil, &mut *err, Ordering::Release, Ordering::Relaxed) .is_ok() { log!(JobPanickedErrorStored { owner_thread: self.owner_thread_index }); mem::forget(err); // ownership now transferred into self.panic } else { log!(JobPanickedErrorNotStored { owner_thread: self.owner_thread_index }); } self.job_completed_latch.set(); } unsafe fn job_completed_ok(&self) { log!(JobCompletedOk { owner_thread: self.owner_thread_index }); self.job_completed_latch.set(); } unsafe fn steal_till_jobs_complete(&self, owner_thread: &WorkerThread) { // wait for job counter to reach 0: owner_thread.wait_until(&self.job_completed_latch); // propagate panic, if any occurred; at this point, all // outstanding jobs have completed, so we can use a relaxed // ordering: let panic = self.panic.swap(ptr::null_mut(), Ordering::Relaxed); if !panic.is_null() { log!(ScopeCompletePanicked { owner_thread: owner_thread.index() }); let value: Box> = mem::transmute(panic); unwind::resume_unwinding(*value); } else { log!(ScopeCompleteNoPanic { owner_thread: owner_thread.index() }); } } } impl<'scope> fmt::Debug for Scope<'scope> { fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { fmt.debug_struct("Scope") .field("pool_id", &self.base.registry.id()) .field("owner_thread_index", &self.base.owner_thread_index) .field("panic", &self.base.panic) .field("job_completed_latch", &self.base.job_completed_latch) .finish() } } impl<'scope> fmt::Debug for ScopeFifo<'scope> { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { fmt.debug_struct("ScopeFifo") .field("num_fifos", &self.fifos.len()) .field("pool_id", &self.base.registry.id()) .field("owner_thread_index", &self.base.owner_thread_index) .field("panic", &self.base.panic) .field("job_completed_latch", &self.base.job_completed_latch) .finish() } } rayon-core-1.6.0/src/scope/test.rs010066400247370024737000000315051352066016100152740ustar0000000000000000use rand::{Rng, SeedableRng}; use rand_xorshift::XorShiftRng; use std::cmp; use std::iter::once; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Mutex; use std::vec; use unwind; use ThreadPoolBuilder; use {scope, scope_fifo, Scope}; #[test] fn scope_empty() { scope(|_| {}); } #[test] fn scope_result() { let x = scope(|_| 22); assert_eq!(x, 22); } #[test] fn scope_two() { let counter = &AtomicUsize::new(0); scope(|s| { s.spawn(move |_| { counter.fetch_add(1, Ordering::SeqCst); 
}); s.spawn(move |_| { counter.fetch_add(10, Ordering::SeqCst); }); }); let v = counter.load(Ordering::SeqCst); assert_eq!(v, 11); } #[test] fn scope_divide_and_conquer() { let counter_p = &AtomicUsize::new(0); scope(|s| s.spawn(move |s| divide_and_conquer(s, counter_p, 1024))); let counter_s = &AtomicUsize::new(0); divide_and_conquer_seq(&counter_s, 1024); let p = counter_p.load(Ordering::SeqCst); let s = counter_s.load(Ordering::SeqCst); assert_eq!(p, s); } fn divide_and_conquer<'scope>(scope: &Scope<'scope>, counter: &'scope AtomicUsize, size: usize) { if size > 1 { scope.spawn(move |scope| divide_and_conquer(scope, counter, size / 2)); scope.spawn(move |scope| divide_and_conquer(scope, counter, size / 2)); } else { // count the leaves counter.fetch_add(1, Ordering::SeqCst); } } fn divide_and_conquer_seq(counter: &AtomicUsize, size: usize) { if size > 1 { divide_and_conquer_seq(counter, size / 2); divide_and_conquer_seq(counter, size / 2); } else { // count the leaves counter.fetch_add(1, Ordering::SeqCst); } } struct Tree { value: T, children: Vec>, } impl Tree { fn iter<'s>(&'s self) -> vec::IntoIter<&'s T> { once(&self.value) .chain(self.children.iter().flat_map(Tree::iter)) .collect::>() // seems like it shouldn't be needed... but prevents overflow .into_iter() } fn update(&mut self, op: OP) where OP: Fn(&mut T) + Sync, T: Send, { scope(|s| self.update_in_scope(&op, s)); } fn update_in_scope<'scope, OP>(&'scope mut self, op: &'scope OP, scope: &Scope<'scope>) where OP: Fn(&mut T) + Sync, { let Tree { ref mut value, ref mut children, } = *self; scope.spawn(move |scope| { for child in children { scope.spawn(move |scope| child.update_in_scope(op, scope)); } }); op(value); } } fn random_tree(depth: usize) -> Tree { assert!(depth > 0); let mut seed = ::Seed::default(); (0..).zip(seed.as_mut()).for_each(|(i, x)| *x = i); let mut rng = XorShiftRng::from_seed(seed); random_tree1(depth, &mut rng) } fn random_tree1(depth: usize, rng: &mut XorShiftRng) -> Tree { let children = if depth == 0 { vec![] } else { (0..rng.gen_range(0, 4)) // somewhere between 0 and 3 children at each level .map(|_| random_tree1(depth - 1, rng)) .collect() }; Tree { value: rng.gen_range(0, 1_000_000), children, } } #[test] fn update_tree() { let mut tree: Tree = random_tree(10); let values: Vec = tree.iter().cloned().collect(); tree.update(|v| *v += 1); let new_values: Vec = tree.iter().cloned().collect(); assert_eq!(values.len(), new_values.len()); for (&i, &j) in values.iter().zip(&new_values) { assert_eq!(i + 1, j); } } /// Check that if you have a chain of scoped tasks where T0 spawns T1 /// spawns T2 and so forth down to Tn, the stack space should not grow /// linearly with N. We test this by some unsafe hackery and /// permitting an approx 10% change with a 10x input change. 
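/// (Concretely, `the_final_countdown` below has each task take the address of a
/// stack local and tracks the maximum distance from a `bottom_of_stack` local in
/// the caller; if stack usage grew linearly with N, that distance would too.)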
#[test] fn linear_stack_growth() { let builder = ThreadPoolBuilder::new().num_threads(1); let pool = builder.build().unwrap(); pool.install(|| { let mut max_diff = Mutex::new(0); let bottom_of_stack = 0; scope(|s| the_final_countdown(s, &bottom_of_stack, &max_diff, 5)); let diff_when_5 = *max_diff.get_mut().unwrap() as f64; scope(|s| the_final_countdown(s, &bottom_of_stack, &max_diff, 500)); let diff_when_500 = *max_diff.get_mut().unwrap() as f64; let ratio = diff_when_5 / diff_when_500; assert!( ratio > 0.9 && ratio < 1.1, "stack usage ratio out of bounds: {}", ratio ); }); } fn the_final_countdown<'scope>( s: &Scope<'scope>, bottom_of_stack: &'scope i32, max: &'scope Mutex, n: usize, ) { let top_of_stack = 0; let p = bottom_of_stack as *const i32 as usize; let q = &top_of_stack as *const i32 as usize; let diff = if p > q { p - q } else { q - p }; let mut data = max.lock().unwrap(); *data = cmp::max(diff, *data); if n > 0 { s.spawn(move |s| the_final_countdown(s, bottom_of_stack, max, n - 1)); } } #[test] #[should_panic(expected = "Hello, world!")] fn panic_propagate_scope() { scope(|_| panic!("Hello, world!")); } #[test] #[should_panic(expected = "Hello, world!")] fn panic_propagate_spawn() { scope(|s| s.spawn(|_| panic!("Hello, world!"))); } #[test] #[should_panic(expected = "Hello, world!")] fn panic_propagate_nested_spawn() { scope(|s| s.spawn(|s| s.spawn(|s| s.spawn(|_| panic!("Hello, world!"))))); } #[test] #[should_panic(expected = "Hello, world!")] fn panic_propagate_nested_scope_spawn() { scope(|s| s.spawn(|_| scope(|s| s.spawn(|_| panic!("Hello, world!"))))); } #[test] fn panic_propagate_still_execute_1() { let mut x = false; match unwind::halt_unwinding(|| { scope(|s| { s.spawn(|_| panic!("Hello, world!")); // job A s.spawn(|_| x = true); // job B, should still execute even though A panics }); }) { Ok(_) => panic!("failed to propagate panic"), Err(_) => assert!(x, "job b failed to execute"), } } #[test] fn panic_propagate_still_execute_2() { let mut x = false; match unwind::halt_unwinding(|| { scope(|s| { s.spawn(|_| x = true); // job B, should still execute even though A panics s.spawn(|_| panic!("Hello, world!")); // job A }); }) { Ok(_) => panic!("failed to propagate panic"), Err(_) => assert!(x, "job b failed to execute"), } } #[test] fn panic_propagate_still_execute_3() { let mut x = false; match unwind::halt_unwinding(|| { scope(|s| { s.spawn(|_| x = true); // spanwed job should still execute despite later panic panic!("Hello, world!"); }); }) { Ok(_) => panic!("failed to propagate panic"), Err(_) => assert!(x, "panic after spawn, spawn failed to execute"), } } #[test] fn panic_propagate_still_execute_4() { let mut x = false; match unwind::halt_unwinding(|| { scope(|s| { s.spawn(|_| panic!("Hello, world!")); x = true; }); }) { Ok(_) => panic!("failed to propagate panic"), Err(_) => assert!(x, "panic in spawn tainted scope"), } } macro_rules! test_order { ($scope:ident => $spawn:ident) => {{ let builder = ThreadPoolBuilder::new().num_threads(1); let pool = builder.build().unwrap(); pool.install(|| { let vec = Mutex::new(vec![]); $scope(|scope| { let vec = &vec; for i in 0..10 { scope.$spawn(move |scope| { for j in 0..10 { scope.$spawn(move |_| { vec.lock().unwrap().push(i * 10 + j); }); } }); } }); vec.into_inner().unwrap() }) }}; } #[test] fn lifo_order() { // In the absense of stealing, `scope()` runs its `spawn()` jobs in LIFO order. 
let vec = test_order!(scope => spawn); let expected: Vec = (0..100).rev().collect(); // LIFO -> reversed assert_eq!(vec, expected); } #[test] fn fifo_order() { // In the absense of stealing, `scope_fifo()` runs its `spawn_fifo()` jobs in FIFO order. let vec = test_order!(scope_fifo => spawn_fifo); let expected: Vec = (0..100).collect(); // FIFO -> natural order assert_eq!(vec, expected); } macro_rules! test_nested_order { ($outer_scope:ident => $outer_spawn:ident, $inner_scope:ident => $inner_spawn:ident) => {{ let builder = ThreadPoolBuilder::new().num_threads(1); let pool = builder.build().unwrap(); pool.install(|| { let vec = Mutex::new(vec![]); $outer_scope(|scope| { let vec = &vec; for i in 0..10 { scope.$outer_spawn(move |_| { $inner_scope(|scope| { for j in 0..10 { scope.$inner_spawn(move |_| { vec.lock().unwrap().push(i * 10 + j); }); } }); }); } }); vec.into_inner().unwrap() }) }}; } #[test] fn nested_lifo_order() { // In the absense of stealing, `scope()` runs its `spawn()` jobs in LIFO order. let vec = test_nested_order!(scope => spawn, scope => spawn); let expected: Vec = (0..100).rev().collect(); // LIFO -> reversed assert_eq!(vec, expected); } #[test] fn nested_fifo_order() { // In the absense of stealing, `scope_fifo()` runs its `spawn_fifo()` jobs in FIFO order. let vec = test_nested_order!(scope_fifo => spawn_fifo, scope_fifo => spawn_fifo); let expected: Vec = (0..100).collect(); // FIFO -> natural order assert_eq!(vec, expected); } #[test] fn nested_lifo_fifo_order() { // LIFO on the outside, FIFO on the inside let vec = test_nested_order!(scope => spawn, scope_fifo => spawn_fifo); let expected: Vec = (0..10) .rev() .flat_map(|i| (0..10).map(move |j| i * 10 + j)) .collect(); assert_eq!(vec, expected); } #[test] fn nested_fifo_lifo_order() { // FIFO on the outside, LIFO on the inside let vec = test_nested_order!(scope_fifo => spawn_fifo, scope => spawn); let expected: Vec = (0..10) .flat_map(|i| (0..10).rev().map(move |j| i * 10 + j)) .collect(); assert_eq!(vec, expected); } macro_rules! spawn_push { ($scope:ident . $spawn:ident, $vec:ident, $i:expr) => {{ $scope.$spawn(move |_| $vec.lock().unwrap().push($i)); }}; } /// Test spawns pushing a series of numbers, interleaved /// such that negative values are using an inner scope. macro_rules! test_mixed_order { ($outer_scope:ident => $outer_spawn:ident, $inner_scope:ident => $inner_spawn:ident) => {{ let builder = ThreadPoolBuilder::new().num_threads(1); let pool = builder.build().unwrap(); pool.install(|| { let vec = Mutex::new(vec![]); $outer_scope(|outer_scope| { let vec = &vec; spawn_push!(outer_scope.$outer_spawn, vec, 0); $inner_scope(|inner_scope| { spawn_push!(inner_scope.$inner_spawn, vec, -1); spawn_push!(outer_scope.$outer_spawn, vec, 1); spawn_push!(inner_scope.$inner_spawn, vec, -2); spawn_push!(outer_scope.$outer_spawn, vec, 2); spawn_push!(inner_scope.$inner_spawn, vec, -3); }); spawn_push!(outer_scope.$outer_spawn, vec, 3); }); vec.into_inner().unwrap() }) }}; } #[test] fn mixed_lifo_order() { // NB: the end of the inner scope makes us execute some of the outer scope // before they've all been spawned, so they're not perfectly LIFO. 
let vec = test_mixed_order!(scope => spawn, scope => spawn); let expected = vec![-3, 2, -2, 1, -1, 3, 0]; assert_eq!(vec, expected); } #[test] fn mixed_fifo_order() { let vec = test_mixed_order!(scope_fifo => spawn_fifo, scope_fifo => spawn_fifo); let expected = vec![-1, 0, -2, 1, -3, 2, 3]; assert_eq!(vec, expected); } #[test] fn mixed_lifo_fifo_order() { // NB: the end of the inner scope makes us execute some of the outer scope // before they've all been spawned, so they're not perfectly LIFO. let vec = test_mixed_order!(scope => spawn, scope_fifo => spawn_fifo); let expected = vec![-1, 2, -2, 1, -3, 3, 0]; assert_eq!(vec, expected); } #[test] fn mixed_fifo_lifo_order() { let vec = test_mixed_order!(scope_fifo => spawn_fifo, scope => spawn); let expected = vec![-3, 0, -2, 1, -1, 2, 3]; assert_eq!(vec, expected); } rayon-core-1.6.0/src/sleep/README.md010066400247370024737000000442101336642636100152340ustar0000000000000000# Introduction: the sleep module The code in this module governs when worker threads should go to sleep. This is a tricky topic -- the work-stealing algorithm relies on having active worker threads running around stealing from one another. But, if there isn't a lot of work, this can be a bit of a drag, because it requires high CPU usage. The code in this module takes a fairly simple approach to the problem. It allows worker threads to fall asleep if they have failed to steal work after various thresholds; however, whenever new work appears, they will wake up briefly and try to steal again. There are some shortcomings in this current approach: - it can (to some extent) scale *down* the amount of threads, but they can never be scaled *up*. The latter might be useful in the case of user tasks that must (perhaps very occasionally and unpredictably) block for long periods of time. - however, the preferred approach to this is for users to adopt futures instead (and indeed this sleeping work is intended to enable future integration). - we have no way to wake up threads in a fine-grained or targeted manner. The current system wakes up *all* sleeping threads whenever *any* of them might be interested in an event. This means that while we can scale CPU usage down, we do is in a fairly "bursty" manner, where everyone comes online, then some of them go back offline. # The interface for workers Workers interact with the sleep module by invoking three methods: - `work_found()`: signals that the worker found some work and is about to execute it. - `no_work_found()`: signals that the worker searched all available sources for work and found none. - It is important for the coherence of the algorithm that if work was available **before the search started**, it would have been found. If work was made available during the search, then it's ok that it might have been overlooked. - `tickle()`: indicates that new work is available (e.g., a job has been pushed to the local deque) or that some other blocking condition has been resolved (e.g., a latch has been set). Wakes up any sleeping workers. When in a loop searching for work, Workers also have to maintain an integer `yields` that they provide to the `sleep` module (which will return a new value for the next time). 
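Concretely, the worker-facing signatures (they appear in `src/sleep/mod.rs` later in this crate) have the shape sketched below; the bodies here are illustrative stand-ins only, not the real logic:

```rust
struct Sleep; // the real struct also holds the state word, a mutex, and a condvar

impl Sleep {
    /// Worker `worker_index` found work: its `yields` counter resets to 0.
    fn work_found(&self, _worker_index: usize, _yields: usize) -> usize {
        0
    }

    /// Worker found nothing: yield the CPU, bump the counter, and possibly
    /// get sleepy or fall asleep once the thresholds are reached.
    fn no_work_found(&self, _worker_index: usize, yields: usize) -> usize {
        yields + 1
    }

    /// New work appeared, or a blocking condition was resolved: wake workers.
    fn tickle(&self, _worker_index: usize) {}
}
```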
Thus the basic worker "find work" loop looks like this (this is `wait_until()`, basically): ```rust let mut yields = 0; while /* not done */ { if let Some(job) = search_for_work() { yields = work_found(self.index, yields); } else { yields = no_work_found(self.index, yields); } } ``` # Getting sleepy and falling asleep The basic idea here is that every worker goes through three states: - **Awake:** actively hunting for tasks. - **Sleepy:** still actively hunting for tasks, but we have signaled that we might go to sleep soon if we don't find any. - **Asleep:** actually asleep (blocked on a condition variable). At any given time, only **one** worker can be in the sleepy state. This allows us to coordinate the entire sleep protocol using a single `AtomicUsize` and without the need of epoch counters or other things that might rollover and so forth. Whenever a worker invokes `work_found()`, it transitions back to the **awake** state. In other words, if it was sleepy, it stops being sleepy. (`work_found()` cannot be invoked when the worker is asleep, since then it is not doing anything.) On the other hand, whenever a worker invokes `no_work_found()`, it *may* transition to a more sleepy state. To track this, we use the counter `yields` that is maintained by the worker's steal loop. This counter starts at 0. Whenever work is found, the counter is returned to 0. But each time that **no** work is found, the counter is incremented. Eventually it will reach a threshold `ROUNDS_UNTIL_SLEEPY`. At this point, the worker will try to become the sleepy one. It does this by executing a CAS into the global registry state (details on this below). If that attempt is successful, then the counter is incremented again, so that it is equal to `ROUNDS_UNTIL_SLEEPY + 1`. Otherwise, the counter stays the same (and hence we will keep trying to become sleepy until either work is found or we are successful). Becoming sleepy does not put us to sleep immediately. Instead, we keep iterating and looking for work for some further number of rounds. If during this search we **do** find work, then we will return the counter to 0 and also reset the global state to indicate we are no longer sleepy. But if again no work is found, `yields` will eventually reach the value `ROUNDS_UNTIL_ASLEEP`. At that point, we will try to transition from **sleepy** to **asleep**. This is done by the helper fn `sleep()`, which executes another CAS on the global state that removes our worker as the sleepy worker and instead sets a flag to indicate that there are sleeping workers present (the flag may already have been set, that's ok). Assuming that CAS succeeds, we will block on a condition variable. # Tickling workers Of course, while all the stuff in the previous section is happening, other workers are (hopefully) producing new work. There are three kinds of events that can allow a blocked worker to make progress: 1. A new task is pushed onto a worker's deque. This task could be stolen. 2. A new task is injected into the thread-pool from the outside. This task could be uninjected and executed. 3. A latch is set. One of the sleeping workers might have been waiting for that before it could go on. Whenever one of these things happens, the worker (or thread, more generally) responsible must invoke `tickle()`. Tickle will basically wake up **all** the workers: - If any worker was the sleepy one, then the global state is changed so that there is no sleepy worker. 
The sleepy one will notice this when it next invokes `no_work_found()` and will return to the *awake* state (with a yield counter of zero).
- If any workers were actually **asleep**, then we invoke `notify_all()` on the condition variable, which will cause them to awaken and start over from the awake state (with a yield counter of zero).

Because `tickle()` is invoked very frequently -- and hopefully most of the time it is not needed, because the workers are already actively stealing -- it is important that it be very cheap. The current design requires, in the case where nobody is even sleepy, just a load and a compare. If there are sleepy workers, a swap is needed. If there are workers *asleep*, we must naturally acquire the lock and signal the condition variable.

# The global state

We manage all of the above state transitions using a small bit of global state (well, global to the registry). This is stored in the `Sleep` struct. The primary thing is a single `AtomicUsize`. The value in this usize packs in two pieces of information:

1. **Are any workers asleep?** This is just one bit (yes or no).
2. **Which worker is the sleepy worker, if any?** This is a worker id.

We use bit 0 to indicate whether any workers are asleep. So if `state & 1` is zero, then no workers are sleeping. But if `state & 1` is 1, then some workers are either sleeping or on their way to falling asleep (i.e., they have acquired the lock).

The remaining bits are used to store if there is a sleepy worker. We want `0` to indicate that there is no sleepy worker. If there is a sleepy worker with index `worker_index`, we would store `(worker_index + 1) << 1`. The `+1` is there because worker indices are 0-based, so this ensures that the value is non-zero, and the shift skips over the sleepy bit.

Some examples:

- `0`: everyone is awake, nobody is sleepy
- `1`: some workers are asleep, no sleepy worker
- `2`: no workers are asleep, but worker 0 is sleepy (`(0 + 1) << 1 == 2`).
- `3`: some workers are asleep, and worker 0 is sleepy.

# Correctness level 1: avoiding deadlocks etc

In general, we do not want to miss wakeups. Two bad things could happen:

- **Suboptimal performance**: If this is a wakeup about a new job being pushed into a local deque, it won't deadlock, but it will cause things to run slowly. The reason that it won't deadlock is that we know at least one thread is active (the one doing the pushing), and it will (sooner or later) try to pop this item from its own local deque.
- **Deadlocks:** If this is a wakeup about an injected job or a latch that got set, however, this can cause deadlocks. In the former case, if a job is injected but no thread ever wakes to process it, the injector will likely block forever. In the latter case, imagine this scenario:
  - thread A calls join, forking a task T1, then executing task T2
  - thread B steals T1, forks a task T3, and executes T4
  - thread A completes task T2 and blocks on T1
  - thread A steals task T3 from thread B
  - thread B finishes T4 and goes to sleep, blocking on T3
  - thread A completes task T3 and makes a wakeup, but it gets lost

  At this point, thread B is still asleep and will never signal T1, so thread A will itself go to sleep. Bad.

It turns out that guaranteeing we don't miss a wakeup while retaining good performance is fairly tricky. This is because of some details of the C++11 memory model. But let's ignore those for now and generally assume sequential consistency. In that case, our scheme should work perfectly.
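To make the encoding above concrete, here is a minimal self-contained sketch of the packing helpers (the names and bit manipulation mirror the helpers that appear in `src/sleep/mod.rs`; the free-standing form and the `main` are only for illustration):

```rust
const AWAKE: usize = 0;    // state & 1 == 0: nobody is asleep
const SLEEPING: usize = 1; // state & 1 == 1: somebody is asleep (or acquiring the lock)

fn anyone_sleeping(state: usize) -> bool {
    state & SLEEPING != 0
}

fn any_worker_is_sleepy(state: usize) -> bool {
    (state >> 1) != 0 // non-zero upper bits: some worker is sleepy
}

fn worker_is_sleepy(state: usize, worker_index: usize) -> bool {
    (state >> 1) == (worker_index + 1)
}

fn with_sleepy_worker(state: usize, worker_index: usize) -> usize {
    debug_assert!(state == AWAKE || state == SLEEPING);
    ((worker_index + 1) << 1) + state // `+ 1` keeps it non-zero; shift skips bit 0
}

fn main() {
    // The worked examples from the text:
    assert!(!anyone_sleeping(0) && !any_worker_is_sleepy(0));
    assert!(anyone_sleeping(1) && !any_worker_is_sleepy(1));
    assert!(!anyone_sleeping(2) && worker_is_sleepy(2, 0)); // (0 + 1) << 1 == 2
    assert!(anyone_sleeping(3) && worker_is_sleepy(3, 0));
    assert_eq!(with_sleepy_worker(SLEEPING, 0), 3);
}
```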
Even if you assume seqcst, though, ensuring that you don't miss wakeups can be fairly tricky in the absence of a central queue. For example, consider the simplest scheme: imagine we just had a boolean flag indicating whether anyone was asleep. Then you could imagine that when workers find no work, they flip this flag to true. When work is published, if the flag is true, we issue a wakeup.

The problem here is that checking for new work is not an atomic action. So it's possible that worker 1 could start looking for work and (say) see that worker 0's queue is empty and then search workers 2..N. While that searching is taking place, worker 0 publishes some new work. At the time when the new work is published, the "anyone sleeping?" flag is still false, so nothing happens. Then worker 1, who failed to find any work, goes to sleep --- completely missing the wakeup!

We use the "sleepy worker" idea to sidestep this problem. Under our scheme, instead of going right to sleep at the end, worker 1 would become sleepy. Worker 1 would then do **at least** one additional scan. During this scan, they should find the work published by worker 0, so they will stop being sleepy and go back to work (here of course we are assuming that no one else has stolen the worker 0 work yet; if someone else stole it, worker 1 may still go to sleep, but that's ok, since there is no more work to be had).

Now you may be wondering -- how does being sleepy help? What if, instead of publishing its job right before worker 1 became sleepy, worker 0 waited until right before worker 1 was going to go to sleep? In other words, the sequence was like this:

- worker 1 gets sleepy
- worker 1 starts its scan, scanning worker 0's deque
- worker 0 publishes its job, but nobody is sleeping yet, so no wakeups occur
- worker 1 finishes its scan, goes to sleep, missing the wakeup

The reason that this doesn't occur is that, when worker 0 publishes its job, it will see that there is a sleepy worker. It will clear the global state to 0. Then, when worker 1 finishes its scan, it will notice that it is no longer sleepy, and hence it will not go to sleep. Instead it will awaken and keep searching for work. The sleepy worker phase thus also serves as a cheap way to signal that work is around: instead of doing the whole dance of acquiring a lock and issuing notifications, when we publish work we can just swap a single atomic counter and let the sleepy worker notice that on its own.

## Beyond seq-cst

Unfortunately, the C++11 memory model doesn't generally guarantee seq-cst. And, somewhat annoyingly, it's not easy for the sleep module **in isolation** to guarantee the properties we need. The key challenge has to do with the *synchronized-with* relation. Typically, we try to use acquire-release reasoning, and in that case the idea is that **if** a load observes a store, it will also observe those writes that preceded the store. But nothing says that the load **must** observe the store -- at least not right away.

The place that this is most relevant is the load in the `tickle()` routine. The routine begins by reading from the global state. If it sees anything other than 0, it then does a swap and -- if necessary -- acquires a lock and does a notify. This load is a seq-cst load (as are the other accesses in tickle). This ensures that it is sensible to talk about a tickle happening *before* a worker gets sleepy and so forth.
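For reference, here is a self-contained sketch of that routine's shape (it closely mirrors the `tickle`/`tickle_cold` pair that appears in `src/sleep/mod.rs` later in this document; the cold path is trimmed down here):

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

const AWAKE: usize = 0;

struct Sleep {
    state: AtomicUsize,
}

impl Sleep {
    // Fast path: a single seq-cst load; the expensive work lives in a
    // #[cold] helper so that the common no-op case stays cheap.
    fn tickle(&self, worker_index: usize) {
        let old_state = self.state.load(Ordering::SeqCst);
        if old_state != AWAKE {
            self.tickle_cold(worker_index);
        }
    }

    #[cold]
    fn tickle_cold(&self, _worker_index: usize) {
        // Swap back to AWAKE; the real code then takes the lock and
        // calls `notify_all()` if the old state had the sleeping bit set.
        let _old_state = self.state.swap(AWAKE, Ordering::Release);
    }
}
```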
It turns out that to get things right, if we use the current tickle routine, we have to use seq-cst operations **both in the sleep module and when publishing work**. We'll walk through two scenarios to show what I mean.

### Scenario 1: get-sleepy-then-get-tickled

This scenario shows why the operations in sleep must be seq-cst. We want to ensure that once a worker gets sleepy, any other worker that does a tickle will observe that. In other words, we want to ensure that the following scenario **cannot happen**:

1. worker 1 is blocked on latch L
2. worker 1 becomes sleepy
   - becoming sleepy involves a CAS on the global state to set it to 4 ("worker 1 is sleepy")
3. worker 0 sets latch L
4. worker 0 tickles **but does not see that worker 1 is sleepy**

Let's diagram this. The notation `read_xxx(A) = V` means that a read of location `A` was executed with the result `V`. The `xxx` is the ordering and the location `A` is either `L` (latch) or `S` (global state). I will leave the ordering on the latch as `xxx` as it is not relevant here. The numbers correspond to the steps above.

```
    worker 0                    worker 1
       |                           +- 2: cas_sc(S, 4)
    s  |  3: write_xxx(L)          +
    b  |  4: read_sc(S) = ???  <-sc-+
       v
```

Clearly, this cannot happen with sc orderings, because read 4 will always return `4` here. However, if we tried to use acquire-release orderings on the global state, then there would be **no guarantee** that the tickle will observe that a sleepy worker occurred. We would be guaranteed only that worker 0 would **eventually** observe that worker 1 had become sleepy (and, at that time, that it would see other writes). But it could take time -- and if we indeed miss that worker 1 is sleepy, it could lead to deadlock or loss of efficiency, as explained earlier.

### Scenario 2: tickle-then-get-sleepy

This scenario shows why latch operations must *also* be seq-cst (and, more generally, any operations that publish work before a tickle). We wish to ensure that this ordering of events **cannot occur**:

1. worker 1 is blocked on latch L
2. worker 1 reads latch L, sees false, starts searching for work
3. worker 0 sets latch L
4. worker 0 tickles
   - the tickle reads from the global state, sees 0
5. worker 1 finishes searching, becomes sleepy
   - becoming sleepy involves a CAS on the global state to set it to 4 ("worker 1 is sleepy")
6. worker 1 reads latch L **but does not see that worker 0 set it**
7. worker 1 may then proceed to fall asleep

In other words, we want to ensure that if worker 0 sets a latch and does a tickle *before worker 1 gets sleepy*, then worker 1 will observe that latch as set when it calls probe. We'll see that, with the current scheme, this implies that the latch memory orderings must be seq-cst as well. Here is the diagram:

```
    worker 0                    worker 1
       |                           2: read_xxx(L) = false
    s  |  3: write_xxx(L, true)
    b  |  4: read_sc(S) = 0  -+
       |                      +-sc---> 5: cas_sc(S, 4)
       v                               6: read_xxx(L) = ???
```

The diagram shows that each thread's actions are related by *sequenced-before* (sb). Moreover, the read and write of `S` are related by `sc` (the seq-cst ordering). However, and this is crucial, this **does not** imply that oper 4 *synchronizes-with* oper 5. This is because a read never synchronizes-with a store, only the reverse. Hence, if the latch were using acq-rel orderings, it would be legal for oper 6 to return false. But if the latch were to use an **sc** ordering itself, then we know that oper 6 must return true, since `3 -sc-> 4 -sc-> 5 -sc-> 6`.
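As a sanity check on the `3 -sc-> 4 -sc-> 5 -sc-> 6` argument, here is a small standalone litmus-style program (not part of the crate) shaped like Scenario 2, with the latch as an `AtomicBool` and the global state as an `AtomicUsize`. With `SeqCst` everywhere, the assertion can never fire; with acquire/release on the latch, the memory model would permit it to fire:

```rust
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::thread;

static LATCH: AtomicBool = AtomicBool::new(false); // L
static STATE: AtomicUsize = AtomicUsize::new(0);   // S

fn main() {
    let worker0 = thread::spawn(|| {
        LATCH.store(true, Ordering::SeqCst); // 3: set latch L
        STATE.load(Ordering::SeqCst)         // 4: the tickle's read of S
    });
    let worker1 = thread::spawn(|| {
        // 5: become sleepy -- CAS the global state to 4 ("worker 1 is sleepy")
        let _ = STATE.compare_exchange(0, 4, Ordering::SeqCst, Ordering::Relaxed);
        LATCH.load(Ordering::SeqCst) // 6: read latch L
    });
    let s = worker0.join().unwrap();
    let latch_seen = worker1.join().unwrap();

    // If the tickle read S = 0, then in the single total order of seq-cst
    // operations we have 3 -sc-> 4 -sc-> 5 -sc-> 6, so read 6 must see the latch.
    if s == 0 {
        assert!(latch_seen, "tickle-then-get-sleepy: missed the latch");
    }
}
```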
**Note** that this means that, before we tickle, we must execute some seq-cst stores to publish our work (and during the scan we must load from those same locations) **if we wish to guarantee that the work we published WILL be seen by the other threads** (as opposed to *may*). This is true for setting a latch -- if a latch is set but another thread misses it, then the system could deadlock. However, in the case of pushing new work to a deque, we choose not to use a seqcst ordering. This is for several reasons: - If we miss a wakeup, the consequences are less dire: we simply run less efficiently (after all, the current thread will eventually complete its current task and pop the new task off the deque). - It is inconvenient: The deque code is beyond our control (it lies in another package). However, we could create a dummy `AtomicBool` for each deque and do a seqcst write to it (with whatever value) after we push to the deque, and a seqcst load whenever we steal from the deque. - The cost of using a dummy variable was found to be quite high for some benchmarks: - 8-10% overhead on nbody-parreduce - 15% overhead on increment-all - 40% overhead on join-recursively ### Alternative solutions In both cases above, our problems arose because tickle is merely performing a seq-cst read. If we instead had tickle perform a release *swap*, that would be a write action of the global state. No matter the ordering mode, all writes to the same memory location have a total ordering, and hence we would not have to worry about others storing a value that we fail to read (as in scenario 1). Similarly, as a release write, a swap during tickle would synchronize-with a later cas and so scenario 2 should be averted. So you might wonder why we don't do that. The simple reason was that it didn't perform as well! In my measurements, many benchmarks were unaffected by using a swap, but some of them were hit hard: - 8-10% overhead on nbody-parreduce - 35% overhead on increment-all - 245% overhead on join-recursively rayon-core-1.6.0/src/sleep/mod.rs010066400247370024737000000252551352535224200151030ustar0000000000000000//! Code that decides when workers should go to sleep. See README.md //! for an overview. 
use log::Event::*; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Condvar, Mutex}; use std::thread; use std::usize; pub(super) struct Sleep { state: AtomicUsize, data: Mutex<()>, tickle: Condvar, } const AWAKE: usize = 0; const SLEEPING: usize = 1; const ROUNDS_UNTIL_SLEEPY: usize = 32; const ROUNDS_UNTIL_ASLEEP: usize = 64; impl Sleep { pub(super) fn new() -> Sleep { Sleep { state: AtomicUsize::new(AWAKE), data: Mutex::new(()), tickle: Condvar::new(), } } fn anyone_sleeping(&self, state: usize) -> bool { state & SLEEPING != 0 } fn any_worker_is_sleepy(&self, state: usize) -> bool { (state >> 1) != 0 } fn worker_is_sleepy(&self, state: usize, worker_index: usize) -> bool { (state >> 1) == (worker_index + 1) } fn with_sleepy_worker(&self, state: usize, worker_index: usize) -> usize { debug_assert!(state == AWAKE || state == SLEEPING); ((worker_index + 1) << 1) + state } #[inline] pub(super) fn work_found(&self, worker_index: usize, yields: usize) -> usize { log!(FoundWork { worker: worker_index, yields: yields, }); if yields > ROUNDS_UNTIL_SLEEPY { // FIXME tickling here is a bit extreme; mostly we want to "release the lock" // from us being sleepy, we don't necessarily need to wake others // who are sleeping self.tickle(worker_index); } 0 } #[inline] pub(super) fn no_work_found(&self, worker_index: usize, yields: usize) -> usize { log!(DidNotFindWork { worker: worker_index, yields: yields, }); if yields < ROUNDS_UNTIL_SLEEPY { thread::yield_now(); yields + 1 } else if yields == ROUNDS_UNTIL_SLEEPY { thread::yield_now(); if self.get_sleepy(worker_index) { yields + 1 } else { yields } } else if yields < ROUNDS_UNTIL_ASLEEP { thread::yield_now(); if self.still_sleepy(worker_index) { yields + 1 } else { log!(GotInterrupted { worker: worker_index }); 0 } } else { debug_assert_eq!(yields, ROUNDS_UNTIL_ASLEEP); self.sleep(worker_index); 0 } } pub(super) fn tickle(&self, worker_index: usize) { // As described in README.md, this load must be SeqCst so as to ensure that: // - if anyone is sleepy or asleep, we *definitely* see that now (and not eventually); // - if anyone after us becomes sleepy or asleep, they see memory events that // precede the call to `tickle()`, even though we did not do a write. let old_state = self.state.load(Ordering::SeqCst); if old_state != AWAKE { self.tickle_cold(worker_index); } } #[cold] fn tickle_cold(&self, worker_index: usize) { // The `Release` ordering here suffices. The reasoning is that // the atomic's own natural ordering ensure that any attempt // to become sleepy/asleep either will come before/after this // swap. If it comes *after*, then Release is good because we // want it to see the action that generated this tickle. If it // comes *before*, then we will see it here (but not other // memory writes from that thread). If the other worker was // becoming sleepy, the other writes don't matter. If they // were were going to sleep, we will acquire lock and hence // acquire their reads. let old_state = self.state.swap(AWAKE, Ordering::Release); log!(Tickle { worker: worker_index, old_state: old_state, }); if self.anyone_sleeping(old_state) { let _data = self.data.lock().unwrap(); self.tickle.notify_all(); } } fn get_sleepy(&self, worker_index: usize) -> bool { loop { // Acquire ordering suffices here. If some other worker // was sleepy but no longer is, we will eventually see // that, and until then it doesn't hurt to spin. 
// Otherwise, we will do a compare-exchange which will // assert a stronger order and acquire any reads etc that // we must see. let state = self.state.load(Ordering::Acquire); log!(GetSleepy { worker: worker_index, state: state, }); if self.any_worker_is_sleepy(state) { // somebody else is already sleepy, so we'll just wait our turn debug_assert!( !self.worker_is_sleepy(state, worker_index), "worker {} called `is_sleepy()`, \ but they are already sleepy (state={})", worker_index, state ); return false; } else { // make ourselves the sleepy one let new_state = self.with_sleepy_worker(state, worker_index); // This must be SeqCst on success because we want to // ensure: // // - That we observe any writes that preceded // some prior tickle, and that tickle may have only // done a SeqCst load on `self.state`. // - That any subsequent tickle *definitely* sees this store. // // See the section on "Ensuring Sequentially // Consistency" in README.md for more details. // // The failure ordering doesn't matter since we are // about to spin around and do a fresh load. if self .state .compare_exchange(state, new_state, Ordering::SeqCst, Ordering::Relaxed) .is_ok() { log!(GotSleepy { worker: worker_index, old_state: state, new_state: new_state, }); return true; } } } } fn still_sleepy(&self, worker_index: usize) -> bool { let state = self.state.load(Ordering::SeqCst); self.worker_is_sleepy(state, worker_index) } fn sleep(&self, worker_index: usize) { loop { // Acquire here suffices. If we observe that the current worker is still // sleepy, then in fact we know that no writes have occurred, and anyhow // we are going to do a CAS which will synchronize. // // If we observe that the state has changed, it must be // due to a tickle, and then the Acquire means we also see // any events that occured before that. let state = self.state.load(Ordering::Acquire); if self.worker_is_sleepy(state, worker_index) { // It is important that we hold the lock when we do // the CAS. Otherwise, if we were to CAS first, then // the following sequence of events could occur: // // - Thread A (us) sets state to SLEEPING. // - Thread B sets state to AWAKE. // - Thread C sets state to SLEEPY(C). // - Thread C sets state to SLEEPING. // - Thread A reawakens, acquires lock, and goes to sleep. // // Now we missed the wake-up from thread B! But since // we have the lock when we set the state to sleeping, // that cannot happen. Note that the swap `tickle()` // is not part of the lock, though, so let's play that // out: // // # Scenario 1 // // - A loads state and see SLEEPY(A) // - B swaps to AWAKE. // - A locks, fails CAS // // # Scenario 2 // // - A loads state and see SLEEPY(A) // - A locks, performs CAS // - B swaps to AWAKE. // - A waits (releasing lock) // - B locks, notifies // // In general, acquiring the lock inside the loop // seems like it could lead to bad performance, but // actually it should be ok. This is because the only // reason for the `compare_exchange` to fail is if an // awaken comes, in which case the next cycle around // the loop will just return. let data = self.data.lock().unwrap(); // This must be SeqCst on success because we want to // ensure: // // - That we observe any writes that preceded // some prior tickle, and that tickle may have only // done a SeqCst load on `self.state`. // - That any subsequent tickle *definitely* sees this store. // // See the section on "Ensuring Sequentially // Consistency" in README.md for more details. 
// // The failure ordering doesn't matter since we are // about to spin around and do a fresh load. if self .state .compare_exchange(state, SLEEPING, Ordering::SeqCst, Ordering::Relaxed) .is_ok() { // Don't do this in a loop. If we do it in a loop, we need // some way to distinguish the ABA scenario where the pool // was awoken but before we could process it somebody went // to sleep. Note that if we get a false wakeup it's not a // problem for us, we'll just loop around and maybe get // sleepy again. log!(FellAsleep { worker: worker_index }); let _ = self.tickle.wait(data).unwrap(); log!(GotAwoken { worker: worker_index }); return; } } else { log!(GotInterrupted { worker: worker_index }); return; } } } } rayon-core-1.6.0/src/spawn/mod.rs010066400247370024737000000143641352066016100151170ustar0000000000000000use job::*; use registry::Registry; use std::mem; use std::sync::Arc; use unwind; /// Fires off a task into the Rayon threadpool in the "static" or /// "global" scope. Just like a standard thread, this task is not /// tied to the current stack frame, and hence it cannot hold any /// references other than those with `'static` lifetime. If you want /// to spawn a task that references stack data, use [the `scope()` /// function][scope] to create a scope. /// /// [scope]: fn.scope.html /// /// Since tasks spawned with this function cannot hold references into /// the enclosing stack frame, you almost certainly want to use a /// `move` closure as their argument (otherwise, the closure will /// typically hold references to any variables from the enclosing /// function that you happen to use). /// /// This API assumes that the closure is executed purely for its /// side-effects (i.e., it might send messages, modify data protected /// by a mutex, or some such thing). If you want to compute a result, /// consider `spawn_future()`. /// /// There is no guaranteed order of execution for spawns, given that /// other threads may steal tasks at any time. However, they are /// generally prioritized in a LIFO order on the thread from which /// they were spawned. Other threads always steal from the other end of /// the deque, like FIFO order. The idea is that "recent" tasks are /// most likely to be fresh in the local CPU's cache, while other /// threads can steal older "stale" tasks. For an alternate approach, /// consider [`spawn_fifo()`] instead. /// /// [`spawn_fifo()`]: fn.spawn_fifo.html /// /// # Panic handling /// /// If this closure should panic, the resulting panic will be /// propagated to the panic handler registered in the `ThreadPoolBuilder`, /// if any. See [`ThreadPoolBuilder::panic_handler()`][ph] for more /// details. /// /// [ph]: struct.ThreadPoolBuilder.html#method.panic_handler /// /// # Examples /// /// This code creates a Rayon task that increments a global counter. /// /// ```rust /// # use rayon_core as rayon; /// use std::sync::atomic::{AtomicUsize, Ordering, ATOMIC_USIZE_INIT}; /// /// static GLOBAL_COUNTER: AtomicUsize = ATOMIC_USIZE_INIT; /// /// rayon::spawn(move || { /// GLOBAL_COUNTER.fetch_add(1, Ordering::SeqCst); /// }); /// ``` pub fn spawn(func: F) where F: FnOnce() + Send + 'static, { // We assert that current registry has not terminated. unsafe { spawn_in(func, &Registry::current()) } } /// Spawn an asynchronous job in `registry.` /// /// Unsafe because `registry` must not yet have terminated. 
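/// (Callers uphold this by keeping the registry alive for the duration of the
/// call -- for example, `ThreadPool::spawn` passes its own `self.registry`,
/// which cannot have terminated while the pool itself still exists.)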
pub(super) unsafe fn spawn_in(func: F, registry: &Arc) where F: FnOnce() + Send + 'static, { // We assert that this does not hold any references (we know // this because of the `'static` bound in the inferface); // moreover, we assert that the code below is not supposed to // be able to panic, and hence the data won't leak but will be // enqueued into some deque for later execution. let abort_guard = unwind::AbortIfPanic; // just in case we are wrong, and code CAN panic let job_ref = spawn_job(func, registry); registry.inject_or_push(job_ref); mem::forget(abort_guard); } unsafe fn spawn_job(func: F, registry: &Arc) -> JobRef where F: FnOnce() + Send + 'static, { // Ensure that registry cannot terminate until this job has // executed. This ref is decremented at the (*) below. registry.increment_terminate_count(); Box::new(HeapJob::new({ let registry = registry.clone(); move || { match unwind::halt_unwinding(func) { Ok(()) => {} Err(err) => { registry.handle_panic(err); } } registry.terminate(); // (*) permit registry to terminate now } })) .as_job_ref() } /// Fires off a task into the Rayon threadpool in the "static" or /// "global" scope. Just like a standard thread, this task is not /// tied to the current stack frame, and hence it cannot hold any /// references other than those with `'static` lifetime. If you want /// to spawn a task that references stack data, use [the `scope_fifo()` /// function](fn.scope_fifo.html) to create a scope. /// /// The behavior is essentially the same as [the `spawn` /// function](fn.spawn.html), except that calls from the same thread /// will be prioritized in FIFO order. This is similar to the now- /// deprecated [`breadth_first`] option, except the effect is isolated /// to relative `spawn_fifo` calls, not all threadpool tasks. /// /// For more details on this design, see Rayon [RFC #1]. /// /// [`breadth_first`]: struct.ThreadPoolBuilder.html#method.breadth_first /// [RFC #1]: https://github.com/rayon-rs/rfcs/blob/master/accepted/rfc0001-scope-scheduling.md /// /// # Panic handling /// /// If this closure should panic, the resulting panic will be /// propagated to the panic handler registered in the `ThreadPoolBuilder`, /// if any. See [`ThreadPoolBuilder::panic_handler()`][ph] for more /// details. /// /// [ph]: struct.ThreadPoolBuilder.html#method.panic_handler pub fn spawn_fifo(func: F) where F: FnOnce() + Send + 'static, { // We assert that current registry has not terminated. unsafe { spawn_fifo_in(func, &Registry::current()) } } /// Spawn an asynchronous FIFO job in `registry.` /// /// Unsafe because `registry` must not yet have terminated. pub(super) unsafe fn spawn_fifo_in(func: F, registry: &Arc) where F: FnOnce() + Send + 'static, { // We assert that this does not hold any references (we know // this because of the `'static` bound in the inferface); // moreover, we assert that the code below is not supposed to // be able to panic, and hence the data won't leak but will be // enqueued into some deque for later execution. let abort_guard = unwind::AbortIfPanic; // just in case we are wrong, and code CAN panic let job_ref = spawn_job(func, registry); // If we're in the pool, use our thread's private fifo for this thread to execute // in a locally-FIFO order. Otherwise, just use the pool's global injector. 
match registry.current_thread() { Some(worker) => worker.push_fifo(job_ref), None => registry.inject(&[job_ref]), } mem::forget(abort_guard); } #[cfg(test)] mod test; rayon-core-1.6.0/src/spawn/test.rs010066400247370024737000000160341353104652500153170ustar0000000000000000use scope; use std::any::Any; use std::sync::mpsc::channel; use std::sync::Mutex; use super::{spawn, spawn_fifo}; use ThreadPoolBuilder; #[test] fn spawn_then_join_in_worker() { let (tx, rx) = channel(); scope(move |_| { spawn(move || tx.send(22).unwrap()); }); assert_eq!(22, rx.recv().unwrap()); } #[test] fn spawn_then_join_outside_worker() { let (tx, rx) = channel(); spawn(move || tx.send(22).unwrap()); assert_eq!(22, rx.recv().unwrap()); } #[test] fn panic_fwd() { let (tx, rx) = channel(); let tx = Mutex::new(tx); let panic_handler = move |err: Box| { let tx = tx.lock().unwrap(); if let Some(&msg) = err.downcast_ref::<&str>() { if msg == "Hello, world!" { tx.send(1).unwrap(); } else { tx.send(2).unwrap(); } } else { tx.send(3).unwrap(); } }; let builder = ThreadPoolBuilder::new().panic_handler(panic_handler); builder .build() .unwrap() .spawn(move || panic!("Hello, world!")); assert_eq!(1, rx.recv().unwrap()); } /// Test what happens when the thread-pool is dropped but there are /// still active asynchronous tasks. We expect the thread-pool to stay /// alive and executing until those threads are complete. #[test] fn termination_while_things_are_executing() { let (tx0, rx0) = channel(); let (tx1, rx1) = channel(); // Create a thread-pool and spawn some code in it, but then drop // our reference to it. { let thread_pool = ThreadPoolBuilder::new().build().unwrap(); thread_pool.spawn(move || { let data = rx0.recv().unwrap(); // At this point, we know the "main" reference to the // `ThreadPool` has been dropped, but there are still // active threads. Launch one more. spawn(move || { tx1.send(data).unwrap(); }); }); } tx0.send(22).unwrap(); let v = rx1.recv().unwrap(); assert_eq!(v, 22); } #[test] fn custom_panic_handler_and_spawn() { let (tx, rx) = channel(); // Create a parallel closure that will send panics on the // channel; since the closure is potentially executed in parallel // with itself, we have to wrap `tx` in a mutex. let tx = Mutex::new(tx); let panic_handler = move |e: Box| { tx.lock().unwrap().send(e).unwrap(); }; // Execute an async that will panic. let builder = ThreadPoolBuilder::new().panic_handler(panic_handler); builder.build().unwrap().spawn(move || { panic!("Hello, world!"); }); // Check that we got back the panic we expected. let error = rx.recv().unwrap(); if let Some(&msg) = error.downcast_ref::<&str>() { assert_eq!(msg, "Hello, world!"); } else { panic!("did not receive a string from panic handler"); } } #[test] fn custom_panic_handler_and_nested_spawn() { let (tx, rx) = channel(); // Create a parallel closure that will send panics on the // channel; since the closure is potentially executed in parallel // with itself, we have to wrap `tx` in a mutex. let tx = Mutex::new(tx); let panic_handler = move |e| { tx.lock().unwrap().send(e).unwrap(); }; // Execute an async that will (eventually) panic. const PANICS: usize = 3; let builder = ThreadPoolBuilder::new().panic_handler(panic_handler); builder.build().unwrap().spawn(move || { // launch 3 nested spawn-asyncs; these should be in the same // thread-pool and hence inherit the same panic handler for _ in 0..PANICS { spawn(move || { panic!("Hello, world!"); }); } }); // Check that we get back the panics we expected. 
for _ in 0..PANICS { let error = rx.recv().unwrap(); if let Some(&msg) = error.downcast_ref::<&str>() { assert_eq!(msg, "Hello, world!"); } else { panic!("did not receive a string from panic handler"); } } } macro_rules! test_order { ($outer_spawn:ident, $inner_spawn:ident) => {{ let builder = ThreadPoolBuilder::new().num_threads(1); let pool = builder.build().unwrap(); let (tx, rx) = channel(); pool.install(move || { for i in 0..10 { let tx = tx.clone(); $outer_spawn(move || { for j in 0..10 { let tx = tx.clone(); $inner_spawn(move || { tx.send(i * 10 + j).unwrap(); }); } }); } }); rx.iter().collect::>() }}; } #[test] fn lifo_order() { // In the absense of stealing, `spawn()` jobs on a thread will run in LIFO order. let vec = test_order!(spawn, spawn); let expected: Vec = (0..100).rev().collect(); // LIFO -> reversed assert_eq!(vec, expected); } #[test] fn fifo_order() { // In the absense of stealing, `spawn_fifo()` jobs on a thread will run in FIFO order. let vec = test_order!(spawn_fifo, spawn_fifo); let expected: Vec = (0..100).collect(); // FIFO -> natural order assert_eq!(vec, expected); } #[test] fn lifo_fifo_order() { // LIFO on the outside, FIFO on the inside let vec = test_order!(spawn, spawn_fifo); let expected: Vec = (0..10) .rev() .flat_map(|i| (0..10).map(move |j| i * 10 + j)) .collect(); assert_eq!(vec, expected); } #[test] fn fifo_lifo_order() { // FIFO on the outside, LIFO on the inside let vec = test_order!(spawn_fifo, spawn); let expected: Vec = (0..10) .flat_map(|i| (0..10).rev().map(move |j| i * 10 + j)) .collect(); assert_eq!(vec, expected); } macro_rules! spawn_send { ($spawn:ident, $tx:ident, $i:expr) => {{ let tx = $tx.clone(); $spawn(move || tx.send($i).unwrap()); }}; } /// Test mixed spawns pushing a series of numbers, interleaved such /// such that negative values are using the second kind of spawn. macro_rules! test_mixed_order { ($pos_spawn:ident, $neg_spawn:ident) => {{ let builder = ThreadPoolBuilder::new().num_threads(1); let pool = builder.build().unwrap(); let (tx, rx) = channel(); pool.install(move || { spawn_send!($pos_spawn, tx, 0); spawn_send!($neg_spawn, tx, -1); spawn_send!($pos_spawn, tx, 1); spawn_send!($neg_spawn, tx, -2); spawn_send!($pos_spawn, tx, 2); spawn_send!($neg_spawn, tx, -3); spawn_send!($pos_spawn, tx, 3); }); rx.iter().collect::>() }}; } #[test] fn mixed_lifo_fifo_order() { let vec = test_mixed_order!(spawn, spawn_fifo); let expected = vec![3, -1, 2, -2, 1, -3, 0]; assert_eq!(vec, expected); } #[test] fn mixed_fifo_lifo_order() { let vec = test_mixed_order!(spawn_fifo, spawn); let expected = vec![0, -3, 1, -2, 2, -1, 3]; assert_eq!(vec, expected); } rayon-core-1.6.0/src/test.rs010066400247370024737000000132011352066016100141540ustar0000000000000000#![cfg(test)] use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Barrier}; #[allow(deprecated)] use Configuration; use {ThreadPoolBuildError, ThreadPoolBuilder}; #[test] fn worker_thread_index() { let pool = ThreadPoolBuilder::new().num_threads(22).build().unwrap(); assert_eq!(pool.current_num_threads(), 22); assert_eq!(pool.current_thread_index(), None); let index = pool.install(|| pool.current_thread_index().unwrap()); assert!(index < 22); } #[test] fn start_callback_called() { let n_threads = 16; let n_called = Arc::new(AtomicUsize::new(0)); // Wait for all the threads in the pool plus the one running tests. 
let barrier = Arc::new(Barrier::new(n_threads + 1)); let b = barrier.clone(); let nc = n_called.clone(); let start_handler = move |_| { nc.fetch_add(1, Ordering::SeqCst); b.wait(); }; let conf = ThreadPoolBuilder::new() .num_threads(n_threads) .start_handler(start_handler); let _ = conf.build().unwrap(); // Wait for all the threads to have been scheduled to run. barrier.wait(); // The handler must have been called on every started thread. assert_eq!(n_called.load(Ordering::SeqCst), n_threads); } #[test] fn exit_callback_called() { let n_threads = 16; let n_called = Arc::new(AtomicUsize::new(0)); // Wait for all the threads in the pool plus the one running tests. let barrier = Arc::new(Barrier::new(n_threads + 1)); let b = barrier.clone(); let nc = n_called.clone(); let exit_handler = move |_| { nc.fetch_add(1, Ordering::SeqCst); b.wait(); }; let conf = ThreadPoolBuilder::new() .num_threads(n_threads) .exit_handler(exit_handler); { let _ = conf.build().unwrap(); // Drop the pool so it stops the running threads. } // Wait for all the threads to have been scheduled to run. barrier.wait(); // The handler must have been called on every exiting thread. assert_eq!(n_called.load(Ordering::SeqCst), n_threads); } #[test] fn handler_panics_handled_correctly() { let n_threads = 16; let n_called = Arc::new(AtomicUsize::new(0)); // Wait for all the threads in the pool plus the one running tests. let start_barrier = Arc::new(Barrier::new(n_threads + 1)); let exit_barrier = Arc::new(Barrier::new(n_threads + 1)); let start_handler = move |_| { panic!("ensure panic handler is called when starting"); }; let exit_handler = move |_| { panic!("ensure panic handler is called when exiting"); }; let sb = start_barrier.clone(); let eb = exit_barrier.clone(); let nc = n_called.clone(); let panic_handler = move |_| { let val = nc.fetch_add(1, Ordering::SeqCst); if val < n_threads { sb.wait(); } else { eb.wait(); } }; let conf = ThreadPoolBuilder::new() .num_threads(n_threads) .start_handler(start_handler) .exit_handler(exit_handler) .panic_handler(panic_handler); { let _ = conf.build().unwrap(); // Wait for all the threads to start, panic in the start handler, // and been taken care of by the panic handler. start_barrier.wait(); // Drop the pool so it stops the running threads. } // Wait for all the threads to exit, panic in the exit handler, // and been taken care of by the panic handler. exit_barrier.wait(); // The panic handler must have been called twice on every thread. 
assert_eq!(n_called.load(Ordering::SeqCst), 2 * n_threads); } #[test] #[allow(deprecated)] fn check_config_build() { let pool = ThreadPoolBuilder::new().num_threads(22).build().unwrap(); assert_eq!(pool.current_num_threads(), 22); } /// Helper used by check_error_send_sync to ensure ThreadPoolBuildError is Send + Sync fn _send_sync() {} #[test] fn check_error_send_sync() { _send_sync::(); } #[allow(deprecated)] #[test] fn configuration() { let start_handler = move |_| {}; let exit_handler = move |_| {}; let panic_handler = move |_| {}; let thread_name = move |i| format!("thread_name_{}", i); // Ensure we can call all public methods on Configuration Configuration::new() .thread_name(thread_name) .num_threads(5) .panic_handler(panic_handler) .stack_size(4e6 as usize) .breadth_first() .start_handler(start_handler) .exit_handler(exit_handler) .build() .unwrap(); } #[test] fn default_pool() { ThreadPoolBuilder::default().build().unwrap(); } /// Test that custom spawned threads get their `WorkerThread` cleared once /// the pool is done with them, allowing them to be used with rayon again /// later. e.g. WebAssembly want to have their own pool of available threads. #[test] fn cleared_current_thread() -> Result<(), ThreadPoolBuildError> { let n_threads = 5; let mut handles = vec![]; let pool = ThreadPoolBuilder::new() .num_threads(n_threads) .spawn_handler(|thread| { let handle = std::thread::spawn(move || { thread.run(); // Afterward, the current thread shouldn't be set anymore. assert_eq!(crate::current_thread_index(), None); }); handles.push(handle); Ok(()) }) .build()?; assert_eq!(handles.len(), n_threads); pool.install(|| assert!(crate::current_thread_index().is_some())); drop(pool); // Wait for all threads to make their assertions and exit for handle in handles { handle.join().unwrap(); } Ok(()) } rayon-core-1.6.0/src/thread_pool/internal.rs010066400247370024737000000035531352066016100173220ustar0000000000000000#![cfg(rayon_unstable)] use super::ThreadPool; use internal::task::{ScopeHandle, Task, ToScopeHandle}; use registry::Registry; use std::any::Any; use std::fmt; use std::sync::Arc; impl ToScopeHandle<'static> for ThreadPool { type ScopeHandle = ThreadPoolScopeHandle; fn to_scope_handle(&self) -> Self::ScopeHandle { unsafe { ThreadPoolScopeHandle::new(self.registry.clone()) } } } pub struct ThreadPoolScopeHandle { registry: Arc, } impl fmt::Debug for ThreadPoolScopeHandle { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { fmt.debug_struct("ThreadPoolScopeHandle") .field("pool", &self.registry.id()) .finish() } } impl ThreadPoolScopeHandle { /// Caller asserts that the registry has not yet terminated. unsafe fn new(registry: Arc) -> Self { registry.increment_terminate_count(); ThreadPoolScopeHandle { registry } } } impl Drop for ThreadPoolScopeHandle { fn drop(&mut self) { self.registry.terminate(); } } /// We assert that: /// /// (a) the scope valid remains valid until a completion method /// is called. In this case, "remains valid" means that the /// registry is not terminated. This is true because we /// acquire a "termination count" in `StaticFutureScope::new()` /// which is not released until `future_panicked()` or /// `future_completed()` is invoked. /// (b) the lifetime `'static` will not end until a completion /// method is called. This is true because `'static` doesn't /// end until the end of the program. 
unsafe impl ScopeHandle<'static> for ThreadPoolScopeHandle { unsafe fn spawn_task(&self, task: Arc) { self.registry.submit_task(task); } fn ok(self) {} fn panicked(self, err: Box) { self.registry.handle_panic(err); } } rayon-core-1.6.0/src/thread_pool/mod.rs010066400247370024737000000301251353200457200162610ustar0000000000000000//! Contains support for user-managed thread pools, represented by the //! the [`ThreadPool`] type (see that struct for details). //! //! [`ThreadPool`]: struct.ThreadPool.html use join; use registry::{Registry, ThreadSpawn, WorkerThread}; use spawn; use std::error::Error; use std::fmt; use std::sync::Arc; #[allow(deprecated)] use Configuration; use {scope, Scope}; use {scope_fifo, ScopeFifo}; use {ThreadPoolBuildError, ThreadPoolBuilder}; mod internal; mod test; /// Represents a user created [thread-pool]. /// /// Use a [`ThreadPoolBuilder`] to specify the number and/or names of threads /// in the pool. After calling [`ThreadPoolBuilder::build()`], you can then /// execute functions explicitly within this [`ThreadPool`] using /// [`ThreadPool::install()`]. By contrast, top level rayon functions /// (like `join()`) will execute implicitly within the current thread-pool. /// /// /// ## Creating a ThreadPool /// /// ```rust /// # use rayon_core as rayon; /// let pool = rayon::ThreadPoolBuilder::new().num_threads(8).build().unwrap(); /// ``` /// /// [`install()`][`ThreadPool::install()`] executes a closure in one of the `ThreadPool`'s /// threads. In addition, any other rayon operations called inside of `install()` will also /// execute in the context of the `ThreadPool`. /// /// When the `ThreadPool` is dropped, that's a signal for the threads it manages to terminate, /// they will complete executing any remaining work that you have spawned, and automatically /// terminate. /// /// /// [thread-pool]: https://en.wikipedia.org/wiki/Thread_pool /// [`ThreadPool`]: struct.ThreadPool.html /// [`ThreadPool::new()`]: struct.ThreadPool.html#method.new /// [`ThreadPoolBuilder`]: struct.ThreadPoolBuilder.html /// [`ThreadPoolBuilder::build()`]: struct.ThreadPoolBuilder.html#method.build /// [`ThreadPool::install()`]: struct.ThreadPool.html#method.install pub struct ThreadPool { registry: Arc, } impl ThreadPool { #[deprecated(note = "Use `ThreadPoolBuilder::build`")] #[allow(deprecated)] /// Deprecated in favor of `ThreadPoolBuilder::build`. pub fn new(configuration: Configuration) -> Result> { Self::build(configuration.into_builder()).map_err(Box::from) } pub(super) fn build( builder: ThreadPoolBuilder, ) -> Result where S: ThreadSpawn, { let registry = Registry::new(builder)?; Ok(ThreadPool { registry }) } /// Returns a handle to the global thread pool. This is the pool /// that Rayon will use by default when you perform a `join()` or /// `scope()` operation, if no other thread-pool is installed. If /// no global thread-pool has yet been started when this function /// is called, then the global thread-pool will be created (with /// the default configuration). If you wish to configure the /// global thread-pool differently, then you can use [the /// `rayon::initialize()` function][f] to do so. /// /// [f]: fn.initialize.html #[cfg(rayon_unstable)] pub fn global() -> &'static Arc { lazy_static! { static ref DEFAULT_THREAD_POOL: Arc = Arc::new(ThreadPool { registry: Registry::global() }); } &DEFAULT_THREAD_POOL } /// Executes `op` within the threadpool. Any attempts to use /// `join`, `scope`, or parallel iterators will then operate /// within that threadpool. 
    ///
    /// # Warning: thread-local data
    ///
    /// Because `op` is executing within the Rayon thread-pool,
    /// thread-local data from the current thread will not be
    /// accessible.
    ///
    /// # Panics
    ///
    /// If `op` should panic, that panic will be propagated.
    ///
    /// ## Using `install()`
    ///
    /// ```rust
    /// # use rayon_core as rayon;
    /// fn main() {
    ///     let pool = rayon::ThreadPoolBuilder::new().num_threads(8).build().unwrap();
    ///     let n = pool.install(|| fib(20));
    ///     println!("{}", n);
    /// }
    ///
    /// fn fib(n: usize) -> usize {
    ///     if n == 0 || n == 1 {
    ///         return n;
    ///     }
    ///     let (a, b) = rayon::join(|| fib(n - 1), || fib(n - 2)); // runs inside of `pool`
    ///     return a + b;
    /// }
    /// ```
    pub fn install<OP, R>(&self, op: OP) -> R
    where
        OP: FnOnce() -> R + Send,
        R: Send,
    {
        self.registry.in_worker(|_, _| op())
    }

    /// Returns the (current) number of threads in the thread pool.
    ///
    /// # Future compatibility note
    ///
    /// Note that unless this thread-pool was created with a
    /// [`ThreadPoolBuilder`] that specifies the number of threads,
    /// then this number may vary over time in future versions (see [the
    /// `num_threads()` method for details][snt]).
    ///
    /// [snt]: struct.ThreadPoolBuilder.html#method.num_threads
    /// [`ThreadPoolBuilder`]: struct.ThreadPoolBuilder.html
    #[inline]
    pub fn current_num_threads(&self) -> usize {
        self.registry.num_threads()
    }

    /// If called from a Rayon worker thread in this thread-pool,
    /// returns the index of that thread; if not called from a Rayon
    /// thread, or called from a Rayon thread that belongs to a
    /// different thread-pool, returns `None`.
    ///
    /// The index for a given thread will not change over the thread's
    /// lifetime. However, multiple threads may share the same index if
    /// they are in distinct thread-pools.
    ///
    /// # Future compatibility note
    ///
    /// Currently, every thread-pool (including the global
    /// thread-pool) has a fixed number of threads, but this may
    /// change in future Rayon versions (see [the `num_threads()` method
    /// for details][snt]). In that case, the index for a
    /// thread would not change during its lifetime, but thread
    /// indices may wind up being reused if threads are terminated and
    /// restarted.
    ///
    /// [snt]: struct.ThreadPoolBuilder.html#method.num_threads
    #[inline]
    pub fn current_thread_index(&self) -> Option<usize> {
        let curr = self.registry.current_thread()?;
        Some(curr.index())
    }

    /// Returns true if the current worker thread currently has "local
    /// tasks" pending. This can be useful as part of a heuristic for
    /// deciding whether to spawn a new task or execute code on the
    /// current thread, particularly in breadth-first
    /// schedulers. However, keep in mind that this is an inherently
    /// racy check, as other worker threads may be actively "stealing"
    /// tasks from our local deque.
    ///
    /// **Background:** Rayon uses a [work-stealing] scheduler. The
    /// key idea is that each thread has its own [deque] of
    /// tasks. Whenever a new task is spawned -- whether through
    /// `join()`, `Scope::spawn()`, or some other means -- that new
    /// task is pushed onto the thread's *local* deque. Worker threads
    /// have a preference for executing their own tasks; if however
    /// they run out of tasks, they will go try to "steal" tasks from
    /// other threads. This function therefore has an inherent race
    /// with other active worker threads, which may be removing items
    /// from the local deque.
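    ///
    /// A minimal doc sketch of using this racy hint (the branching policy
    /// shown here is hypothetical, not part of the API):
    ///
    /// ```rust
    /// # use rayon_core as rayon;
    /// let pool = rayon::ThreadPoolBuilder::new().num_threads(2).build().unwrap();
    /// pool.install(|| {
    ///     // Inside the pool this returns Some(_); outside it returns None.
    ///     // Treat the answer as advisory only: other workers may steal from
    ///     // our deque at any moment.
    ///     if let Some(false) = pool.current_thread_has_pending_tasks() {
    ///         // e.g., bias toward spawning more parallel work here
    ///     }
    /// });
    /// ```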
    ///
    /// [work-stealing]: https://en.wikipedia.org/wiki/Work_stealing
    /// [deque]: https://en.wikipedia.org/wiki/Double-ended_queue
    #[inline]
    pub fn current_thread_has_pending_tasks(&self) -> Option<bool> {
        let curr = self.registry.current_thread()?;
        Some(!curr.local_deque_is_empty())
    }

    /// Execute `oper_a` and `oper_b` in the thread-pool and return
    /// the results. Equivalent to `self.install(|| join(oper_a,
    /// oper_b))`.
    pub fn join<A, B, RA, RB>(&self, oper_a: A, oper_b: B) -> (RA, RB)
    where
        A: FnOnce() -> RA + Send,
        B: FnOnce() -> RB + Send,
        RA: Send,
        RB: Send,
    {
        self.install(|| join(oper_a, oper_b))
    }

    /// Creates a scope that executes within this thread-pool.
    /// Equivalent to `self.install(|| scope(...))`.
    ///
    /// See also: [the `scope()` function][scope].
    ///
    /// [scope]: fn.scope.html
    pub fn scope<'scope, OP, R>(&self, op: OP) -> R
    where
        OP: for<'s> FnOnce(&'s Scope<'scope>) -> R + 'scope + Send,
        R: Send,
    {
        self.install(|| scope(op))
    }

    /// Creates a scope that executes within this thread-pool.
    /// Spawns from the same thread are prioritized in relative FIFO order.
    /// Equivalent to `self.install(|| scope_fifo(...))`.
    ///
    /// See also: [the `scope_fifo()` function][scope_fifo].
    ///
    /// [scope_fifo]: fn.scope_fifo.html
    pub fn scope_fifo<'scope, OP, R>(&self, op: OP) -> R
    where
        OP: for<'s> FnOnce(&'s ScopeFifo<'scope>) -> R + 'scope + Send,
        R: Send,
    {
        self.install(|| scope_fifo(op))
    }

    /// Spawns an asynchronous task in this thread-pool. This task will
    /// run in the implicit, global scope, which means that it may outlast
    /// the current stack frame -- therefore, it cannot capture any references
    /// onto the stack (you will likely need a `move` closure).
    ///
    /// See also: [the `spawn()` function defined on scopes][spawn].
    ///
    /// [spawn]: struct.Scope.html#method.spawn
    pub fn spawn<OP>(&self, op: OP)
    where
        OP: FnOnce() + Send + 'static,
    {
        // We assert that `self.registry` has not terminated.
        unsafe { spawn::spawn_in(op, &self.registry) }
    }

    /// Spawns an asynchronous task in this thread-pool. This task will
    /// run in the implicit, global scope, which means that it may outlast
    /// the current stack frame -- therefore, it cannot capture any references
    /// onto the stack (you will likely need a `move` closure).
    ///
    /// See also: [the `spawn_fifo()` function defined on scopes][spawn_fifo].
    ///
    /// [spawn_fifo]: struct.ScopeFifo.html#method.spawn_fifo
    pub fn spawn_fifo<OP>(&self, op: OP)
    where
        OP: FnOnce() + Send + 'static,
    {
        // We assert that `self.registry` has not terminated.
        unsafe { spawn::spawn_fifo_in(op, &self.registry) }
    }
}

impl Drop for ThreadPool {
    fn drop(&mut self) {
        self.registry.terminate();
    }
}

impl fmt::Debug for ThreadPool {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        fmt.debug_struct("ThreadPool")
            .field("num_threads", &self.current_num_threads())
            .field("id", &self.registry.id())
            .finish()
    }
}

/// If called from a Rayon worker thread, returns the index of that
/// thread within its current pool; if not called from a Rayon thread,
/// returns `None`.
///
/// The index for a given thread will not change over the thread's
/// lifetime. However, multiple threads may share the same index if
/// they are in distinct thread-pools.
///
/// See also: [the `ThreadPool::current_thread_index()` method][m].
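///
/// A tiny doc sketch (outside any Rayon worker thread, the call returns
/// `None`):
///
/// ```rust
/// # use rayon_core as rayon;
/// // We are not on a Rayon worker thread here, so no index is available.
/// assert_eq!(rayon::current_thread_index(), None);
/// ```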
///
/// [m]: struct.ThreadPool.html#method.current_thread_index
///
/// # Future compatibility note
///
/// Currently, every thread-pool (including the global
/// thread-pool) has a fixed number of threads, but this may
/// change in future Rayon versions (see [the `num_threads()` method
/// for details][snt]). In that case, the index for a
/// thread would not change during its lifetime, but thread
/// indices may wind up being reused if threads are terminated and
/// restarted.
///
/// [snt]: struct.ThreadPoolBuilder.html#method.num_threads
#[inline]
pub fn current_thread_index() -> Option<usize> {
    unsafe {
        let curr = WorkerThread::current().as_ref()?;
        Some(curr.index())
    }
}

/// If called from a Rayon worker thread, indicates whether that
/// thread's local deque still has pending tasks. Otherwise, returns
/// `None`. For more information, see [the
/// `ThreadPool::current_thread_has_pending_tasks()` method][m].
///
/// [m]: struct.ThreadPool.html#method.current_thread_has_pending_tasks
#[inline]
pub fn current_thread_has_pending_tasks() -> Option<bool> {
    unsafe {
        let curr = WorkerThread::current().as_ref()?;
        Some(!curr.local_deque_is_empty())
    }
}
rayon-core-1.6.0/src/thread_pool/test.rs
#![cfg(test)]

use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::mpsc::channel;
use std::sync::{Arc, Mutex};

use join;
use thread_pool::ThreadPool;
use unwind;
#[allow(deprecated)]
use Configuration;
use ThreadPoolBuilder;

#[test]
#[should_panic(expected = "Hello, world!")]
fn panic_propagate() {
    let thread_pool = ThreadPoolBuilder::new().build().unwrap();
    thread_pool.install(|| {
        panic!("Hello, world!");
    });
}

#[test]
fn workers_stop() {
    let registry;

    {
        // once we exit this block, thread-pool will be dropped
        let thread_pool = ThreadPoolBuilder::new().num_threads(22).build().unwrap();
        registry = thread_pool.install(|| {
            // do some work on these threads
            join_a_lot(22);

            thread_pool.registry.clone()
        });
        assert_eq!(registry.num_threads(), 22);
    }

    // once thread-pool is dropped, registry should terminate, which
    // should lead to worker threads stopping
    registry.wait_until_stopped();
}

fn join_a_lot(n: usize) {
    if n > 0 {
        join(|| join_a_lot(n - 1), || join_a_lot(n - 1));
    }
}

#[test]
fn sleeper_stop() {
    use std::{thread, time};

    let registry;

    {
        // once we exit this block, thread-pool will be dropped
        let thread_pool = ThreadPoolBuilder::new().num_threads(22).build().unwrap();
        registry = thread_pool.registry.clone();

        // Give time for at least some of the thread pool to fall asleep.
        thread::sleep(time::Duration::from_secs(1));
    }

    // once thread-pool is dropped, registry should terminate, which
    // should lead to worker threads stopping
    registry.wait_until_stopped();
}

/// Create a start/exit handler that increments an atomic counter.
fn count_handler() -> (Arc<AtomicUsize>, impl Fn(usize)) {
    let count = Arc::new(AtomicUsize::new(0));
    (count.clone(), move |_| {
        count.fetch_add(1, Ordering::SeqCst);
    })
}

/// Wait until a counter is no longer shared, then return its value.
fn wait_for_counter(mut counter: Arc<AtomicUsize>) -> usize {
    use std::{thread, time};

    for _ in 0..60 {
        counter = match Arc::try_unwrap(counter) {
            Ok(counter) => return counter.into_inner(),
            Err(counter) => {
                thread::sleep(time::Duration::from_secs(1));
                counter
            }
        };
    }

    // That's too long!
    panic!("Counter is still shared!");
}

#[test]
fn failed_thread_stack() {
    // Note: we first tried to force failure with a `usize::MAX` stack, but
    // macOS and Windows weren't fazed, or at least didn't fail the way we want.
    // They work with `isize::MAX`, but 32-bit platforms may feasibly allocate a
    // 2GB stack, so it might not fail until the second thread.
    let stack_size = ::std::isize::MAX as usize;

    let (start_count, start_handler) = count_handler();
    let (exit_count, exit_handler) = count_handler();
    let builder = ThreadPoolBuilder::new()
        .num_threads(10)
        .stack_size(stack_size)
        .start_handler(start_handler)
        .exit_handler(exit_handler);

    let pool = builder.build();
    assert!(pool.is_err(), "thread stack should have failed!");

    // With such a huge stack, 64-bit will probably fail on the first thread;
    // 32-bit might manage the first 2GB, but certainly fail the second.
    let start_count = wait_for_counter(start_count);
    assert!(start_count <= 1);
    assert_eq!(start_count, wait_for_counter(exit_count));
}

#[test]
fn panic_thread_name() {
    let (start_count, start_handler) = count_handler();
    let (exit_count, exit_handler) = count_handler();
    let builder = ThreadPoolBuilder::new()
        .num_threads(10)
        .start_handler(start_handler)
        .exit_handler(exit_handler)
        .thread_name(|i| {
            if i >= 5 {
                panic!();
            }
            format!("panic_thread_name#{}", i)
        });

    let pool = unwind::halt_unwinding(|| builder.build());
    assert!(pool.is_err(), "thread-name panic should propagate!");

    // Assuming they're created in order, threads 0 through 4 should have
    // been started already, and then terminated by the panic.
    assert_eq!(5, wait_for_counter(start_count));
    assert_eq!(5, wait_for_counter(exit_count));
}

#[test]
fn self_install() {
    let pool = ThreadPoolBuilder::new().num_threads(1).build().unwrap();

    // If the inner `install` blocks, then nothing will actually run it!
    assert!(pool.install(|| pool.install(|| true)));
}

#[test]
fn mutual_install() {
    let pool1 = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
    let pool2 = ThreadPoolBuilder::new().num_threads(1).build().unwrap();

    let ok = pool1.install(|| {
        // This creates a dependency from `pool1` -> `pool2`
        pool2.install(|| {
            // This creates a dependency from `pool2` -> `pool1`
            pool1.install(|| {
                // If they blocked on inter-pool installs, there would be no
                // threads left to run this!
                true
            })
        })
    });
    assert!(ok);
}

#[test]
fn mutual_install_sleepy() {
    use std::{thread, time};

    let pool1 = ThreadPoolBuilder::new().num_threads(1).build().unwrap();
    let pool2 = ThreadPoolBuilder::new().num_threads(1).build().unwrap();

    let ok = pool1.install(|| {
        // This creates a dependency from `pool1` -> `pool2`
        pool2.install(|| {
            // Give `pool1` time to fall asleep.
            thread::sleep(time::Duration::from_secs(1));

            // This creates a dependency from `pool2` -> `pool1`
            pool1.install(|| {
                // Give `pool2` time to fall asleep.
                thread::sleep(time::Duration::from_secs(1));

                // If they blocked on inter-pool installs, there would be no
                // threads left to run this!
                true
            })
        })
    });
    assert!(ok);
}

#[test]
#[allow(deprecated)]
fn check_thread_pool_new() {
    let pool = ThreadPool::new(Configuration::new().num_threads(22)).unwrap();
    assert_eq!(pool.current_num_threads(), 22);
}
macro_rules! test_scope_order {
    ($scope:ident => $spawn:ident) => {{
        let builder = ThreadPoolBuilder::new().num_threads(1);
        let pool = builder.build().unwrap();
        pool.install(|| {
            let vec = Mutex::new(vec![]);
            pool.$scope(|scope| {
                let vec = &vec;
                for i in 0..10 {
                    scope.$spawn(move |_| {
                        vec.lock().unwrap().push(i);
                    });
                }
            });
            vec.into_inner().unwrap()
        })
    }};
}

#[test]
fn scope_lifo_order() {
    let vec = test_scope_order!(scope => spawn);
    let expected: Vec<i32> = (0..10).rev().collect(); // LIFO -> reversed
    assert_eq!(vec, expected);
}

#[test]
fn scope_fifo_order() {
    let vec = test_scope_order!(scope_fifo => spawn_fifo);
    let expected: Vec<i32> = (0..10).collect(); // FIFO -> natural order
    assert_eq!(vec, expected);
}

macro_rules! test_spawn_order {
    ($spawn:ident) => {{
        let builder = ThreadPoolBuilder::new().num_threads(1);
        let pool = &builder.build().unwrap();
        let (tx, rx) = channel();
        pool.install(move || {
            for i in 0..10 {
                let tx = tx.clone();
                pool.$spawn(move || {
                    tx.send(i).unwrap();
                });
            }
        });
        rx.iter().collect::<Vec<i32>>()
    }};
}

#[test]
fn spawn_lifo_order() {
    let vec = test_spawn_order!(spawn);
    let expected: Vec<i32> = (0..10).rev().collect(); // LIFO -> reversed
    assert_eq!(vec, expected);
}

#[test]
fn spawn_fifo_order() {
    let vec = test_spawn_order!(spawn_fifo);
    let expected: Vec<i32> = (0..10).collect(); // FIFO -> natural order
    assert_eq!(vec, expected);
}
rayon-core-1.6.0/src/unwind.rs
//! Package up unwind recovery. Note that if you are in some sensitive
//! place, you can use the `AbortIfPanic` helper to protect against
//! accidental panics in the rayon code itself.

use std::any::Any;
use std::panic::{self, AssertUnwindSafe};
use std::thread;

/// Executes `f` and captures any panic, translating that panic into an
/// `Err` result. The assumption is that any panic will be propagated
/// later with `resume_unwinding`, and hence `f` can be treated as
/// exception safe.
pub(super) fn halt_unwinding<F, R>(func: F) -> thread::Result<R>
where
    F: FnOnce() -> R,
{
    panic::catch_unwind(AssertUnwindSafe(func))
}
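// A typical pairing of these helpers (illustrative sketch only; `risky_work`
// is a hypothetical closure, not part of this module): capture a panic on a
// worker thread with `halt_unwinding`, carry the boxed payload across
// threads, then rethrow it on the thread that joins the result:
//
//     match halt_unwinding(|| risky_work()) {
//         Ok(value) => value,
//         Err(payload) => resume_unwinding(payload),
//     }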
pub(super) fn resume_unwinding(payload: Box<dyn Any + Send>) -> ! {
    panic::resume_unwind(payload)
}

pub(super) struct AbortIfPanic;

impl Drop for AbortIfPanic {
    fn drop(&mut self) {
        eprintln!("Rayon: detected unexpected panic; aborting");
        ::std::process::abort();
    }
}
rayon-core-1.6.0/src/util.rs
use std::mem;

pub(super) fn leak<T>(v: T) -> &'static T {
    unsafe {
        let b = Box::new(v);
        let p: *const T = &*b;
        mem::forget(b); // leak our reference, so that `b` is never freed
        &*p
    }
}
rayon-core-1.6.0/tests/double_init_fail.rs
extern crate rayon_core;

use rayon_core::ThreadPoolBuilder;
use std::error::Error;

#[test]
fn double_init_fail() {
    let result1 = ThreadPoolBuilder::new().build_global();
    assert_eq!(result1.unwrap(), ());
    let err = ThreadPoolBuilder::new().build_global().unwrap_err();
    assert!(err.description() == "The global thread pool has already been initialized.");
}
rayon-core-1.6.0/tests/init_zero_threads.rs
extern crate rayon_core;

use rayon_core::ThreadPoolBuilder;

#[test]
fn init_zero_threads() {
    ThreadPoolBuilder::new()
        .num_threads(0)
        .build_global()
        .unwrap();
}
rayon-core-1.6.0/tests/scope_join.rs
extern crate rayon_core;

/// Test that one can emulate join with `scope`:
fn pseudo_join<F, G>(f: F, g: G)
where
    F: FnOnce() + Send,
    G: FnOnce() + Send,
{
    rayon_core::scope(|s| {
        s.spawn(|_| g());
        f();
    });
}

fn quick_sort<T: PartialOrd + Send>(v: &mut [T]) {
    if v.len() <= 1 {
        return;
    }

    let mid = partition(v);
    let (lo, hi) = v.split_at_mut(mid);
    pseudo_join(|| quick_sort(lo), || quick_sort(hi));
}

fn partition<T: PartialOrd + Send>(v: &mut [T]) -> usize {
    let pivot = v.len() - 1;
    let mut i = 0;
    for j in 0..pivot {
        if v[j] <= v[pivot] {
            v.swap(i, j);
            i += 1;
        }
    }
    v.swap(i, pivot);
    i
}

fn is_sorted<T: PartialOrd>(v: &[T]) -> bool {
    (1..v.len()).all(|i| v[i - 1] <= v[i])
}

#[test]
fn scope_join() {
    let mut v: Vec<i32> = (0..256).rev().collect();
    quick_sort(&mut v);
    assert!(is_sorted(&v));
}
rayon-core-1.6.0/tests/scoped_threadpool.rs
extern crate crossbeam_utils;
extern crate rayon_core;
#[macro_use]
extern crate scoped_tls;

use crossbeam_utils::thread;
use rayon_core::ThreadPoolBuilder;

#[derive(PartialEq, Eq, Debug)]
struct Local(i32);

scoped_thread_local!(static LOCAL: Local);

#[test]
fn missing_scoped_tls() {
    LOCAL.set(&Local(42), || {
        let pool = ThreadPoolBuilder::new()
            .build()
            .expect("thread pool created");

        // `LOCAL` is not set in the pool.
        pool.install(|| {
            assert!(!LOCAL.is_set());
        });
    });
}

#[test]
fn spawn_scoped_tls_threadpool() {
    LOCAL.set(&Local(42), || {
        LOCAL.with(|x| {
            thread::scope(|scope| {
                let pool = ThreadPoolBuilder::new()
                    .spawn_handler(move |thread| {
                        scope
                            .builder()
                            .spawn(move |_| {
                                // Borrow the same local value in the thread pool.
                                LOCAL.set(x, || thread.run())
                            })
                            .map(|_| ())
                    })
                    .build()
                    .expect("thread pool created");

                // The pool matches our local value.
                pool.install(|| {
                    assert!(LOCAL.is_set());
                    LOCAL.with(|y| {
                        assert_eq!(x, y);
                    });
                });

                // If we change our local value, the pool is not affected.
                LOCAL.set(&Local(-1), || {
                    pool.install(|| {
                        assert!(LOCAL.is_set());
                        LOCAL.with(|y| {
                            assert_eq!(x, y);
                        });
                    });
                });
            })
            .expect("scope threads ok");
            // `thread::scope` will wait for the threads to exit before returning.
        });
    });
}
#[test]
fn build_scoped_tls_threadpool() {
    LOCAL.set(&Local(42), || {
        LOCAL.with(|x| {
            ThreadPoolBuilder::new()
                .build_scoped(
                    move |thread| LOCAL.set(x, || thread.run()),
                    |pool| {
                        // The pool matches our local value.
                        pool.install(|| {
                            assert!(LOCAL.is_set());
                            LOCAL.with(|y| {
                                assert_eq!(x, y);
                            });
                        });

                        // If we change our local value, the pool is not affected.
                        LOCAL.set(&Local(-1), || {
                            pool.install(|| {
                                assert!(LOCAL.is_set());
                                LOCAL.with(|y| {
                                    assert_eq!(x, y);
                                });
                            });
                        });
                    },
                )
                .expect("thread pool created");
            // Internally, `crossbeam::scope` will wait for the threads to exit
            // before returning.
        });
    });
}
rayon-core-1.6.0/tests/simple_panic.rs
extern crate rayon_core;

use rayon_core::join;

#[test]
#[should_panic(expected = "should panic")]
fn simple_panic() {
    join(|| {}, || panic!("should panic"));
}
rayon-core-1.6.0/tests/stack_overflow_crash.rs
#[cfg(unix)]
extern crate libc;
extern crate rayon_core;

use rayon_core::ThreadPoolBuilder;

use std::env;
use std::process::Command;

#[cfg(target_os = "linux")]
use std::os::unix::process::ExitStatusExt;

fn force_stack_overflow(depth: u32) {
    let _buffer = [0u8; 1024 * 1024];
    if depth > 0 {
        force_stack_overflow(depth - 1);
    }
}

#[cfg(unix)]
fn disable_core() {
    unsafe {
        libc::setrlimit(
            libc::RLIMIT_CORE,
            &libc::rlimit {
                rlim_cur: 0,
                rlim_max: 0,
            },
        );
    }
}

#[cfg(unix)]
fn overflow_code() -> Option<i32> {
    None
}

#[cfg(windows)]
fn overflow_code() -> Option<i32> {
    use std::os::windows::process::ExitStatusExt;
    use std::process::ExitStatus;

    ExitStatus::from_raw(0xc00000fd /*STATUS_STACK_OVERFLOW*/).code()
}

fn main() {
    if env::args().len() == 1 {
        // first check that the recursive call actually causes a stack overflow,
        // and does not get optimized away
        {
            let status = Command::new(env::current_exe().unwrap())
                .arg("8")
                .status()
                .unwrap();

            #[cfg(any(unix, windows))]
            assert_eq!(status.code(), overflow_code());

            #[cfg(target_os = "linux")]
            assert!(
                status.signal() == Some(11 /*SIGSEGV*/) || status.signal() == Some(6 /*SIGABRT*/)
            );
        }

        // now run with a larger stack and verify correct operation
        {
            let status = Command::new(env::current_exe().unwrap())
                .arg("48")
                .status()
                .unwrap();
            assert_eq!(status.code(), Some(0));

            #[cfg(target_os = "linux")]
            assert_eq!(status.signal(), None);
        }
    } else {
        let stack_size_in_mb: usize = env::args().nth(1).unwrap().parse().unwrap();
        let pool = ThreadPoolBuilder::new()
            .stack_size(stack_size_in_mb * 1024 * 1024)
            .build()
            .unwrap();
        pool.install(|| {
            #[cfg(unix)]
            disable_core();
            force_stack_overflow(32);
        });
    }
}
rayon-core-1.6.0/.cargo_vcs_info.json
{
  "git": {
    "sha1": "60cdb431731d471add01281f63ccf8f8bc0451f3"
  }
}