average-0.13.1/.cargo_vcs_info.json0000644000000001120000000000000125210ustar { "git": { "sha1": "0f9f7308db68e2f51f9b7f521568bdf162ad3d4a" } } average-0.13.1/Cargo.toml0000644000000044040000000000000105270ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "average" version = "0.13.1" authors = ["Vinzent Steinberg "] include = ["src/**/*", "benches/*", "LICENSE-*", "README.md"] description = "Calculate statistics iteratively" documentation = "https://docs.rs/average" readme = "README.md" keywords = ["stats", "mean", "skewness", "kurtosis", "quantile"] categories = ["science", "no-std"] license = "MIT/Apache-2.0" repository = "https://github.com/vks/average" resolver = "2" [package.metadata.docs.rs] features = ["libm", "serde1", "rayon"] rustdoc-args = ["--cfg", "doc_cfg"] [[bench]] name = "mean" harness = false [[bench]] name = "min" harness = false [[bench]] name = "kurtosis" harness = false [dependencies.easy-cast] version = "0.4" optional = true default-features = false [dependencies.float-ord] version = "0.3" [dependencies.num-traits] version = "0.2" default-features = false [dependencies.rayon] version = "1.3" optional = true [dependencies.serde] version = "1" features = ["derive"] optional = true default-features = false [dependencies.serde-big-array] version = "0.3.0" optional = true [dependencies.serde_derive] version = "1" optional = true [dev-dependencies.bencher] version = "0.1" [dev-dependencies.byteorder] version = "=1.3" [dev-dependencies.proptest] version = "1" [dev-dependencies.quantiles] version = "0.7" [dev-dependencies.rand] version = "0.8" [dev-dependencies.rand_distr] version = "0.4" [dev-dependencies.rand_xoshiro] version = "0.6" [dev-dependencies.serde_json] version = "1" [dev-dependencies.streaming-stats] version = "0.2" [features] default = ["libm"] libm = ["easy-cast/libm", "num-traits/libm"] nightly = [] serde1 = ["serde", "serde_derive", "serde-big-array"] std = ["easy-cast/std", "num-traits/std"] average-0.13.1/Cargo.toml.orig000064400000000000000000000034540000000000000141720ustar 00000000000000[package] authors = ["Vinzent Steinberg "] categories = ["science", "no-std"] description = "Calculate statistics iteratively" documentation = "https://docs.rs/average" keywords = ["stats", "mean", "skewness", "kurtosis", "quantile"] license = "MIT/Apache-2.0" name = "average" readme = "README.md" repository = "https://github.com/vks/average" version = "0.13.1" edition = "2018" include = ["src/**/*", "benches/*", "LICENSE-*", "README.md"] resolver = "2" # This is ignored by Rust <= 1.50 [features] serde1 = ["serde", "serde_derive", "serde-big-array"] nightly = [] std = ["easy-cast/std", "num-traits/std"] libm = ["easy-cast/libm", "num-traits/libm"] default = ["libm"] [[bench]] harness = false name = "mean" [[bench]] harness = false name = "min" [[bench]] harness = false name = "kurtosis" [dependencies] num-traits = { version = "0.2", default-features = false } float-ord = "0.3" easy-cast = { version = "0.4", default-features = false, optional = true } serde_derive = 
{ version = "1", optional = true } serde-big-array = { version = "0.3.0", optional = true } rayon = { version = "1.3", optional = true } [dependencies.serde] version = "1" default-features = false features = ["derive"] optional = true [dev-dependencies] bencher = "0.1" rand = "0.8" rand_xoshiro = "0.6" rand_distr = "0.4" serde_json = "1" streaming-stats = "0.2" quantiles = "0.7" proptest = "1" # byteorder is not a direct dependency, but the MSRV of 1.4 is higher than ours. # Therefore, we have to enforce version 1.3. byteorder = "=1.3" [package.metadata.docs.rs] # Enable certain features when building docs for docs.rs features = ["libm", "serde1", "rayon"] rustdoc-args = ["--cfg", "doc_cfg"] # To build locally: # RUSTDOCFLAGS="--cfg doc_cfg" cargo +nightly doc --features libm,serde1,rayon --no-deps --open average-0.13.1/LICENSE-APACHE000064400000000000000000000251370000000000000132310ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) 
The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. average-0.13.1/LICENSE-MIT000064400000000000000000000017770000000000000127410ustar 00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. average-0.13.1/README.md000064400000000000000000000032360000000000000125600ustar 00000000000000# average Calculate statistics of a sequence iteratively in a single pass, using constant space and avoiding numerical problems. The calculations can be easily parallelized by using `merge`. This crate works without `std`. [![Documentation Status]][docs.rs] [![Latest Version]][crates.io] [![Build Status]][actions] [Documentation Status]: https://docs.rs/average/badge.svg [docs.rs]: https://docs.rs/average [Build Status]: https://github.com/vks/average/workflows/Tests/badge.svg?event=push [actions]: https://github.com/vks/average/actions [Latest Version]: https://img.shields.io/crates/v/average.svg [crates.io]: https://crates.io/crates/average ## Implemented statistics * Mean and its error. * Variance, skewness, kurtosis. * Arbitrary moments. * Minimum and maximum. * Quantile. * Histogram. ## Crate features The following features are available: * `libm` enables `Quantile` (using floating point functions provided by `libm`). This is enabled by default. If the `std` feature is also enabled, `std` is preferred over `libm`. * `std` enables `Quantile` (using floating point functions provided by `std`). * `serde1` enables serialization, via Serde version 1. * `rayon` enables support for `rayon::iter::FromParallelIterator`. * `nightly` enables the use of const generics for a histogram implementation without macros. Note that nightly features are not stable and therefore not all library and compiler versions will be compatible.
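## Example

Statistics are computed in a single pass; partial results from disjoint chunks of the data can be combined with `merge`. A minimal sketch, using only APIs shown in the crate documentation:

```rust
use average::{MeanWithError, Merge};

// Fill an estimator in a single pass over the data.
let mut left: MeanWithError = (1..4).map(f64::from).collect();
// A second estimator can be filled independently, e.g. on another thread.
let right: MeanWithError = (4..7).map(f64::from).collect();
// Combine the partial results.
left.merge(&right);
println!("The mean is {} ± {}.", left.mean(), left.error());
```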
## Rust version requirements Rustc version 1.36 or greater is supported. ## Related Projects * [`quantiles`](https://crates.io/crates/quantiles): Provides quantile estimates with bounded error but using growing space. average-0.13.1/benches/kurtosis.rs000064400000000000000000000020610000000000000151340ustar 00000000000000use bencher::{Bencher, benchmark_group, benchmark_main}; /// Create a random vector by sampling from a normal distribution. fn initialize_vec() -> Vec<f64> { use rand_distr::{Normal, Distribution}; use rand::SeedableRng; let normal = Normal::new(2.0, 3.0).unwrap(); let n = 1_000_000; let mut values = Vec::with_capacity(n); let mut rng = rand_xoshiro::Xoshiro256StarStar::from_seed( [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]); for _ in 0..n { values.push(normal.sample(&mut rng)); } values } fn bench_kurtosis(b: &mut Bencher) { let values = initialize_vec(); b.iter(|| { let m: average::Kurtosis = values.iter().copied().collect(); m }); } fn bench_moments(b: &mut Bencher) { let values = initialize_vec(); b.iter(|| { let m: average::Moments4 = values.iter().copied().collect(); m }); } benchmark_group!(benches, bench_kurtosis, bench_moments); benchmark_main!(benches); average-0.13.1/benches/mean.rs000064400000000000000000000020610000000000000141710ustar 00000000000000use bencher::{Bencher, benchmark_group, benchmark_main}; /// Create a random vector by sampling from a normal distribution. fn initialize_vec() -> Vec<f64> { use rand_distr::{Normal, Distribution}; use rand::SeedableRng; let normal = Normal::new(2.0, 3.0).unwrap(); let n = 1_000_000; let mut values = Vec::with_capacity(n); let mut rng = rand_xoshiro::Xoshiro256StarStar::from_seed( [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]); for _ in 0..n { values.push(normal.sample(&mut rng)); } values } fn bench_average(b: &mut Bencher) { let values = initialize_vec(); b.iter(|| { let m: average::MeanWithError = values.iter().copied().collect(); m }); } fn bench_stats(b: &mut Bencher) { let values = initialize_vec(); b.iter(|| { let m: stats::OnlineStats = values.iter().copied().collect(); m }); } benchmark_group!(benches, bench_average, bench_stats); benchmark_main!(benches); average-0.13.1/benches/min.rs000064400000000000000000000020710000000000000140350ustar 00000000000000use bencher::{Bencher, benchmark_group, benchmark_main}; /// Create a vector of random floats in [0, 1]. fn initialize_vec() -> Vec<f64> { use rand_distr::{Uniform, Distribution}; use rand::SeedableRng; let range = Uniform::new(0.0, 1.0); let n = 1_000_000; let mut values = Vec::with_capacity(n); let mut rng = rand_xoshiro::Xoshiro256StarStar::from_seed( [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]); for _ in 0..n { values.push(range.sample(&mut rng)); } values } fn bench_average(b: &mut Bencher) { let values = initialize_vec(); b.iter(|| { let a: average::Min = values.iter().copied().collect(); a }); } fn bench_iter(b: &mut Bencher) { let values = initialize_vec(); b.iter(|| { let mut it = values.iter(); let init: f64 = *it.next().unwrap(); it.fold(init, |a, &b| a.min(b)) }); } benchmark_group!(benches, bench_average, bench_iter); benchmark_main!(benches); average-0.13.1/src/histogram.rs000064400000000000000000000240210000000000000144260ustar 00000000000000/// Invalid ranges were specified for constructing the histogram.
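///
/// A sketch of how this error surfaces from `from_ranges` on a histogram type
/// generated by `define_histogram!` (the bin count `2` here is arbitrary):
///
/// ```
/// use average::{define_histogram, InvalidRangeError};
///
/// define_histogram!(hist, 2);
/// // 2.0 is followed by the smaller 1.0, so the ranges are not sorted.
/// let result = hist::Histogram::from_ranges([0.0, 2.0, 1.0].iter().copied());
/// assert_eq!(result.unwrap_err(), InvalidRangeError::NotSorted);
/// ```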
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum InvalidRangeError { /// The number of ranges is less than the number of bins + 1. NotEnoughRanges, /// The ranges are not sorted or `(low, high)` where `low` > `high` is /// encountered. NotSorted, /// A range contains `nan`. NaN, } /// A sample is out of range of the histogram. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct SampleOutOfRangeError; #[doc(hidden)] #[macro_export] macro_rules! define_histogram_common { ($LEN:expr) => ( use $crate::Histogram as Trait; /// The number of bins of the histogram. const LEN: usize = $LEN; impl ::core::fmt::Debug for Histogram { fn fmt(&self, formatter: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { formatter.write_str("Histogram {{ range: ")?; self.range[..].fmt(formatter)?; formatter.write_str(", bins: ")?; self.bin[..].fmt(formatter)?; formatter.write_str(" }}") } } impl Histogram { /// Construct a histogram with constant bin width. #[inline] pub fn with_const_width(start: f64, end: f64) -> Self { let step = (end - start) / (LEN as f64); let mut range = [0.; LEN + 1]; for (i, r) in range.iter_mut().enumerate() { *r = start + step * (i as f64); } Self { range, bin: [0; LEN], } } /// Construct a histogram from given ranges. /// /// The ranges are given by an iterator of floats where neighboring /// pairs `(a, b)` define a bin for all `x` where `a <= x < b`. /// /// Fails if the iterator is too short (less than `n + 1` where `n` /// is the number of bins), is not sorted or contains `nan`. `inf` /// and empty ranges are allowed. #[inline] pub fn from_ranges<T>(ranges: T) -> Result<Self, $crate::InvalidRangeError> where T: IntoIterator<Item = f64> { let mut range = [0.; LEN + 1]; let mut last_i = 0; for (i, r) in ranges.into_iter().enumerate() { if i > LEN { break; } if r.is_nan() { return Err($crate::InvalidRangeError::NaN); } if i > 0 && range[i - 1] > r { return Err($crate::InvalidRangeError::NotSorted); } range[i] = r; last_i = i; } if last_i != LEN { return Err($crate::InvalidRangeError::NotEnoughRanges); } Ok(Self { range, bin: [0; LEN], }) } /// Find the index of the bin corresponding to the given sample. /// /// Fails if the sample is out of range of the histogram. #[inline] pub fn find(&self, x: f64) -> Result<usize, $crate::SampleOutOfRangeError> { // We made sure our ranges are valid at construction, so we can // safely unwrap. match self.range.binary_search_by(|p| p.partial_cmp(&x).unwrap()) { Ok(i) if i < LEN => { Ok(i) }, Err(i) if i > 0 && i < LEN + 1 => { Ok(i - 1) }, _ => { Err($crate::SampleOutOfRangeError) }, } } /// Add a sample to the histogram. /// /// Fails if the sample is out of range of the histogram. #[inline] pub fn add(&mut self, x: f64) -> Result<(), $crate::SampleOutOfRangeError> { if let Ok(i) = self.find(x) { self.bin[i] += 1; Ok(()) } else { Err($crate::SampleOutOfRangeError) } } /// Return the ranges of the histogram. #[inline] pub fn ranges(&self) -> &[f64] { &self.range[..] } /// Return an iterator over the bins and corresponding ranges: /// `((lower, upper), count)` #[inline] pub fn iter(&self) -> IterHistogram<'_> { self.into_iter() } /// Reset all bins to zero. #[inline] pub fn reset(&mut self) { self.bin = [0; LEN]; } /// Return the lower range limit. /// /// (The corresponding bin might be empty.) #[inline] pub fn range_min(&self) -> f64 { self.range[0] } /// Return the upper range limit. /// /// (The corresponding bin might be empty.) #[inline] pub fn range_max(&self) -> f64 { self.range[LEN] } } /// Iterate over all `(range, count)` pairs in the histogram.
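///
/// A sketch of consuming this iterator, using a histogram type generated by
/// `define_histogram!` (the names here are illustrative):
///
/// ```
/// use average::define_histogram;
///
/// define_histogram!(hist, 2);
/// let mut h = hist::Histogram::with_const_width(0., 2.);
/// h.add(0.5).unwrap();
/// // Each item is `((lower, upper), count)`.
/// for ((lower, upper), count) in h.iter() {
///     assert!(lower < upper);
///     assert!(count <= 1);
/// }
/// ```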
#[derive(Debug, Clone)] pub struct IterHistogram<'a> { remaining_bin: &'a [u64], remaining_range: &'a [f64], } impl<'a> ::core::iter::Iterator for IterHistogram<'a> { type Item = ((f64, f64), u64); fn next(&mut self) -> Option<((f64, f64), u64)> { if let Some((&bin, rest)) = self.remaining_bin.split_first() { let left = self.remaining_range[0]; let right = self.remaining_range[1]; self.remaining_bin = rest; self.remaining_range = &self.remaining_range[1..]; return Some(((left, right), bin)); } None } } impl<'a> ::core::iter::IntoIterator for &'a Histogram { type Item = ((f64, f64), u64); type IntoIter = IterHistogram<'a>; fn into_iter(self) -> IterHistogram<'a> { IterHistogram { remaining_bin: self.bins(), remaining_range: self.ranges(), } } } impl $crate::Histogram for Histogram { #[inline] fn bins(&self) -> &[u64] { &self.bin[..] } } impl<'a> ::core::ops::AddAssign<&'a Self> for Histogram { #[inline] fn add_assign(&mut self, other: &Self) { for (a, b) in self.range.iter().zip(other.range.iter()) { assert_eq!(a, b, "Both histograms must have the same ranges"); } for (x, y) in self.bin.iter_mut().zip(other.bin.iter()) { *x += y; } } } impl ::core::ops::MulAssign<u64> for Histogram { #[inline] fn mul_assign(&mut self, other: u64) { for x in &mut self.bin[..] { *x *= other; } } } impl $crate::Merge for Histogram { fn merge(&mut self, other: &Self) { assert_eq!(self.bin.len(), other.bin.len()); for (a, b) in self.range.iter().zip(other.range.iter()) { assert_eq!(a, b, "Both histograms must have the same ranges"); } for (a, b) in self.bin.iter_mut().zip(other.bin.iter()) { *a += *b; } } } ); } #[cfg(feature = "serde1")] #[doc(hidden)] #[macro_export] macro_rules! define_histogram_inner { ($name:ident, $LEN:expr) => ( mod $name { $crate::define_histogram_common!($LEN); use ::serde::{Serialize, Deserialize}; serde_big_array::big_array! { BigArray; LEN, (LEN + 1), } /// A histogram with a number of bins known at compile time. #[derive(Clone, Serialize, Deserialize)] pub struct Histogram { /// The ranges defining the bins of the histogram. #[serde(with = "BigArray")] range: [f64; LEN + 1], /// The bins of the histogram. #[serde(with = "BigArray")] bin: [u64; LEN], } } ); } #[cfg(not(feature = "serde1"))] #[doc(hidden)] #[macro_export] macro_rules! define_histogram_inner { ($name:ident, $LEN:expr) => ( mod $name { $crate::define_histogram_common!($LEN); /// A histogram with a number of bins known at compile time. #[derive(Clone)] pub struct Histogram { /// The ranges defining the bins of the histogram. range: [f64; LEN + 1], /// The bins of the histogram. bin: [u64; LEN], } } ); } /// Define a histogram with a number of bins known at compile time. /// /// Because macros are not hygienic for items, everything is defined in a private /// module with the given name. This includes the `Histogram` struct, the number /// of bins `LEN` and the histogram iterator `IterHistogram`. /// /// Note that you need to make sure that `core` is accessible to the macro. /// /// /// # Example /// /// ``` /// use average::{Histogram, define_histogram}; /// /// define_histogram!(hist, 10); /// let mut h = hist::Histogram::with_const_width(0., 100.); /// for i in 0..100 { /// h.add(i as f64).unwrap(); /// } /// assert_eq!(h.bins(), &[10, 10, 10, 10, 10, 10, 10, 10, 10, 10]); /// ``` #[macro_export] macro_rules! define_histogram { ($name:ident, $LEN:expr) => ($crate::define_histogram_inner!($name, $LEN);); } average-0.13.1/src/histogram_const.rs000064400000000000000000000236700000000000000156450ustar 00000000000000//!
Histogram implementation via const generics. /// Invalid ranges were specified for constructing the histogram. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum InvalidRangeError { /// The number of ranges is less than the number of bins + 1. NotEnoughRanges, /// The ranges are not sorted or `(low, high)` where `low` > `high` is /// encountered. NotSorted, /// A range contains `nan`. NaN, } /// A sample is out of range of the histogram. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct SampleOutOfRangeError; impl<const LEN: usize> ::core::fmt::Debug for Histogram<LEN> where [u8; LEN + 1]: Sized { fn fmt(&self, formatter: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result { formatter.write_str("Histogram {{ range: ")?; self.range[..].fmt(formatter)?; formatter.write_str(", bins: ")?; self.bin[..].fmt(formatter)?; formatter.write_str(" }}") } } impl<const LEN: usize> Histogram<LEN> where [u8; LEN + 1]: Sized { /// Construct a histogram with constant bin width. #[inline] pub fn with_const_width(start: f64, end: f64) -> Self { let step = (end - start) / (LEN as f64); let mut range = [0.; LEN + 1]; for (i, r) in range.iter_mut().enumerate() { *r = start + step * (i as f64); } Self { range, bin: [0; LEN], } } /// Construct a histogram from given ranges. /// /// The ranges are given by an iterator of floats where neighboring /// pairs `(a, b)` define a bin for all `x` where `a <= x < b`. /// /// Fails if the iterator is too short (less than `n + 1` where `n` /// is the number of bins), is not sorted or contains `nan`. `inf` /// and empty ranges are allowed. #[inline] pub fn from_ranges<T>(ranges: T) -> Result<Self, InvalidRangeError> where T: IntoIterator<Item = f64> { let mut range = [0.; LEN + 1]; let mut last_i = 0; for (i, r) in ranges.into_iter().enumerate() { if i > LEN { break; } if r.is_nan() { return Err(InvalidRangeError::NaN); } if i > 0 && range[i - 1] > r { return Err(InvalidRangeError::NotSorted); } range[i] = r; last_i = i; } if last_i != LEN { return Err(InvalidRangeError::NotEnoughRanges); } Ok(Self { range, bin: [0; LEN], }) } /// Find the index of the bin corresponding to the given sample. /// /// Fails if the sample is out of range of the histogram. #[inline] pub fn find(&self, x: f64) -> Result<usize, SampleOutOfRangeError> { // We made sure our ranges are valid at construction, so we can // safely unwrap. match self.range.binary_search_by(|p| p.partial_cmp(&x).unwrap()) { Ok(i) if i < LEN => { Ok(i) }, Err(i) if i > 0 && i < LEN + 1 => { Ok(i - 1) }, _ => { Err(SampleOutOfRangeError) }, } } /// Add a sample to the histogram. /// /// Fails if the sample is out of range of the histogram. #[inline] pub fn add(&mut self, x: f64) -> Result<(), SampleOutOfRangeError> { if let Ok(i) = self.find(x) { self.bin[i] += 1; Ok(()) } else { Err(SampleOutOfRangeError) } } /// Return the ranges of the histogram. #[inline] pub fn ranges(&self) -> &[f64] { &self.range[..] } /// Return an iterator over the bins and corresponding ranges: /// `((lower, upper), count)` #[inline] pub fn iter(&self) -> IterHistogram<'_> { self.into_iter() } /// Reset all bins to zero. #[inline] pub fn reset(&mut self) { self.bin = [0; LEN]; } /// Return the lower range limit. /// /// (The corresponding bin might be empty.) #[inline] pub fn range_min(&self) -> f64 { self.range[0] } /// Return the upper range limit. /// /// (The corresponding bin might be empty.) #[inline] pub fn range_max(&self) -> f64 { self.range[LEN] } /// Return the bins of the histogram. #[inline] pub fn bins(&self) -> &[u64] { &self.bin[..] } /// Estimate the variance for the given bin.
/// /// The square root of this estimates the error of the bin count. #[inline] pub fn variance(&self, bin: usize) -> f64 { let count = self.bins()[bin]; let sum: u64 = self.bins().iter().sum(); multinomial_variance(count as f64, 1./(sum as f64)) } /// Return an iterator over the bins normalized by the bin widths. #[inline] pub fn normalized_bins(&self) -> IterNormalized<<&Self as IntoIterator>::IntoIter> { IterNormalized { histogram_iter: self.into_iter() } } /// Return an iterator over the bin widths. #[inline] pub fn widths(&self) -> IterWidths<<&Self as IntoIterator>::IntoIter> { IterWidths { histogram_iter: self.into_iter() } } /// Return an iterator over the bin centers. #[inline] pub fn centers(&self) -> IterBinCenters<<&Self as IntoIterator>::IntoIter> { IterBinCenters { histogram_iter: self.into_iter() } } /// Return an iterator over the bin variances. /// /// This is more efficient than calling `variance()` for each bin. #[inline] pub fn variances(&self) -> IterVariances<<&Self as IntoIterator>::IntoIter> { let sum: u64 = self.bins().iter().sum(); IterVariances { histogram_iter: self.into_iter(), sum_inv: 1./(sum as f64) } } } /// Iterate over all `(range, count)` pairs in the histogram. #[derive(Clone, Debug)] pub struct IterHistogram<'a> { remaining_bin: &'a [u64], remaining_range: &'a [f64], } impl<'a> ::core::iter::Iterator for IterHistogram<'a> { type Item = ((f64, f64), u64); fn next(&mut self) -> Option<((f64, f64), u64)> { if let Some((&bin, rest)) = self.remaining_bin.split_first() { let left = self.remaining_range[0]; let right = self.remaining_range[1]; self.remaining_bin = rest; self.remaining_range = &self.remaining_range[1..]; return Some(((left, right), bin)); } None } } impl<'a, const LEN: usize> ::core::iter::IntoIterator for &'a Histogram<LEN> where [u8; LEN + 1]: Sized { type Item = ((f64, f64), u64); type IntoIter = IterHistogram<'a>; fn into_iter(self) -> IterHistogram<'a> { IterHistogram { remaining_bin: self.bins(), remaining_range: self.ranges(), } } } impl<'a, const LEN: usize> ::core::ops::AddAssign<&'a Self> for Histogram<LEN> where [u8; LEN + 1]: Sized { #[inline] fn add_assign(&mut self, other: &Self) { for (a, b) in self.range.iter().zip(other.range.iter()) { assert_eq!(a, b, "Both histograms must have the same ranges"); } for (x, y) in self.bin.iter_mut().zip(other.bin.iter()) { *x += y; } } } impl<const LEN: usize> ::core::ops::MulAssign<u64> for Histogram<LEN> where [u8; LEN + 1]: Sized { #[inline] fn mul_assign(&mut self, other: u64) { for x in &mut self.bin[..] { *x *= other; } } } impl<const LEN: usize> crate::Merge for Histogram<LEN> where [u8; LEN + 1]: Sized { fn merge(&mut self, other: &Self) { assert_eq!(self.bin.len(), other.bin.len()); for (a, b) in self.range.iter().zip(other.range.iter()) { assert_eq!(a, b, "Both histograms must have the same ranges"); } for (a, b) in self.bin.iter_mut().zip(other.bin.iter()) { *a += *b; } } } /// A histogram with a number of bins known at compile time. #[derive(Clone)] pub struct Histogram<const LEN: usize> where [u8; LEN + 1]: Sized { /// The ranges defining the bins of the histogram. range: [f64; LEN + 1], /// The bins of the histogram. bin: [u64; LEN], } /// Calculate the multinomial variance. Relevant for histograms. #[inline(always)] fn multinomial_variance(n: f64, n_tot_inv: f64) -> f64 { n * (1. - n * n_tot_inv) } /// Iterate over the bins normalized by bin width.
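///
/// A sketch of obtaining this iterator (requires the `nightly` crate feature,
/// hence the example is not compiled):
///
/// ```ignore
/// use average::histogram_const::Histogram;
///
/// let h = Histogram::<4>::with_const_width(0., 1.);
/// // Bin counts divided by the widths of the corresponding bins.
/// let normalized: Vec<f64> = h.normalized_bins().collect();
/// ```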
#[derive(Clone, Debug)] pub struct IterNormalized<T> where T: Iterator<Item = ((f64, f64), u64)> { histogram_iter: T, } impl<T> Iterator for IterNormalized<T> where T: Iterator<Item = ((f64, f64), u64)> { type Item = f64; #[inline] fn next(&mut self) -> Option<f64> { self.histogram_iter.next().map(|((a, b), count)| (count as f64) / (b - a)) } } /// Iterate over the widths of the bins. #[derive(Clone, Debug)] pub struct IterWidths<T> where T: Iterator<Item = ((f64, f64), u64)> { histogram_iter: T, } impl<T> Iterator for IterWidths<T> where T: Iterator<Item = ((f64, f64), u64)> { type Item = f64; #[inline] fn next(&mut self) -> Option<f64> { self.histogram_iter.next().map(|((a, b), _)| b - a) } } /// Iterate over the bin centers. #[derive(Clone, Debug)] pub struct IterBinCenters<T> where T: Iterator<Item = ((f64, f64), u64)> { histogram_iter: T, } impl<T> Iterator for IterBinCenters<T> where T: Iterator<Item = ((f64, f64), u64)> { type Item = f64; #[inline] fn next(&mut self) -> Option<f64> { self.histogram_iter.next().map(|((a, b), _)| 0.5 * (a + b)) } } /// Iterate over the variances. #[derive(Clone, Debug)] pub struct IterVariances<T> where T: Iterator<Item = ((f64, f64), u64)> { histogram_iter: T, sum_inv: f64, } impl<T> Iterator for IterVariances<T> where T: Iterator<Item = ((f64, f64), u64)> { type Item = f64; #[inline] fn next(&mut self) -> Option<f64> { self.histogram_iter.next() .map(|(_, n)| multinomial_variance(n as f64, self.sum_inv)) } } average-0.13.1/src/lib.rs000064400000000000000000000113600000000000000132010ustar 00000000000000//! This crate provides estimators for statistics on a sequence of numbers. The //! typical workflow looks like this: //! //! 1. If necessary, build your custom estimator using [`concatenate`] or //! [`define_moments`]. //! 2. Initialize the estimator of your choice with `new()`. //! 3. Add some subset (called "sample") of the sequence of numbers (called //! "population") for which you want to estimate the statistic, using `add()` //! or `collect()`. //! 4. Calculate the statistic with `mean()` or similar. //! //! You can run several estimators in parallel and merge them into one with //! `merge()`. //! //! Everything is calculated iteratively in a single pass using constant memory, //! so the sequence of numbers can be an iterator. The used algorithms try to //! avoid numerical instabilities. //! //! If you want [Serde](https://github.com/serde-rs/serde) support, //! include `"serde1"` in your list of features. //! //! Note that deserializing does not currently check for all invalid inputs. //! For example, if you deserialize a corrupted [`Variance`] it may return //! a negative value for variance, even though that is mathematically impossible. //! In a future minor release some of these checks may be added. //! //! //! ### Example //! //! ``` //! use average::{MeanWithError, Estimate}; //! //! let mut a: MeanWithError = (1..6).map(f64::from).collect(); //! a.add(42.); //! println!("The mean is {} ± {}.", a.mean(), a.error()); //! ``` //! //! //! ## Estimators //! //! * Mean ([`Mean`]) and its error ([`MeanWithError`]). //! * Weighted mean ([`WeightedMean`]) and its error //! ([`WeightedMeanWithError`]). //! * Variance ([`Variance`]), skewness ([`Skewness`]) and kurtosis //! ([`Kurtosis`]). //! * Arbitrary higher moments ([`define_moments`]). //! * Quantiles ([`Quantile`]). //! * Minimum ([`Min`]) and maximum ([`Max`]). //! //! //! ## Estimating several statistics at once //! //! The estimators are designed to have minimal state. The recommended way to //! calculate several of them at once is to create a struct with all the //! estimators you need. You can then implement `add` for your struct by //! forwarding to the underlying estimators. Everything is inlined, so there //! should be no overhead. //! //!
You can avoid the boilerplate code by using the [`concatenate`] macro. //! //! Note that calculating moments requires calculating the lower moments, so you //! only need to include the highest moment in your struct. //! //! //! ## Calculating histograms //! //! The [`define_histogram`] macro can be used to define a histogram struct that //! uses constant memory. See [`Histogram10`] (defined using //! `define_histogram!(..., 10)`) and the extension trait [`Histogram`] //! for the methods available to the generated struct. //! //! //! [`Mean`]: ./struct.Mean.html //! [`MeanWithError`]: ./type.MeanWithError.html //! [`WeightedMean`]: ./struct.WeightedMean.html //! [`WeightedMeanWithError`]: ./struct.WeightedMeanWithError.html //! [`Variance`]: ./struct.Variance.html //! [`Skewness`]: ./struct.Skewness.html //! [`Kurtosis`]: ./struct.Kurtosis.html //! [`Quantile`]: ./struct.Quantile.html //! [`Min`]: ./struct.Min.html //! [`Max`]: ./struct.Max.html //! [`concatenate`]: ./macro.concatenate.html //! [`define_moments`]: ./macro.define_moments.html //! [`define_histogram`]: ./macro.define_histogram.html //! [`Histogram10`]: ./struct.Histogram10.html //! [`Histogram`]: ./trait.Histogram.html #![cfg_attr(doc_cfg, feature(doc_cfg))] #![allow( clippy::float_cmp, clippy::suspicious_operation_groupings, )] #![no_std] #![forbid(unsafe_code)] #![forbid(missing_docs)] #![forbid(missing_debug_implementations)] #![cfg_attr(feature = "nightly", feature(const_generics, const_evaluatable_checked))] #[macro_use] mod macros; #[macro_use] mod moments; mod weighted_mean; mod minmax; #[cfg(any(feature = "std", feature = "libm"))] #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "libm"))))] mod quantile; mod traits; #[macro_use] mod histogram; #[cfg(feature = "nightly")] #[cfg_attr(doc_cfg, doc(cfg(feature = "nightly")))] pub mod histogram_const; pub use crate::moments::{Mean, Variance, MeanWithError}; #[cfg(any(feature = "std", feature = "libm"))] #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "libm"))))] pub use crate::moments::{Skewness, Kurtosis}; pub use crate::weighted_mean::{WeightedMean, WeightedMeanWithError}; pub use crate::minmax::{Min, Max}; #[cfg(any(feature = "std", feature = "libm"))] #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "libm"))))] pub use crate::quantile::Quantile; pub use crate::traits::{Estimate, Merge, Histogram}; pub use crate::histogram::{InvalidRangeError, SampleOutOfRangeError}; define_histogram!(hist, 10); pub use crate::hist::Histogram as Histogram10; define_moments!(Moments4, 4); average-0.13.1/src/macros.rs000064400000000000000000000150540000000000000137230ustar 00000000000000/// Assert that two numbers are almost equal to each other. /// /// On panic, this macro will print the values of the expressions with their /// debug representations. #[macro_export] macro_rules! assert_almost_eq { ($a:expr, $b:expr, $prec:expr) => ( let diff = ($a - $b).abs(); if diff > $prec { panic!("assertion failed: `abs(left - right) = {:.1e} < {:e}`, \ (left: `{}`, right: `{}`)", diff, $prec, $a, $b); } ); } /// Concatenate several iterative estimators into one. /// /// `$name` is the name of the new struct. `$statistic` is the name of a /// statistic and must exist as a method of the corresponding type `$estimator`. /// `$estimator` must have an `add` method for adding new observations to the /// sample (taking an `f64` as an argument). It must also implement `Default`. /// /// If the short syntax is used, the fields will be named `$statistic`. 
Use the /// long syntax and `$field` to give them explicit names. The long syntax also /// supports calculating several statistics from one estimator. /// /// For moments, only an estimator for the highest moment should be used and /// reused for the lower moments (see the example below). /// /// The following methods will be implemented: `new`, `add`, `$statistic`. /// /// The following traits will be implemented: `Default`, `FromIterator`. /// /// /// # Examples /// /// ``` /// use average::{Min, Max, Estimate, concatenate}; /// /// concatenate!(MinMax, [Min, min], [Max, max]); /// /// let s: MinMax = (1..6).map(f64::from).collect(); /// /// assert_eq!(s.min(), 1.0); /// assert_eq!(s.max(), 5.0); /// ``` /// /// The generated code looks roughly like this: /// /// ``` /// # use average::{Min, Max, Estimate}; /// # /// struct MinMax { /// min: Min, /// max: Max, /// } /// /// impl MinMax { /// pub fn new() -> MinMax { /// MinMax { min: Min::default(), max: Max::default() } /// } /// /// pub fn add(&mut self, x: f64) { /// self.min.add(x); /// self.max.add(x); /// } /// /// pub fn min(&self) -> f64 { /// self.min.min() /// } /// /// pub fn max(&self) -> f64 { /// self.max.max() /// } /// } /// ``` /// /// If you want to calculate the mean, variance and the median in one pass, you /// can do the following: /// /// ```ignore /// use average::{Variance, Quantile, Estimate, concatenate}; /// /// concatenate!(Estimator, /// [Variance, variance, mean, sample_variance], /// [Quantile, quantile, quantile]); /// ``` #[macro_export] macro_rules! concatenate { ( $name:ident, $([$estimator:ident, $statistic:ident]),+ ) => { concatenate!( $name, $([$estimator, $statistic, $statistic]),* ); }; ( $name:ident, $( [$estimator:ident, $field:ident, $($statistic:ident),+] ),+ ) => { struct $name { $( $field: $estimator, )* } impl $name { #[inline] pub fn new() -> $name { $name { $( $field: ::core::default::Default::default(), )* } } #[inline] pub fn add(&mut self, x: f64) { $( self.$field.add(x); )* } $( $( #[inline] pub fn $statistic(&self) -> f64 { self.$field.$statistic() } )* )* } impl Default for $name { fn default() -> $name { $name::new() } } $crate::impl_from_iterator!($name); // This should be conditionally activated if all fields implement `Merge`. // Could probably be implemented with specialization. /* impl $crate::Merge for $name { #[inline] fn merge(&mut self, other: &Self) { use $crate::Merge; $( self.$field.merge(&other.$field); )* } } */ }; } /// Implement `FromIterator` for an iterative estimator. #[macro_export] macro_rules! impl_from_iterator { ( $name:ident ) => { impl ::core::iter::FromIterator<f64> for $name { fn from_iter<T>(iter: T) -> $name where T: IntoIterator<Item = f64> { let mut e = $name::new(); for i in iter { e.add(i); } e } } impl<'a> ::core::iter::FromIterator<&'a f64> for $name { fn from_iter<T>(iter: T) -> $name where T: IntoIterator<Item = &'a f64> { let mut e = $name::new(); for &i in iter { e.add(i); } e } } } } /// Implement `FromParallelIterator` for an iterative estimator. /// /// This will do nothing unless the `rayon` feature is enabled. #[macro_export] macro_rules!
impl_from_par_iterator { ( $name:ident ) => { #[cfg(feature = "rayon")] #[cfg_attr(doc_cfg, doc(cfg(feature = "rayon")))] impl ::rayon::iter::FromParallelIterator<f64> for $name { fn from_par_iter<I>(par_iter: I) -> $name where I: ::rayon::iter::IntoParallelIterator<Item = f64>, Self: $crate::Merge, { use $crate::Merge; use ::rayon::iter::ParallelIterator; let par_iter = par_iter.into_par_iter(); par_iter.fold(|| $name::new(), |mut e, i| { e.add(i); e }).reduce(|| $name::new(), |mut a, b| { a.merge(&b); a }) } } #[cfg(feature = "rayon")] #[cfg_attr(doc_cfg, doc(cfg(feature = "rayon")))] impl<'a> ::rayon::iter::FromParallelIterator<&'a f64> for $name { fn from_par_iter<I>(par_iter: I) -> $name where I: ::rayon::iter::IntoParallelIterator<Item = &'a f64>, Self: $crate::Merge, { use $crate::Merge; use ::rayon::iter::ParallelIterator; let par_iter = par_iter.into_par_iter(); par_iter.fold(|| $name::new(), |mut e, i| { e.add(*i); e }).reduce(|| $name::new(), |mut a, b| { a.merge(&b); a }) } } }; } average-0.13.1/src/minmax.rs000064400000000000000000000071610000000000000137300ustar 00000000000000#[cfg(feature = "serde1")] use serde::{Serialize, Deserialize}; use super::{Estimate, Merge}; /// Calculate the minimum of `a` and `b`. fn min(a: f64, b: f64) -> f64 { a.min(b) } /// Calculate the maximum of `a` and `b`. fn max(a: f64, b: f64) -> f64 { a.max(b) } /// Estimate the minimum of a sequence of numbers ("population"). /// /// /// ## Example /// /// ``` /// use average::Min; /// /// let a: Min = (1..6).map(f64::from).collect(); /// println!("The minimum is {}.", a.min()); /// ``` #[derive(Debug, Clone)] #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] pub struct Min { x: f64, } impl Min { /// Create a new minimum estimator from a given value. #[inline] pub fn from_value(x: f64) -> Min { Min { x } } /// Create a new minimum estimator. #[inline] pub fn new() -> Min { Min::from_value(::core::f64::INFINITY) } /// Estimate the minimum of the population. #[inline] pub fn min(&self) -> f64 { self.x } } impl core::default::Default for Min { fn default() -> Min { Min::new() } } impl_from_iterator!(Min); impl_from_par_iterator!(Min); impl Estimate for Min { #[inline] fn add(&mut self, x: f64) { self.x = min(self.x, x); } #[inline] fn estimate(&self) -> f64 { self.min() } } impl Merge for Min { /// Merge another sample into this one. /// /// /// ## Example /// /// ``` /// use average::{Min, Merge}; /// /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.]; /// let (left, right) = sequence.split_at(3); /// let min_total: Min = sequence.iter().collect(); /// let mut min_left: Min = left.iter().collect(); /// let min_right: Min = right.iter().collect(); /// min_left.merge(&min_right); /// assert_eq!(min_total.min(), min_left.min()); /// ``` #[inline] fn merge(&mut self, other: &Min) { self.add(other.x); } } /// Estimate the maximum of a sequence of numbers ("population"). /// /// /// ## Example /// /// ``` /// use average::Max; /// /// let a: Max = (1..6).map(f64::from).collect(); /// assert_eq!(a.max(), 5.); /// ``` #[derive(Debug, Clone)] #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] pub struct Max { x: f64, } impl Max { /// Create a new maximum estimator from a given value. #[inline] pub fn from_value(x: f64) -> Max { Max { x } } /// Create a new maximum estimator. #[inline] pub fn new() -> Max { Max::from_value(::core::f64::NEG_INFINITY) } /// Estimate the maximum of the population.
#[inline] pub fn max(&self) -> f64 { self.x } } impl core::default::Default for Max { fn default() -> Max { Max::new() } } impl_from_iterator!(Max); impl_from_par_iterator!(Max); impl Estimate for Max { #[inline] fn add(&mut self, x: f64) { self.x = max(self.x, x); } #[inline] fn estimate(&self) -> f64 { self.max() } } impl Merge for Max { /// Merge another sample into this one. /// /// /// ## Example /// /// ``` /// use average::{Max, Merge}; /// /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.]; /// let (left, right) = sequence.split_at(3); /// let max_total: Max = sequence.iter().collect(); /// let mut max_left: Max = left.iter().collect(); /// let max_right: Max = right.iter().collect(); /// max_left.merge(&max_right); /// assert_eq!(max_total.max(), max_left.max()); /// ``` #[inline] fn merge(&mut self, other: &Max) { self.add(other.x); } } average-0.13.1/src/moments/kurtosis.rs000064400000000000000000000104630000000000000160030ustar 00000000000000/// Estimate the arithmetic mean, the variance, the skewness and the kurtosis of /// a sequence of numbers ("population"). /// /// This can be used to estimate the standard error of the mean. #[derive(Debug, Clone)] #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] pub struct Kurtosis { /// Estimator of mean, variance and skewness. avg: Skewness, /// Intermediate sum of terms to the fourth for calculating the kurtosis. sum_4: f64, } impl Kurtosis { /// Create a new kurtosis estimator. #[inline] pub fn new() -> Kurtosis { Kurtosis { avg: Skewness::new(), sum_4: 0., } } /// Increment the sample size. /// /// This does not update anything else. #[inline] fn increment(&mut self) { self.avg.increment(); } /// Add an observation given an already calculated difference from the mean /// divided by the number of samples, assuming the inner count of the sample /// size was already updated. /// /// This is useful for avoiding unnecessary divisions in the inner loop. #[inline] fn add_inner(&mut self, delta: f64, delta_n: f64) { // This algorithm was suggested by Terriberry. // // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance. let n = self.len().to_f64().unwrap(); let term = delta * delta_n * (n - 1.); let delta_n_sq = delta_n*delta_n; self.sum_4 += term * delta_n_sq * (n*n - 3.*n + 3.) + 6. * delta_n_sq * self.avg.avg.sum_2 - 4. * delta_n * self.avg.sum_3; self.avg.add_inner(delta, delta_n); } /// Determine whether the sample is empty. #[inline] pub fn is_empty(&self) -> bool { self.avg.is_empty() } /// Estimate the mean of the population. /// /// Returns 0 for an empty sample. #[inline] pub fn mean(&self) -> f64 { self.avg.mean() } /// Return the sample size. #[inline] pub fn len(&self) -> u64 { self.avg.len() } /// Calculate the sample variance. /// /// This is an unbiased estimator of the variance of the population. #[inline] pub fn sample_variance(&self) -> f64 { self.avg.sample_variance() } /// Calculate the population variance of the sample. /// /// This is a biased estimator of the variance of the population. #[inline] pub fn population_variance(&self) -> f64 { self.avg.population_variance() } /// Estimate the standard error of the mean of the population. #[inline] pub fn error_mean(&self) -> f64 { self.avg.error_mean() } /// Estimate the skewness of the population. #[inline] pub fn skewness(&self) -> f64 { self.avg.skewness() } /// Estimate the excess kurtosis of the population. #[inline] pub fn kurtosis(&self) -> f64 { if self.sum_4 == 0.
{ return 0.; } let n = self.len().to_f64().unwrap(); n * self.sum_4 / (self.avg.avg.sum_2 * self.avg.avg.sum_2) - 3. } } impl core::default::Default for Kurtosis { fn default() -> Kurtosis { Kurtosis::new() } } impl Estimate for Kurtosis { #[inline] fn add(&mut self, x: f64) { let delta = x - self.mean(); self.increment(); let n = self.len().to_f64().unwrap(); self.add_inner(delta, delta/n); } #[inline] fn estimate(&self) -> f64 { self.kurtosis() } } impl Merge for Kurtosis { #[inline] fn merge(&mut self, other: &Kurtosis) { let len_self = self.len().to_f64().unwrap(); let len_other = other.len().to_f64().unwrap(); let len_total = len_self + len_other; let delta = other.mean() - self.mean(); let delta_n = delta / len_total; let delta_n_sq = delta_n * delta_n; self.sum_4 += other.sum_4 + delta * delta_n*delta_n_sq * len_self*len_other * (len_self*len_self - len_self*len_other + len_other*len_other) + 6.*delta_n_sq * (len_self*len_self * other.avg.avg.sum_2 + len_other*len_other * self.avg.avg.sum_2) + 4.*delta_n * (len_self * other.avg.sum_3 - len_other * self.avg.sum_3); self.avg.merge(&other.avg); } } impl_from_iterator!(Kurtosis); impl_from_par_iterator!(Kurtosis); average-0.13.1/src/moments/mean.rs000064400000000000000000000064160000000000000150430ustar 00000000000000/// Estimate the arithmetic mean of a sequence of numbers ("population"). /// /// /// ## Example /// /// ``` /// use average::Mean; /// /// let a: Mean = (1..6).map(f64::from).collect(); /// println!("The mean is {}.", a.mean()); /// ``` #[derive(Debug, Clone)] #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] pub struct Mean { /// Mean value. avg: f64, /// Sample size. n: u64, } impl Mean { /// Create a new mean estimator. #[inline] pub fn new() -> Mean { Mean { avg: 0., n: 0 } } /// Increment the sample size. /// /// This does not update anything else. #[inline] fn increment(&mut self) { self.n += 1; } /// Add an observation given an already calculated difference from the mean /// divided by the number of samples, assuming the inner count of the sample /// size was already updated. /// /// This is useful for avoiding unnecessary divisions in the inner loop. #[inline] fn add_inner(&mut self, delta_n: f64) { // This algorithm introduced by Welford in 1962 trades numerical // stability for a division inside the loop. // // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance. self.avg += delta_n; } /// Determine whether the sample is empty. #[inline] pub fn is_empty(&self) -> bool { self.n == 0 } /// Estimate the mean of the population. /// /// Returns 0 for an empty sample. #[inline] pub fn mean(&self) -> f64 { self.avg } /// Return the sample size. #[inline] pub fn len(&self) -> u64 { self.n } } impl core::default::Default for Mean { fn default() -> Mean { Mean::new() } } impl Estimate for Mean { #[inline] fn add(&mut self, sample: f64) { self.increment(); let delta_n = (sample - self.avg) / self.n.to_f64().unwrap(); self.add_inner(delta_n); } fn estimate(&self) -> f64 { self.mean() } } impl Merge for Mean { /// Merge another sample into this one. 
/// /// /// ## Example /// /// ``` /// use average::{Mean, Merge}; /// /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.]; /// let (left, right) = sequence.split_at(3); /// let avg_total: Mean = sequence.iter().collect(); /// let mut avg_left: Mean = left.iter().collect(); /// let avg_right: Mean = right.iter().collect(); /// avg_left.merge(&avg_right); /// assert_eq!(avg_total.mean(), avg_left.mean()); /// ``` #[inline] fn merge(&mut self, other: &Mean) { // This algorithm was proposed by Chan et al. in 1979. // // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance. let len_self = self.n.to_f64().unwrap(); let len_other = other.n.to_f64().unwrap(); let len_total = len_self + len_other; self.n += other.n; self.avg = (len_self * self.avg + len_other * other.avg) / len_total; // Chan et al. use // // self.avg += delta * len_other / len_total; // // instead, but this results in cancellation if the numbers of samples are similar. } } impl_from_iterator!(Mean); impl_from_par_iterator!(Mean); average-0.13.1/src/moments/mod.rs000064400000000000000000000271200000000000000146750ustar 00000000000000use num_traits::ToPrimitive; #[cfg(feature = "serde1")] use serde::{Serialize, Deserialize}; use super::{Estimate, Merge}; include!("mean.rs"); include!("variance.rs"); #[cfg(any(feature = "std", feature = "libm"))] include!("skewness.rs"); #[cfg(any(feature = "std", feature = "libm"))] include!("kurtosis.rs"); /// Alias for `Variance`. pub type MeanWithError = Variance; #[doc(hidden)] #[macro_export] macro_rules! define_moments_common { ($name:ident, $MAX_MOMENT:expr) => ( use num_traits::{pow, ToPrimitive}; /// An iterator over binomial coefficients. struct IterBinomial { a: u64, n: u64, k: u64, } impl IterBinomial { /// For a given n, iterate over all binomial coefficients binomial(n, k), for k=0...n. #[inline] pub fn new(n: u64) -> IterBinomial { IterBinomial { k: 0, a: 1, n, } } } impl Iterator for IterBinomial { type Item = u64; #[inline] fn next(&mut self) -> Option<u64> { if self.k > self.n { return None; } self.a = if self.k > 0 { self.a * (self.n - self.k + 1) / self.k } else { 1 }; self.k += 1; Some(self.a) } } /// The maximal order of the moment to be calculated. const MAX_MOMENT: usize = $MAX_MOMENT; impl $name { /// Create a new moments estimator. #[inline] pub fn new() -> $name { $name { n: 0, avg: 0., m: [0.; MAX_MOMENT - 1], } } /// Determine whether the sample is empty. #[inline] pub fn is_empty(&self) -> bool { self.n == 0 } /// Return the sample size. #[inline] pub fn len(&self) -> u64 { self.n } /// Estimate the mean of the population. /// /// Returns 0 for an empty sample. #[inline] pub fn mean(&self) -> f64 { self.avg } /// Estimate the `p`th central moment of the population. #[inline] pub fn central_moment(&self, p: usize) -> f64 { let n = self.n.to_f64().unwrap(); match p { 0 => 1., 1 => 0., _ => self.m[p - 2] / n } } /// Estimate the `p`th standardized moment of the population. #[cfg(any(feature = "std", feature = "libm"))] #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "libm"))))] #[inline] pub fn standardized_moment(&self, p: usize) -> f64 { match p { 0 => self.n.to_f64().unwrap(), 1 => 0., 2 => 1., _ => { let variance = self.central_moment(2); assert_ne!(variance, 0.); self.central_moment(p) / pow( num_traits::Float::sqrt(variance), p) }, } } /// Calculate the sample variance. /// /// This is an unbiased estimator of the variance of the population.
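///
/// For example, with the crate's own `Moments4` (defined via
/// `define_moments!(Moments4, 4)`; the numbers are illustrative):
///
/// ```
/// use average::{Moments4, assert_almost_eq};
///
/// let a: Moments4 = (1..6).map(f64::from).collect();
/// // The mean is 3 and the squared deviations sum to 10, so s² = 10 / (5 - 1).
/// assert_almost_eq!(a.sample_variance(), 2.5, 1e-14);
/// ```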
average-0.13.1/src/moments/mod.rs

use num_traits::ToPrimitive;

#[cfg(feature = "serde1")]
use serde::{Serialize, Deserialize};

use super::{Estimate, Merge};

include!("mean.rs");
include!("variance.rs");
#[cfg(any(feature = "std", feature = "libm"))]
include!("skewness.rs");
#[cfg(any(feature = "std", feature = "libm"))]
include!("kurtosis.rs");

/// Alias for `Variance`.
pub type MeanWithError = Variance;

#[doc(hidden)]
#[macro_export]
macro_rules! define_moments_common {
    ($name:ident, $MAX_MOMENT:expr) => (
        use num_traits::{pow, ToPrimitive};

        /// An iterator over binomial coefficients.
        struct IterBinomial {
            a: u64,
            n: u64,
            k: u64,
        }

        impl IterBinomial {
            /// For a given n, iterate over all binomial coefficients
            /// binomial(n, k), for k=0...n.
            #[inline]
            pub fn new(n: u64) -> IterBinomial {
                IterBinomial {
                    k: 0,
                    a: 1,
                    n,
                }
            }
        }

        impl Iterator for IterBinomial {
            type Item = u64;

            #[inline]
            fn next(&mut self) -> Option<u64> {
                if self.k > self.n {
                    return None;
                }
                self.a = if self.k == 0 {
                    1
                } else {
                    self.a * (self.n - self.k + 1) / self.k
                };
                self.k += 1;
                Some(self.a)
            }
        }

        /// The maximal order of the moment to be calculated.
        const MAX_MOMENT: usize = $MAX_MOMENT;

        impl $name {
            /// Create a new moments estimator.
            #[inline]
            pub fn new() -> $name {
                $name {
                    n: 0,
                    avg: 0.,
                    m: [0.; MAX_MOMENT - 1],
                }
            }

            /// Determine whether the sample is empty.
            #[inline]
            pub fn is_empty(&self) -> bool {
                self.n == 0
            }

            /// Return the sample size.
            #[inline]
            pub fn len(&self) -> u64 {
                self.n
            }

            /// Estimate the mean of the population.
            ///
            /// Returns 0 for an empty sample.
            #[inline]
            pub fn mean(&self) -> f64 {
                self.avg
            }

            /// Estimate the `p`th central moment of the population.
            #[inline]
            pub fn central_moment(&self, p: usize) -> f64 {
                let n = self.n.to_f64().unwrap();
                match p {
                    0 => 1.,
                    1 => 0.,
                    _ => self.m[p - 2] / n,
                }
            }

            /// Estimate the `p`th standardized moment of the population.
            #[cfg(any(feature = "std", feature = "libm"))]
            #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "libm"))))]
            #[inline]
            pub fn standardized_moment(&self, p: usize) -> f64 {
                match p {
                    0 => self.n.to_f64().unwrap(),
                    1 => 0.,
                    2 => 1.,
                    _ => {
                        let variance = self.central_moment(2);
                        assert_ne!(variance, 0.);
                        self.central_moment(p) / pow(
                            num_traits::Float::sqrt(variance), p)
                    },
                }
            }

            /// Calculate the sample variance.
            ///
            /// This is an unbiased estimator of the variance of the population.
            #[inline]
            pub fn sample_variance(&self) -> f64 {
                if self.n < 2 {
                    return 0.;
                }
                self.m[0] / (self.n - 1).to_f64().unwrap()
            }

            /// Calculate the sample skewness.
            #[cfg(any(feature = "std", feature = "libm"))]
            #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "libm"))))]
            #[inline]
            pub fn sample_skewness(&self) -> f64 {
                use num_traits::Float;
                if self.n < 2 {
                    return 0.;
                }
                let n = self.n.to_f64().unwrap();
                if self.n < 3 {
                    // Method of moments
                    return self.central_moment(3) / Float::powf(
                        n * (self.central_moment(2) / (n - 1.)), 1.5
                    );
                }
                // Adjusted Fisher-Pearson standardized moment coefficient
                Float::sqrt(n * (n - 1.)) / (n * (n - 2.)) * Float::powf(
                    self.central_moment(3) / (self.central_moment(2) / n), 1.5
                )
            }

            /// Calculate the sample excess kurtosis.
            #[inline]
            pub fn sample_excess_kurtosis(&self) -> f64 {
                if self.n < 4 {
                    return 0.;
                }
                let n = self.n.to_f64().unwrap();
                (n + 1.) * n * self.central_moment(4)
                    / ((n - 1.) * (n - 2.) * (n - 3.)
                       * pow(self.central_moment(2), 2))
                    - 3. * pow(n - 1., 2) / ((n - 2.) * (n - 3.))
            }

            /// Add an observation sampled from the population.
            #[inline]
            pub fn add(&mut self, x: f64) {
                self.n += 1;
                let delta = x - self.avg;
                let n = self.n.to_f64().unwrap();
                self.avg += delta / n;

                let mut coeff_delta = delta;
                let over_n = 1. / n;
                let mut term1 = (n - 1.) * (-over_n);
                let factor1 = -over_n;
                let mut term2 = (n - 1.) * over_n;
                let factor2 = (n - 1.) * over_n;

                let factor_coeff = -delta * over_n;

                let prev_m = self.m;
                for p in 2..=MAX_MOMENT {
                    term1 *= factor1;
                    term2 *= factor2;
                    coeff_delta *= delta;
                    self.m[p - 2] += (term1 + term2) * coeff_delta;

                    let mut coeff = 1.;
                    let mut binom = IterBinomial::new(p as u64);
                    binom.next().unwrap();  // Skip k = 0.
                    for k in 1..(p - 1) {
                        coeff *= factor_coeff;
                        self.m[p - 2] += binom.next().unwrap().to_f64().unwrap()
                            * prev_m[p - 2 - k] * coeff;
                    }
                }
            }
        }

        impl $crate::Merge for $name {
            #[inline]
            fn merge(&mut self, other: &$name) {
                let n_a = self.n.to_f64().unwrap();
                let n_b = other.n.to_f64().unwrap();
                let delta = other.avg - self.avg;
                self.n += other.n;
                let n = self.n.to_f64().unwrap();
                let n_a_over_n = n_a / n;
                let n_b_over_n = n_b / n;
                self.avg += n_b_over_n * delta;

                let factor_a = -n_b_over_n * delta;
                let factor_b = n_a_over_n * delta;
                let mut term_a = n_a * factor_a;
                let mut term_b = n_b * factor_b;

                let prev_m = self.m;
                for p in 2..=MAX_MOMENT {
                    term_a *= factor_a;
                    term_b *= factor_b;
                    self.m[p - 2] += other.m[p - 2] + term_a + term_b;

                    let mut coeff_a = 1.;
                    let mut coeff_b = 1.;
                    let mut coeff_delta = 1.;
                    let mut binom = IterBinomial::new(p as u64);
                    binom.next().unwrap();  // Skip k = 0.
                    for k in 1..(p - 1) {
                        coeff_a *= -n_b_over_n;
                        coeff_b *= n_a_over_n;
                        coeff_delta *= delta;
                        self.m[p - 2] += binom.next().unwrap().to_f64().unwrap()
                            * coeff_delta
                            * (prev_m[p - 2 - k] * coeff_a
                               + other.m[p - 2 - k] * coeff_b);
                    }
                }
            }
        }

        impl core::default::Default for $name {
            fn default() -> $name {
                $name::new()
            }
        }

        $crate::impl_from_iterator!($name);
        $crate::impl_from_par_iterator!($name);
    );
}
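// The macro-internal `IterBinomial` computes a row of Pascal's triangle with
// the multiplicative recurrence C(n, k) = C(n, k-1) * (n - k + 1) / k, which
// stays exact in integer arithmetic (the intermediate product is always
// divisible by k, since it equals k * C(n, k)). A standalone sketch of the
// same recurrence (hypothetical test module, mirroring the iterator above):
#[cfg(test)]
mod binomial_recurrence_sketch {
    fn binomial_row_4() -> [u64; 5] {
        let mut a = 1u64;
        let mut row = [0u64; 5];
        for k in 0..=4u64 {
            if k > 0 {
                a = a * (4 - k + 1) / k;
            }
            row[k as usize] = a;
        }
        row
    }

    #[test]
    fn matches_pascals_triangle() {
        assert_eq!(binomial_row_4(), [1, 4, 6, 4, 1]);
    }
}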
#[cfg(feature = "serde1")]
#[doc(hidden)]
#[macro_export]
macro_rules! define_moments_inner {
    ($name:ident, $MAX_MOMENT:expr) => (
        $crate::define_moments_common!($name, $MAX_MOMENT);

        use serde::{Serialize, Deserialize};

        /// Estimate the first N moments of a sequence of numbers ("population").
        #[derive(Debug, Clone, Serialize, Deserialize)]
        pub struct $name {
            /// Number of samples.
            ///
            /// Technically, this is the same as m_0, but we want this to be an
            /// integer to avoid numerical issues, so we store it separately.
            n: u64,
            /// Average.
            avg: f64,
            /// Moments times `n`.
            ///
            /// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition.
            m: [f64; MAX_MOMENT - 1],
        }
    );
}

#[cfg(not(feature = "serde1"))]
#[doc(hidden)]
#[macro_export]
macro_rules! define_moments_inner {
    ($name:ident, $MAX_MOMENT:expr) => (
        $crate::define_moments_common!($name, $MAX_MOMENT);

        /// Estimate the first N moments of a sequence of numbers ("population").
        #[derive(Debug, Clone)]
        pub struct $name {
            /// Number of samples.
            ///
            /// Technically, this is the same as m_0, but we want this to be an
            /// integer to avoid numerical issues, so we store it separately.
            n: u64,
            /// Average.
            avg: f64,
            /// Moments times `n`.
            ///
            /// Starts with m_2. m_0 is the same as `n` and m_1 is 0 by definition.
            m: [f64; MAX_MOMENT - 1],
        }
    );
}

/// Define an estimator of all moments up to a number given at compile time.
///
/// This uses a [general algorithm][paper] and is slightly less efficient than
/// the specialized implementations (such as [`Mean`], [`Variance`],
/// [`Skewness`] and [`Kurtosis`]), but it works for any number of moments >= 4.
///
/// (In practice, there is an upper limit due to integer overflow and possibly
/// numerical issues.)
///
/// [paper]: https://doi.org/10.1007/s00180-015-0637-z
/// [`Mean`]: ./struct.Mean.html
/// [`Variance`]: ./struct.Variance.html
/// [`Skewness`]: ./struct.Skewness.html
/// [`Kurtosis`]: ./struct.Kurtosis.html
///
///
/// # Example
///
/// ```
/// use average::{define_moments, assert_almost_eq};
///
/// define_moments!(Moments4, 4);
///
/// let mut a: Moments4 = (1..6).map(f64::from).collect();
/// assert_eq!(a.len(), 5);
/// assert_eq!(a.mean(), 3.0);
/// assert_eq!(a.central_moment(0), 1.0);
/// assert_eq!(a.central_moment(1), 0.0);
/// assert_eq!(a.central_moment(2), 2.0);
/// assert_eq!(a.standardized_moment(0), 5.0);
/// assert_eq!(a.standardized_moment(1), 0.0);
/// assert_eq!(a.standardized_moment(2), 1.0);
/// a.add(1.0);
/// // skewness
/// assert_almost_eq!(a.standardized_moment(3), 0.2795084971874741, 1e-15);
/// // kurtosis
/// assert_almost_eq!(a.standardized_moment(4), -1.365 + 3.0, 1e-14);
/// ```
#[macro_export]
macro_rules! define_moments {
    ($name:ident, $MAX_MOMENT:expr) =>
        ($crate::define_moments_inner!($name, $MAX_MOMENT););
}
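// A sketch of using the macro outside the documentation test above
// (hypothetical test module, not part of the original crate): the generated
// type supports the same split-and-merge pattern as the specialized
// estimators.
#[cfg(test)]
mod moments_merge_sketch {
    use num_traits::Float;
    use crate::{define_moments, Merge};

    define_moments!(Moments4, 4);

    #[test]
    fn merge_matches_whole_sample() {
        let data = [1., 2., 3., 4., 5., 6.];
        let mut left: Moments4 = data[..3].iter().collect();
        let right: Moments4 = data[3..].iter().collect();
        let whole: Moments4 = data.iter().collect();
        left.merge(&right);
        assert_eq!(left.len(), whole.len());
        assert_eq!(left.mean(), whole.mean());
        // Higher moments agree up to rounding.
        assert!(Float::abs(left.central_moment(4) - whole.central_moment(4)) < 1e-10);
    }
}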
average-0.13.1/src/moments/skewness.rs

use num_traits::Float;

/// Estimate the arithmetic mean, the variance and the skewness of a sequence
/// of numbers ("population").
///
/// This can be used to estimate the standard error of the mean.
#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "libm"))))]
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct Skewness {
    /// Estimator of mean and variance.
    avg: MeanWithError,
    /// Intermediate sum of cubes for calculating the skewness.
    sum_3: f64,
}

impl Skewness {
    /// Create a new skewness estimator.
    #[inline]
    pub fn new() -> Skewness {
        Skewness {
            avg: MeanWithError::new(),
            sum_3: 0.,
        }
    }

    /// Increment the sample size.
    ///
    /// This does not update anything else.
    #[inline]
    fn increment(&mut self) {
        self.avg.increment();
    }

    /// Add an observation given an already calculated difference from the mean
    /// divided by the number of samples, assuming the inner count of the sample
    /// size was already updated.
    ///
    /// This is useful for avoiding unnecessary divisions in the inner loop.
    #[inline]
    fn add_inner(&mut self, delta: f64, delta_n: f64) {
        // This algorithm was suggested by Terriberry.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        let n = self.len().to_f64().unwrap();
        let term = delta * delta_n * (n - 1.);
        self.sum_3 += term * delta_n * (n - 2.)
            - 3.*delta_n * self.avg.sum_2;
        self.avg.add_inner(delta_n);
    }

    /// Determine whether the sample is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.avg.is_empty()
    }

    /// Estimate the mean of the population.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn mean(&self) -> f64 {
        self.avg.mean()
    }

    /// Return the sample size.
    #[inline]
    pub fn len(&self) -> u64 {
        self.avg.len()
    }

    /// Calculate the sample variance.
    ///
    /// This is an unbiased estimator of the variance of the population.
    #[inline]
    pub fn sample_variance(&self) -> f64 {
        self.avg.sample_variance()
    }

    /// Calculate the population variance of the sample.
    ///
    /// This is a biased estimator of the variance of the population.
    #[inline]
    pub fn population_variance(&self) -> f64 {
        self.avg.population_variance()
    }

    /// Estimate the standard error of the mean of the population.
    #[inline]
    pub fn error_mean(&self) -> f64 {
        self.avg.error()
    }

    /// Estimate the skewness of the population.
    #[inline]
    pub fn skewness(&self) -> f64 {
        if self.sum_3 == 0. {
            return 0.;
        }
        let n = self.len().to_f64().unwrap();
        let sum_2 = self.avg.sum_2;
        debug_assert_ne!(sum_2, 0.);
        Float::sqrt(n) * self.sum_3 / Float::sqrt(sum_2*sum_2*sum_2)
    }
}

impl Default for Skewness {
    fn default() -> Skewness {
        Skewness::new()
    }
}

impl Estimate for Skewness {
    #[inline]
    fn add(&mut self, x: f64) {
        let delta = x - self.mean();
        self.increment();
        let n = self.len().to_f64().unwrap();
        self.add_inner(delta, delta/n);
    }

    #[inline]
    fn estimate(&self) -> f64 {
        self.skewness()
    }
}

impl Merge for Skewness {
    #[inline]
    fn merge(&mut self, other: &Skewness) {
        let len_self = self.len().to_f64().unwrap();
        let len_other = other.len().to_f64().unwrap();
        let len_total = len_self + len_other;
        let delta = other.mean() - self.mean();
        let delta_n = delta / len_total;
        self.sum_3 += other.sum_3
            + delta*delta_n*delta_n * len_self*len_other*(len_self - len_other)
            + 3.*delta_n * (len_self * other.avg.sum_2 - len_other * self.avg.sum_2);
        self.avg.merge(&other.avg);
    }
}

impl_from_iterator!(Skewness);
impl_from_par_iterator!(Skewness);
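// A quick sanity sketch (hypothetical test module, not part of the original
// crate): for a sample that is symmetric around its mean, the third central
// moment vanishes, so the skewness estimate is numerically close to zero.
#[cfg(test)]
mod skewness_symmetry_sketch {
    use num_traits::Float;
    use crate::{Estimate, Skewness};

    #[test]
    fn symmetric_sample_has_vanishing_skewness() {
        let mut s = Skewness::new();
        for &x in &[1., 2., 3., 4., 5.] {
            s.add(x);
        }
        assert!(Float::abs(s.skewness()) < 1e-12);
    }
}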
average-0.13.1/src/moments/variance.rs

/// Estimate the arithmetic mean and the variance of a sequence of numbers
/// ("population").
///
/// This can be used to estimate the standard error of the mean.
///
///
/// ## Example
///
/// ```
/// use average::Variance;
///
/// let a: Variance = (1..6).map(f64::from).collect();
/// println!("The mean is {} ± {}.", a.mean(), a.error());
/// ```
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct Variance {
    /// Estimator of average.
    avg: Mean,
    /// Intermediate sum of squares for calculating the variance.
    sum_2: f64,
}

impl Variance {
    /// Create a new variance estimator.
    #[inline]
    pub fn new() -> Variance {
        Variance { avg: Mean::new(), sum_2: 0. }
    }

    /// Increment the sample size.
    ///
    /// This does not update anything else.
    #[inline]
    fn increment(&mut self) {
        self.avg.increment();
    }

    /// Add an observation given an already calculated difference from the mean
    /// divided by the number of samples, assuming the inner count of the sample
    /// size was already updated.
    ///
    /// This is useful for avoiding unnecessary divisions in the inner loop.
    #[inline]
    fn add_inner(&mut self, delta_n: f64) {
        // This algorithm, introduced by Welford in 1962, trades a division
        // inside the loop for numerical stability.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        let n = self.avg.len().to_f64().unwrap();
        self.avg.add_inner(delta_n);
        self.sum_2 += delta_n * delta_n * n * (n - 1.);
    }

    /// Determine whether the sample is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.avg.is_empty()
    }

    /// Estimate the mean of the population.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn mean(&self) -> f64 {
        self.avg.mean()
    }

    /// Return the sample size.
    #[inline]
    pub fn len(&self) -> u64 {
        self.avg.len()
    }

    /// Calculate the sample variance.
    ///
    /// This is an unbiased estimator of the variance of the population.
    #[inline]
    pub fn sample_variance(&self) -> f64 {
        if self.avg.len() < 2 {
            return 0.;
        }
        self.sum_2 / (self.avg.len() - 1).to_f64().unwrap()
    }

    /// Calculate the population variance of the sample.
    ///
    /// This is a biased estimator of the variance of the population.
    #[inline]
    pub fn population_variance(&self) -> f64 {
        let n = self.avg.len();
        if n < 2 {
            return 0.;
        }
        self.sum_2 / n.to_f64().unwrap()
    }

    /// Estimate the variance of the mean of the population.
    #[inline]
    pub fn variance_of_mean(&self) -> f64 {
        let n = self.avg.len();
        if n == 0 {
            return 0.;
        }
        self.sample_variance() / n.to_f64().unwrap()
    }

    /// Estimate the standard error of the mean of the population.
    #[cfg(any(feature = "std", feature = "libm"))]
    #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "libm"))))]
    #[inline]
    pub fn error(&self) -> f64 {
        num_traits::Float::sqrt(self.variance_of_mean())
    }
}

impl core::default::Default for Variance {
    fn default() -> Variance {
        Variance::new()
    }
}

impl Estimate for Variance {
    #[inline]
    fn add(&mut self, sample: f64) {
        self.increment();
        let delta_n = (sample - self.avg.mean()) / self.len().to_f64().unwrap();
        self.add_inner(delta_n);
    }

    #[inline]
    fn estimate(&self) -> f64 {
        self.population_variance()
    }
}

impl Merge for Variance {
    /// Merge another sample into this one.
    ///
    ///
    /// ## Example
    ///
    /// ```
    /// use average::{Variance, Merge};
    ///
    /// let sequence: &[f64] = &[1., 2., 3., 4., 5., 6., 7., 8., 9.];
    /// let (left, right) = sequence.split_at(3);
    /// let avg_total: Variance = sequence.iter().collect();
    /// let mut avg_left: Variance = left.iter().collect();
    /// let avg_right: Variance = right.iter().collect();
    /// avg_left.merge(&avg_right);
    /// assert_eq!(avg_total.mean(), avg_left.mean());
    /// assert_eq!(avg_total.sample_variance(), avg_left.sample_variance());
    /// ```
    #[inline]
    fn merge(&mut self, other: &Variance) {
        // This algorithm was proposed by Chan et al. in 1979.
        //
        // See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance.
        let len_self = self.len().to_f64().unwrap();
        let len_other = other.len().to_f64().unwrap();
        let len_total = len_self + len_other;
        let delta = other.mean() - self.mean();
        self.avg.merge(&other.avg);
        self.sum_2 += other.sum_2 + delta*delta * len_self * len_other / len_total;
    }
}

impl_from_iterator!(Variance);
impl_from_par_iterator!(Variance);
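// Why pay a division per sample? A sketch of the numerical motivation
// (hypothetical test module, not part of the original crate): with a large
// common offset, the textbook formula Σx² − (Σx)²/n suffers catastrophic
// cancellation, while the incremental update used above stays accurate.
#[cfg(test)]
mod welford_stability_sketch {
    use num_traits::Float;
    use crate::{Estimate, Variance};

    #[test]
    fn welford_survives_a_large_offset() {
        let offset = 1e9;
        let samples = [offset + 4., offset + 7., offset + 13., offset + 16.];
        let mut v = Variance::new();
        let (mut sum, mut sum_sq) = (0., 0.);
        for &x in &samples {
            v.add(x);
            sum += x;
            sum_sq += x * x;
        }
        // The sample variance of [4, 7, 13, 16] is exactly 30.
        assert!(Float::abs(v.sample_variance() - 30.) < 1e-5);
        // For comparison: the naive estimate below is dominated by the
        // rounding of x * x at magnitudes around 1e18 and typically comes
        // out far from 30.
        let naive = (sum_sq - sum * sum / 4.) / 3.;
        assert!(naive.is_finite());
    }
}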
average-0.13.1/src/quantile.rs

use core::cmp::min;

use easy_cast::{Conv, ConvFloat};
use num_traits::{ToPrimitive, Float};
use float_ord::sort as sort_floats;

#[cfg(feature = "serde1")]
use serde::{Serialize, Deserialize};

use super::Estimate;

/// Estimate the p-quantile of a sequence of numbers ("population").
///
/// The [P² algorithm][1] is employed. It uses constant space but the relative
/// error of the quantile estimate is not bounded by a function of the number
/// of samples. For algorithms that use growing space with bounded error, see
/// the [`quantiles`][2] crate.
///
/// It is recommended to use a different algorithm for discrete distributions
/// and a small number of samples, or for quantiles close to a singularity in
/// the distribution.
///
/// [1]: http://www.cs.wustl.edu/~jain/papers/ftp/psqr.pdf
/// [2]: https://crates.io/crates/quantiles
#[derive(Debug, Clone)]
#[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "libm"))))]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct Quantile {
    /// Marker heights.
    q: [f64; 5],
    /// Marker positions.
    n: [i64; 5],
    /// Desired marker positions.
    m: [f64; 5],
    /// Increment in desired marker positions.
    dm: [f64; 5],
}

impl Quantile {
    /// Create a new p-quantile estimator.
    ///
    /// Panics if `p` is not between 0 and 1.
    #[inline]
    pub fn new(p: f64) -> Quantile {
        assert!((0. ..= 1.).contains(&p));
        Quantile {
            q: [0.; 5],
            n: [1, 2, 3, 4, 0],
            m: [1., 1. + 2.*p, 1. + 4.*p, 3. + 2.*p, 5.],
            dm: [0., p/2., p, (1. + p)/2., 1.],
        }
    }

    /// Return the value of `p` for this p-quantile.
    #[inline]
    pub fn p(&self) -> f64 {
        self.dm[2]
    }

    /// Parabolic prediction for marker height.
    #[inline]
    fn parabolic(&self, i: usize, d: f64) -> f64 {
        debug_assert_eq!(d.abs(), 1.);
        let s = i64::conv_nearest(d);
        self.q[i] + d / (self.n[i + 1] - self.n[i - 1]).to_f64().unwrap()
            * ((self.n[i] - self.n[i - 1] + s).to_f64().unwrap()
                   * (self.q[i + 1] - self.q[i])
                   / (self.n[i + 1] - self.n[i]).to_f64().unwrap()
               + (self.n[i + 1] - self.n[i] - s).to_f64().unwrap()
                   * (self.q[i] - self.q[i - 1])
                   / (self.n[i] - self.n[i - 1]).to_f64().unwrap())
    }

    /// Linear prediction for marker height.
    #[inline]
    fn linear(&self, i: usize, d: f64) -> f64 {
        debug_assert_eq!(d.abs(), 1.);
        let sum = if d < 0. { i - 1 } else { i + 1 };
        self.q[i] + d * (self.q[sum] - self.q[i])
            / (self.n[sum] - self.n[i]).to_f64().unwrap()
    }

    /// Estimate the p-quantile of the population.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn quantile(&self) -> f64 {
        if self.len() >= 5 {
            return self.q[2];
        }

        // For fewer than five samples, estimate the quantile by sorting the
        // sample.
        if self.is_empty() {
            return 0.;
        }
        let mut heights: [f64; 4] = [
            self.q[0], self.q[1], self.q[2], self.q[3]
        ];
        let len = usize::conv(self.len());
        debug_assert!(len < 5);
        sort_floats(&mut heights[..len]);
        let desired_index = f64::conv(len) * self.p() - 1.;
        let mut index = desired_index.ceil();
        if desired_index == index && index >= 0. {
            let index = usize::conv_nearest(index);
            debug_assert!(index < 5);
            if index < len - 1 {
                // `heights[index]` and `heights[index + 1]` are equally valid
                // estimates, by convention we take their average.
                return 0.5*heights[index] + 0.5*heights[index + 1];
            }
        }
        index = index.max(0.);
        let mut index = usize::conv_nearest(index);
        debug_assert!(index < 5);
        index = min(index, len - 1);
        heights[index]
    }

    /// Return the sample size.
    #[inline]
    pub fn len(&self) -> u64 {
        debug_assert!(self.n[4] >= 0);
        u64::conv(self.n[4])
    }

    /// Determine whether the sample is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

impl core::default::Default for Quantile {
    /// Create a new median estimator.
    fn default() -> Quantile {
        Quantile::new(0.5)
    }
}
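// A small end-to-end sketch (hypothetical test module, not part of the
// original crate): on a long uniform ramp the P² median estimate lands close
// to the true median, even though the estimator stores only five markers.
#[cfg(test)]
mod quantile_sketch {
    use num_traits::Float;
    use crate::{Estimate, Quantile};

    #[test]
    fn median_of_a_uniform_ramp() {
        let mut q = Quantile::new(0.5);
        for i in 0..100 {
            q.add(f64::from(i));
        }
        // The true median of 0..=99 is 49.5; P² is approximate, so we only
        // check that the estimate is in the right neighborhood.
        assert!(Float::abs(q.quantile() - 49.5) < 2.);
    }
}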
impl Estimate for Quantile {
    #[inline]
    fn add(&mut self, x: f64) {
        // n[4] is the sample size.
        if self.n[4] < 5 {
            self.q[usize::conv(self.n[4])] = x;
            self.n[4] += 1;
            if self.n[4] == 5 {
                sort_floats(&mut self.q);
            }
            return;
        }

        // Find the cell k containing the new observation x.
        let mut k: usize;
        if x < self.q[0] {
            self.q[0] = x;
            k = 0;
        } else {
            k = 4;
            for i in 1..5 {
                if x < self.q[i] {
                    k = i;
                    break;
                }
            }
            if self.q[4] < x {
                self.q[4] = x;
            }
        };

        // Increment all positions greater than k.
        for i in k..5 {
            self.n[i] += 1;
        }
        for i in 0..5 {
            self.m[i] += self.dm[i];
        }

        // Adjust the heights of the markers if necessary.
        for i in 1..4 {
            let d = self.m[i] - self.n[i].to_f64().unwrap();
            if d >= 1. && self.n[i + 1] - self.n[i] > 1 ||
               d <= -1. && self.n[i - 1] - self.n[i] < -1 {
                let d = Float::signum(d);
                let q_new = self.parabolic(i, d);
                if self.q[i - 1] < q_new && q_new < self.q[i + 1] {
                    self.q[i] = q_new;
                } else {
                    self.q[i] = self.linear(i, d);
                }
                let delta = i64::conv_nearest(d);
                debug_assert_eq!(delta.abs(), 1);
                self.n[i] += delta;
            }
        }
    }

    fn estimate(&self) -> f64 {
        self.quantile()
    }
}

#[test]
fn reference() {
    let observations = [
        0.02, 0.5, 0.74, 3.39, 0.83,
        22.37, 10.15, 15.43, 38.62, 15.92,
        34.60, 10.28, 1.47, 0.40, 0.05,
        11.39, 0.27, 0.42, 0.09, 11.37,
    ];
    let mut q = Quantile::new(0.5);
    for &o in observations.iter() {
        q.add(o);
    }
    assert_eq!(q.n, [1, 6, 10, 16, 20]);
    assert_eq!(q.m, [1., 5.75, 10.50, 15.25, 20.0]);
    assert_eq!(q.len(), 20);
    assert_eq!(q.quantile(), 4.2462394088036435);
}

average-0.13.1/src/traits.rs

/// Estimate a statistic of a sequence of numbers ("population").
pub trait Estimate {
    /// Add an observation sampled from the population.
    fn add(&mut self, x: f64);

    /// Estimate the statistic of the population.
    fn estimate(&self) -> f64;
}

/// Merge with another estimator.
pub trait Merge {
    /// Merge the other estimator into this one.
    ///
    /// Both estimators are assumed to be fed samples from the same population.
    ///
    /// This method is useful for parallelizing the calculation of estimates:
    /// ```
    /// use average::{Estimate, Mean, Merge};
    ///
    /// let data = &[1., 2., 3., 4., 5., 6., 7., 8., 9., 10.];
    ///
    /// let thread1 = std::thread::spawn(move || -> Mean {
    ///     let mut avg = Mean::new();
    ///     for &x in &data[..5] {
    ///         avg.add(x);
    ///     }
    ///     avg
    /// });
    /// let thread2 = std::thread::spawn(move || -> Mean {
    ///     let mut avg = Mean::new();
    ///     for &x in &data[5..] {
    ///         avg.add(x);
    ///     }
    ///     avg
    /// });
    ///
    /// let mut avg = thread1.join().unwrap();
    /// avg.merge(&thread2.join().unwrap());
    /// assert_eq!(avg.mean(), 5.5);
    /// ```
    fn merge(&mut self, other: &Self);
}

/// Calculate the multinomial variance. Relevant for histograms.
#[inline(always)]
fn multinomial_variance(n: f64, n_tot_inv: f64) -> f64 {
    n * (1. - n * n_tot_inv)
}
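// Why this formula: for a histogram over N total samples, the count in one
// bin is marginally Binomial(N, p) with p = n/N, whose variance is
// N*p*(1 - p) = n*(1 - n/N). A minimal check (hypothetical test module, not
// part of the original crate), using powers of two so the arithmetic is
// exact:
#[cfg(test)]
mod multinomial_variance_sketch {
    use super::multinomial_variance;

    #[test]
    fn matches_the_binomial_marginal() {
        // A bin holding 32 of 128 samples: variance = 32 * (1 - 0.25) = 24.
        assert_eq!(multinomial_variance(32., 1. / 128.), 24.);
    }
}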
/// Get the bins and ranges from a histogram.
pub trait Histogram:
    where for<'a> &'a Self: IntoIterator<Item = ((f64, f64), u64)>
{
    /// Return the bins of the histogram.
    fn bins(&self) -> &[u64];

    /// Estimate the variance for the given bin.
    ///
    /// The square root of this estimates the error of the bin count.
    #[inline]
    fn variance(&self, bin: usize) -> f64 {
        let count = self.bins()[bin];
        let sum: u64 = self.bins().iter().sum();
        multinomial_variance(count as f64, 1./(sum as f64))
    }

    /// Return an iterator over the bins normalized by the bin widths.
    #[inline]
    fn normalized_bins(&self) -> IterNormalized<<&Self as IntoIterator>::IntoIter> {
        IterNormalized { histogram_iter: self.into_iter() }
    }

    /// Return an iterator over the bin widths.
    #[inline]
    fn widths(&self) -> IterWidths<<&Self as IntoIterator>::IntoIter> {
        IterWidths { histogram_iter: self.into_iter() }
    }

    /// Return an iterator over the bin centers.
    #[inline]
    fn centers(&self) -> IterBinCenters<<&Self as IntoIterator>::IntoIter> {
        IterBinCenters { histogram_iter: self.into_iter() }
    }

    /// Return an iterator over the bin variances.
    ///
    /// This is more efficient than calling `variance()` for each bin.
    #[inline]
    fn variances(&self) -> IterVariances<<&Self as IntoIterator>::IntoIter> {
        let sum: u64 = self.bins().iter().sum();
        IterVariances {
            histogram_iter: self.into_iter(),
            sum_inv: 1./(sum as f64),
        }
    }
}

/// Iterate over the bins normalized by bin width.
#[derive(Debug, Clone)]
pub struct IterNormalized<T>
    where T: Iterator<Item = ((f64, f64), u64)>
{
    histogram_iter: T,
}

impl<T> Iterator for IterNormalized<T>
    where T: Iterator<Item = ((f64, f64), u64)>
{
    type Item = f64;

    #[inline]
    fn next(&mut self) -> Option<f64> {
        self.histogram_iter.next().map(|((a, b), count)| (count as f64) / (b - a))
    }
}

/// Iterate over the widths of the bins.
#[derive(Debug, Clone)]
pub struct IterWidths<T>
    where T: Iterator<Item = ((f64, f64), u64)>
{
    histogram_iter: T,
}

impl<T> Iterator for IterWidths<T>
    where T: Iterator<Item = ((f64, f64), u64)>
{
    type Item = f64;

    #[inline]
    fn next(&mut self) -> Option<f64> {
        self.histogram_iter.next().map(|((a, b), _)| b - a)
    }
}

/// Iterate over the bin centers.
#[derive(Debug, Clone)]
pub struct IterBinCenters<T>
    where T: Iterator<Item = ((f64, f64), u64)>
{
    histogram_iter: T,
}

impl<T> Iterator for IterBinCenters<T>
    where T: Iterator<Item = ((f64, f64), u64)>
{
    type Item = f64;

    #[inline]
    fn next(&mut self) -> Option<f64> {
        self.histogram_iter.next().map(|((a, b), _)| 0.5 * (a + b))
    }
}

/// Iterate over the variances.
#[derive(Debug, Clone)]
pub struct IterVariances<T>
    where T: Iterator<Item = ((f64, f64), u64)>
{
    histogram_iter: T,
    sum_inv: f64,
}

impl<T> Iterator for IterVariances<T>
    where T: Iterator<Item = ((f64, f64), u64)>
{
    type Item = f64;

    #[inline]
    fn next(&mut self) -> Option<f64> {
        self.histogram_iter.next()
            .map(|(_, n)| multinomial_variance(n as f64, self.sum_inv))
    }
}
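// A minimal sketch of implementing the trait by hand (hypothetical test
// module, not part of the original crate; real implementations live
// elsewhere in the crate). Only `bins()` plus an `IntoIterator` yielding
// `((left, right), count)` pairs are required; the provided methods then
// give widths, centers, normalized bins and variances for free.
#[cfg(test)]
mod histogram_trait_sketch {
    use super::Histogram;

    /// A hypothetical histogram with two fixed bins, [0, 1) and [1, 3).
    struct TwoBins {
        bins: [u64; 2],
    }

    struct TwoBinsIter<'a> {
        histogram: &'a TwoBins,
        index: usize,
    }

    impl<'a> Iterator for TwoBinsIter<'a> {
        type Item = ((f64, f64), u64);

        fn next(&mut self) -> Option<Self::Item> {
            const RANGES: [(f64, f64); 2] = [(0., 1.), (1., 3.)];
            let i = self.index;
            if i < 2 {
                self.index += 1;
                Some((RANGES[i], self.histogram.bins[i]))
            } else {
                None
            }
        }
    }

    impl<'a> IntoIterator for &'a TwoBins {
        type Item = ((f64, f64), u64);
        type IntoIter = TwoBinsIter<'a>;

        fn into_iter(self) -> TwoBinsIter<'a> {
            TwoBinsIter { histogram: self, index: 0 }
        }
    }

    impl Histogram for TwoBins {
        fn bins(&self) -> &[u64] {
            &self.bins
        }
    }

    #[test]
    fn provided_iterators_work() {
        let h = TwoBins { bins: [8, 4] };
        let mut widths = h.widths();
        assert_eq!(widths.next(), Some(1.));
        assert_eq!(widths.next(), Some(2.));
        let mut normalized = h.normalized_bins();
        assert_eq!(normalized.next(), Some(8.)); // 8 counts over width 1
        assert_eq!(normalized.next(), Some(2.)); // 4 counts over width 2
    }
}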
average-0.13.1/src/weighted_mean.rs

#[cfg(feature = "serde1")]
use serde::{Serialize, Deserialize};

use super::{MeanWithError, Estimate, Merge};

/// Estimate the weighted arithmetic mean of a sequence of numbers
/// ("population").
///
///
/// ## Example
///
/// ```
/// use average::WeightedMean;
///
/// let a: WeightedMean = (1..6).zip(1..6)
///     .map(|(x, w)| (f64::from(x), f64::from(w))).collect();
/// println!("The weighted mean is {}.", a.mean());
/// ```
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct WeightedMean {
    /// Sum of the weights.
    weight_sum: f64,
    /// Weighted mean value.
    weighted_avg: f64,
}

impl WeightedMean {
    /// Create a new weighted mean estimator.
    pub fn new() -> WeightedMean {
        WeightedMean {
            weight_sum: 0.,
            weighted_avg: 0.,
        }
    }

    /// Add an observation sampled from the population.
    #[inline]
    pub fn add(&mut self, sample: f64, weight: f64) {
        // This is a weighted version of the incremental mean update
        // suggested by Welford in 1962.
        //
        // See
        // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
        // and
        // http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf.
        self.weight_sum += weight;

        let prev_avg = self.weighted_avg;
        self.weighted_avg = prev_avg + (weight / self.weight_sum) * (sample - prev_avg);
    }

    /// Determine whether the sample is empty.
    ///
    /// Might be a false positive if the sum of weights is zero.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.weight_sum == 0.
    }

    /// Return the sum of the weights.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn sum_weights(&self) -> f64 {
        self.weight_sum
    }

    /// Estimate the weighted mean of the population.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn mean(&self) -> f64 {
        self.weighted_avg
    }
}

impl core::default::Default for WeightedMean {
    fn default() -> WeightedMean {
        WeightedMean::new()
    }
}

impl core::iter::FromIterator<(f64, f64)> for WeightedMean {
    fn from_iter<T>(iter: T) -> WeightedMean
        where T: IntoIterator<Item = (f64, f64)>
    {
        let mut a = WeightedMean::new();
        for (i, w) in iter {
            a.add(i, w);
        }
        a
    }
}

impl<'a> core::iter::FromIterator<&'a (f64, f64)> for WeightedMean {
    fn from_iter<T>(iter: T) -> WeightedMean
        where T: IntoIterator<Item = &'a (f64, f64)>
    {
        let mut a = WeightedMean::new();
        for &(i, w) in iter {
            a.add(i, w);
        }
        a
    }
}

impl Merge for WeightedMean {
    /// Merge another sample into this one.
    ///
    ///
    /// ## Example
    ///
    /// ```
    /// use average::{WeightedMean, Merge};
    ///
    /// let weighted_sequence: &[(f64, f64)] = &[
    ///     (1., 0.1), (2., 0.2), (3., 0.3), (4., 0.4), (5., 0.5),
    ///     (6., 0.6), (7., 0.7), (8., 0.8), (9., 0.9)];
    /// let (left, right) = weighted_sequence.split_at(3);
    /// let avg_total: WeightedMean = weighted_sequence.iter().collect();
    /// let mut avg_left: WeightedMean = left.iter().collect();
    /// let avg_right: WeightedMean = right.iter().collect();
    /// avg_left.merge(&avg_right);
    /// assert!((avg_total.mean() - avg_left.mean()).abs() < 1e-15);
    /// ```
    #[inline]
    fn merge(&mut self, other: &WeightedMean) {
        let total_weight_sum = self.weight_sum + other.weight_sum;
        self.weighted_avg = (self.weight_sum * self.weighted_avg
                             + other.weight_sum * other.weighted_avg)
                            / total_weight_sum;
        self.weight_sum = total_weight_sum;
    }
}
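// A tiny numerical sketch (hypothetical test module, not part of the
// original crate) of the incremental weighted update: each observation pulls
// the running mean towards itself in proportion to its share of the total
// weight.
#[cfg(test)]
mod weighted_mean_sketch {
    use crate::WeightedMean;

    #[test]
    fn weights_shift_the_mean() {
        let mut m = WeightedMean::new();
        m.add(0., 1.);
        // Weight 3 out of a total of 4: the mean moves 3/4 of the way
        // from 0 towards 1.
        m.add(1., 3.);
        assert_eq!(m.mean(), 0.75);
    }
}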
/// Estimate the weighted and unweighted arithmetic mean and the unweighted
/// variance of a sequence of numbers ("population").
///
/// This can be used to estimate the standard error of the weighted mean.
///
///
/// ## Example
///
/// ```
/// use average::WeightedMeanWithError;
///
/// let a: WeightedMeanWithError = (1..6).zip(1..6)
///     .map(|(x, w)| (f64::from(x), f64::from(w))).collect();
/// println!("The weighted mean is {} ± {}.", a.weighted_mean(), a.error());
/// ```
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct WeightedMeanWithError {
    /// Sum of the squares of the weights.
    weight_sum_sq: f64,
    /// Estimator of the weighted mean.
    weighted_avg: WeightedMean,
    /// Estimator of unweighted mean and its variance.
    unweighted_avg: MeanWithError,
}

impl WeightedMeanWithError {
    /// Create a new weighted and unweighted mean estimator.
    #[inline]
    pub fn new() -> WeightedMeanWithError {
        WeightedMeanWithError {
            weight_sum_sq: 0.,
            weighted_avg: WeightedMean::new(),
            unweighted_avg: MeanWithError::new(),
        }
    }

    /// Add an observation sampled from the population.
    #[inline]
    pub fn add(&mut self, sample: f64, weight: f64) {
        // The algorithm for the unweighted mean was suggested by Welford in 1962.
        // The algorithm for the weighted mean was suggested by West in 1979.
        //
        // See
        // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
        // and
        // http://people.ds.cam.ac.uk/fanf2/hermes/doc/antiforgery/stats.pdf.
        self.weight_sum_sq += weight*weight;
        self.weighted_avg.add(sample, weight);
        self.unweighted_avg.add(sample);
    }

    /// Determine whether the sample is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.unweighted_avg.is_empty()
    }

    /// Return the sum of the weights.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn sum_weights(&self) -> f64 {
        self.weighted_avg.sum_weights()
    }

    /// Return the sum of the squared weights.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn sum_weights_sq(&self) -> f64 {
        self.weight_sum_sq
    }

    /// Estimate the weighted mean of the population.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn weighted_mean(&self) -> f64 {
        self.weighted_avg.mean()
    }

    /// Estimate the unweighted mean of the population.
    ///
    /// Returns 0 for an empty sample.
    #[inline]
    pub fn unweighted_mean(&self) -> f64 {
        self.unweighted_avg.mean()
    }

    /// Return the sample size.
    #[inline]
    pub fn len(&self) -> u64 {
        self.unweighted_avg.len()
    }

    /// Calculate the effective sample size.
    #[inline]
    pub fn effective_len(&self) -> f64 {
        if self.is_empty() {
            return 0.;
        }
        let weight_sum = self.weighted_avg.sum_weights();
        weight_sum * weight_sum / self.weight_sum_sq
    }

    /// Calculate the *unweighted* population variance of the sample.
    ///
    /// This is a biased estimator of the variance of the population.
    #[inline]
    pub fn population_variance(&self) -> f64 {
        self.unweighted_avg.population_variance()
    }

    /// Calculate the *unweighted* sample variance.
    ///
    /// This is an unbiased estimator of the variance of the population.
    #[inline]
    pub fn sample_variance(&self) -> f64 {
        self.unweighted_avg.sample_variance()
    }

    /// Estimate the variance of the *weighted* mean of the population.
    ///
    /// Returns 0 if the sum of weights is 0.
    ///
    /// This unbiased estimator assumes that the samples were independently
    /// drawn from the same population with constant variance.
    #[inline]
    pub fn variance_of_weighted_mean(&self) -> f64 {
        // This uses the same estimate as WinCross, which should provide better
        // results than the ones used by SPSS or Mentor.
        //
        // See http://www.analyticalgroup.com/download/WEIGHTED_VARIANCE.pdf.
        let weight_sum = self.weighted_avg.sum_weights();
        if weight_sum == 0. {
            return 0.;
        }
        let inv_effective_len = self.weight_sum_sq / (weight_sum * weight_sum);
        self.sample_variance() * inv_effective_len
    }

    /// Estimate the standard error of the *weighted* mean of the population.
    ///
    /// Returns 0 if the sum of weights is 0.
    ///
    /// This unbiased estimator assumes that the samples were independently
    /// drawn from the same population with constant variance.
    #[cfg(any(feature = "std", feature = "libm"))]
    #[cfg_attr(doc_cfg, doc(cfg(any(feature = "std", feature = "libm"))))]
    #[inline]
    pub fn error(&self) -> f64 {
        num_traits::Float::sqrt(self.variance_of_weighted_mean())
    }
}
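// A sketch of the effective sample size (hypothetical test module, not part
// of the original crate): (Σw)² / Σw² equals the actual sample size when all
// weights are equal, and drops below it when the weights are unbalanced.
#[cfg(test)]
mod effective_len_sketch {
    use crate::WeightedMeanWithError;

    #[test]
    fn equal_weights_give_the_full_sample_size() {
        let mut m = WeightedMeanWithError::new();
        for x in 0..4 {
            m.add(f64::from(x), 2.5);
        }
        // (4 * 2.5)^2 / (4 * 2.5^2) = 100 / 25 = 4.
        assert_eq!(m.len(), 4);
        assert_eq!(m.effective_len(), 4.);
    }
}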
impl Merge for WeightedMeanWithError {
    /// Merge another sample into this one.
    ///
    ///
    /// ## Example
    ///
    /// ```
    /// use average::{WeightedMeanWithError, Merge};
    ///
    /// let weighted_sequence: &[(f64, f64)] = &[
    ///     (1., 0.1), (2., 0.2), (3., 0.3), (4., 0.4), (5., 0.5),
    ///     (6., 0.6), (7., 0.7), (8., 0.8), (9., 0.9)];
    /// let (left, right) = weighted_sequence.split_at(3);
    /// let avg_total: WeightedMeanWithError = weighted_sequence.iter().collect();
    /// let mut avg_left: WeightedMeanWithError = left.iter().collect();
    /// let avg_right: WeightedMeanWithError = right.iter().collect();
    /// avg_left.merge(&avg_right);
    /// assert!((avg_total.weighted_mean() - avg_left.weighted_mean()).abs() < 1e-15);
    /// assert!((avg_total.error() - avg_left.error()).abs() < 1e-15);
    /// ```
    #[inline]
    fn merge(&mut self, other: &WeightedMeanWithError) {
        self.weight_sum_sq += other.weight_sum_sq;
        self.weighted_avg.merge(&other.weighted_avg);
        self.unweighted_avg.merge(&other.unweighted_avg);
    }
}

impl core::default::Default for WeightedMeanWithError {
    fn default() -> WeightedMeanWithError {
        WeightedMeanWithError::new()
    }
}

impl core::iter::FromIterator<(f64, f64)> for WeightedMeanWithError {
    fn from_iter<T>(iter: T) -> WeightedMeanWithError
        where T: IntoIterator<Item = (f64, f64)>
    {
        let mut a = WeightedMeanWithError::new();
        for (i, w) in iter {
            a.add(i, w);
        }
        a
    }
}

impl<'a> core::iter::FromIterator<&'a (f64, f64)> for WeightedMeanWithError {
    fn from_iter<T>(iter: T) -> WeightedMeanWithError
        where T: IntoIterator<Item = &'a (f64, f64)>
    {
        let mut a = WeightedMeanWithError::new();
        for &(i, w) in iter {
            a.add(i, w);
        }
        a
    }
}