statistical-1.0.0/.gitignore010064400017500001750000000000221342423044500142400ustar0000000000000000target Cargo.lock statistical-1.0.0/.travis.yml010064400017500001750000000001301342423044500143610ustar0000000000000000language: rust script: - cargo build --verbose - cargo test --verbose - cargo doc statistical-1.0.0/AUTHORS010064400017500001750000000001351342423044500133250ustar0000000000000000Initial author: Jeff Belgum With help and contributions from: Benjamin Thompson Justin Uy statistical-1.0.0/Cargo.toml.orig010064400017500001750000000007401342423053300151440ustar0000000000000000[package] name = "statistical" description = "A simple statistics library" version = "1.0.0" authors = ["Jeff Belgum "] repository = "https://github.com/JeffBelgum/statistical" documentation = "https://jeffbelgum.github.io/statistical/statistical/" homepage = "https://github.com/JeffBelgum/statistical" license = "MIT" readme = "README.md" keywords = ["statistics", "statistical", "analysis", "math", "algorithm"] [dependencies] rand = "0.6" num = "0.2" statistical-1.0.0/Cargo.toml0000644000000020110000000000000114050ustar00# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g. crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] name = "statistical" version = "1.0.0" authors = ["Jeff Belgum "] description = "A simple statistics library" homepage = "https://github.com/JeffBelgum/statistical" documentation = "https://jeffbelgum.github.io/statistical/statistical/" readme = "README.md" keywords = ["statistics", "statistical", "analysis", "math", "algorithm"] license = "MIT" repository = "https://github.com/JeffBelgum/statistical" [dependencies.num] version = "0.2" [dependencies.rand] version = "0.6" statistical-1.0.0/LICENSE010064400017500001750000000020661342423044500132670ustar0000000000000000The MIT License (MIT) Copyright (c) 2015 Jeff Belgum Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. statistical-1.0.0/README.md010064400017500001750000000012131342423052600135320ustar0000000000000000Statistical [![Build Status](https://travis-ci.org/JeffBelgum/statistical.svg?branch=master)](https://travis-ci.org/JeffBelgum/statistical) [![](http://meritbadge.herokuapp.com/statistical)](https://crates.io/crates/statistical) ------------ A simple statistics library written in Rust. It draws inspiration from the python stdlib statistics module. [Documentation](https://jeffbelgum.github.io/statistical/statistical/) Usage ----- Add this to your `Cargo.toml`: ```ini [dependencies] statistical = "1.0.0" ``` and this to your crate root: ```rust extern crate statistical; ``` Contributions ------------- Pull Requests and Issues welcome! statistical-1.0.0/src/lib.rs010064400017500001750000000034601342423044500141640ustar0000000000000000// Copyright (c) 2015 Jeff Belgum // // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated // documentation files (the "Software"), to deal in the Software without restriction, including without // limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of // the Software, and to permit persons to whom the Software is furnished to do so, subject to the following // conditions: // // The above copyright notice and this permission notice shall be included in all copies or substantial portions // of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED // TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT // SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. //! A simple statistics library //! //! Heavily inspired by the python standard library statistics module. extern crate rand; extern crate num; mod univariate_; mod stats_; pub mod univariate { pub use univariate_::{ harmonic_mean, geometric_mean, quadratic_mean, mode, average_deviation, pearson_skewness, skewness, pskewness, kurtosis, pkurtosis, standard_error_mean, standard_error_skewness, standard_error_kurtosis }; } pub use univariate::mode; pub use stats_::{ Degree, mean, median, variance, population_variance, standard_deviation, population_standard_deviation, standard_scores }; statistical-1.0.0/src/stats_.rs010064400017500001750000000222501342423044500147110ustar0000000000000000// Copyright (c) 2015 Jeff Belgum // // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated // documentation files (the "Software"), to deal in the Software without restriction, including without // limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of // the Software, and to permit persons to whom the Software is furnished to do so, subject to the following // conditions: // // The above copyright notice and this permission notice shall be included in all copies or substantial portions // of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED // TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT // SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. extern crate rand; extern crate num; use num::{Float, Num, NumCast, One, Zero}; pub enum Degree { One, Two, Three, Four } pub fn std_moment(v: &[T], r: Degree, _mean: Option, pstdev: Option) -> T where T: Float { let _mean = _mean.unwrap_or_else(|| mean(v)); let pstdev = pstdev.unwrap_or_else(|| population_standard_deviation(v, Some(_mean))); let r = match r { Degree::One => 1, Degree::Two => 2, Degree::Three => 3, Degree::Four => 4 }; v.iter().map(|&x| ((x-_mean)/pstdev).powi(r)).fold(T::zero(), |acc, elem| acc + elem) } /// The mean is the sum of a collection of numbers divided by the number of numbers in the collection. /// (reference)[http://en.wikipedia.org/wiki/Arithmetic_mean] pub fn mean(v: &[T]) -> T where T: Float { let len = num::cast(v.len()).unwrap(); v.iter().fold(T::zero(), |acc: T, elem| acc + *elem) / len } /// The median is the number separating the higher half of a data sample, a population, or /// a probability distribution, from the lower half (reference)[http://en.wikipedia.org/wiki/Median) pub fn median(v: &[T]) -> T where T: Copy + Num + NumCast + PartialOrd { assert!(v.len() > 0); let mut scratch: Vec<&T> = Vec::with_capacity(v.len()); scratch.extend(v.iter()); quicksort(&mut scratch); let mid = scratch.len() / 2; if scratch.len() % 2 == 1 { *scratch[mid] } else { (*scratch[mid] + *scratch[mid-1]) / num::cast(2).unwrap() } } pub fn sum_square_deviations(v: &[T], c: Option) -> T where T: Float { let c = match c { Some(c) => c, None => mean(v), }; let sum = v.iter().map( |x| (*x - c) * (*x - c) ).fold(T::zero(), |acc, elem| acc + elem); assert!(sum >= T::zero(), "negative sum of square root deviations"); sum } /// (Sample variance)[http://en.wikipedia.org/wiki/Variance#Sample_variance] pub fn variance(v: &[T], xbar: Option) -> T where T: Float { assert!(v.len() > 1, "variance requires at least two data points"); let len: T = num::cast(v.len()).unwrap(); let sum = sum_square_deviations(v, xbar); sum / (len - T::one()) } /// (Population variance)[http://en.wikipedia.org/wiki/Variance#Population_variance] pub fn population_variance(v: &[T], mu: Option) -> T where T: Float { assert!(v.len() > 0, "population variance requires at least one data point"); let len: T = num::cast(v.len()).unwrap(); let sum = sum_square_deviations(v, mu); sum / len } /// Standard deviation is a measure that is used to quantify the amount of variation or /// dispersion of a set of data values. (reference)[http://en.wikipedia.org/wiki/Standard_deviation] pub fn standard_deviation(v: &[T], xbar: Option) -> T where T: Float { let var = variance(v, xbar); var.sqrt() } /// Population standard deviation is a measure that is used to quantify the amount of variation or /// dispersion of a set of data values. (reference)[http://en.wikipedia.org/wiki/Standard_deviation] pub fn population_standard_deviation(v: &[T], mu: Option) -> T where T: Float { let pvar = population_variance(v, mu); pvar.sqrt() } /// Standard score is a given datum's (signed) number of standard deviations above the mean. /// (reference)[http://en.wikipedia.org/wiki/Standard_score] /// Method returns a vector of scores for a vector of inputs. scores[n] is the score of v[n] pub fn standard_scores(v: &[T]) -> Vec where T: Float { let mean = mean(&v); let standard_deviation = standard_deviation(&v, None); let scores: Vec = v.iter().map(|val| (*val - mean)/standard_deviation).collect(); return scores; } #[inline(always)] fn select_pivot(v: &mut [T]) where T: Copy { let idx = rand::random::() % v.len(); let tmp = v[0]; v[0] = v[idx]; v[idx] = tmp; } fn partition(v: &mut [T]) -> usize where T: PartialOrd + Copy { select_pivot(v); let pivot = v[0]; let mut i = 0; let mut j = 0; let end = v.len() - 1; while i < end { i += 1; if v[i] < pivot { v[j] = v[i]; j += 1; v[i] = v[j]; } } v[j] = pivot; j } pub fn quicksort(v: &mut [T]) where T: PartialOrd + Copy { if v.len() <= 1 { return } let pivot = partition(v); quicksort(&mut v[..pivot]); quicksort(&mut v[(pivot+1)..]); } #[cfg(test)] mod tests { extern crate rand; extern crate num; use super::*; use num::Float; use num::abs; const EPSILON: f32 = 1e-6; #[test] fn test_mean() { let vec = vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]; let diff = abs(mean(&vec) - 1.375); assert!(diff <= EPSILON); } #[test] fn test_median() { let vec = vec![1.0, 3.0]; let diff = abs(median(&vec) - 2.0); assert!(diff <= EPSILON); let vec = vec![1.0, 3.0, 5.0]; let diff = abs(median(&vec) - 3.0); assert!(diff <= EPSILON); let vec = vec![1.0, 3.0, 5.0, 7.0]; let diff = abs(median(&vec) - 4.0); assert!(diff <= EPSILON); } #[test] fn test_variance() { let v = vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]; // result is within `epsilon` of expected value let expected = 1.428571; assert!((expected - variance(&v, None)).abs() < EPSILON); } #[test] fn test_population_variance() { let v = vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]; // result is within `epsilon` of expected value let expected = 1.25; assert!((expected - population_variance(&v, None)).abs() < EPSILON); } #[test] fn test_standard_deviation() { let v = vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]; // result is within `epsilon` of expected value let expected = 1.195229; assert!((expected - standard_deviation(&v, None)).abs() < EPSILON); } #[test] fn test_population_standard_deviation() { let v = vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]; // result is within `epsilon` of expected value let expected = 1.118034; assert!((expected - population_standard_deviation(&v, None)).abs() < EPSILON); } #[test] fn test_standard_scores() { let v = vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25]; let expected = vec![-1.150407536484354, -0.941242529850835, -0.941242529850835, -0.10458250331675945, 0.10458250331675945, 0.31374750995027834, 1.150407536484354, 1.5687375497513918]; assert!(expected == standard_scores(&v)); } #[test] fn test_qsort_empty() { let mut vec: Vec = vec![]; quicksort(&mut vec); assert_eq!(vec, vec![]); } #[test] fn test_qsort_small() { let len = 10; let mut vec = Vec::with_capacity(len); for _ in 0..len { vec.push(rand::random::()); } quicksort(&mut vec); for i in 0..(len-1) { assert!(vec[i] < vec[i+1], "sorted vectors must be monotonically increasing"); } } #[test] fn test_qsort_large() { let len = 1_000_000; let mut vec = Vec::with_capacity(len); for _ in 0..len { vec.push(rand::random::()); } quicksort(&mut vec); for i in 0..(len-1) { assert!(vec[i] < vec[i+1], "sorted vectors must be monotonically increasing"); } } #[test] fn test_qsort_sorted() { let len = 1_000; let mut vec = Vec::with_capacity(len); for n in 0..len { vec.push(n); } quicksort(&mut vec); for i in 0..(len-1) { assert!(vec[i] < vec[i+1], "sorted vectors must be monotonically increasing"); } } #[test] fn test_qsort_reverse_sorted() { let len = 1_000; let mut vec = Vec::with_capacity(len); for n in 0..len { vec.push(len-n); } quicksort(&mut vec); for i in 0..(len-1) { assert!(vec[i] < vec[i+1], "sorted vectors must be monotonically increasing"); } } } statistical-1.0.0/src/univariate_.rs010064400017500001750000000164601342423044500157300ustar0000000000000000// Copyright (c) 2015 Jeff Belgum // // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated // documentation files (the "Software"), to deal in the Software without restriction, including without // limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of // the Software, and to permit persons to whom the Software is furnished to do so, subject to the following // conditions: // // The above copyright notice and this permission notice shall be included in all copies or substantial portions // of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED // TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT // SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION // OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. extern crate rand; extern crate num; use std::collections::HashMap; use std::hash::Hash; use num::{Float, One, PrimInt, Zero}; use super::stats_ as stats; pub fn harmonic_mean(v: &[T]) -> T where T: Float { let invert = |x: &T| T::one() / *x; let sum_of_inverted = v.iter().map(invert).fold(T::zero(), |acc, elem| acc + elem); num::cast::(v.len()).unwrap() / sum_of_inverted } pub fn geometric_mean(v: &[T]) -> T where T: Float { let product = v.iter().fold(T::one(), |acc, elem| acc * *elem); let one_over_len = T::one() / num::cast(v.len()).unwrap(); product.powf(one_over_len) } pub fn quadratic_mean(v: &[T]) -> T where T: Float { let square = |x: &T| (*x).powi(2); let sum_of_squared = v.iter().map(square).fold(T::zero(), |acc, elem| acc + elem); (sum_of_squared / num::cast(v.len()).unwrap()).sqrt() } pub fn mode(v: &[T]) -> Option where T: Hash + Copy + Eq { match v.len() { 0 => None, 1 => Some(v[0]), _ => { let mut counter = HashMap::new(); for x in v.iter() { let count = counter.entry(x).or_insert(0); *count += 1; } let mut max = -1; let mut mode = None; for (val, count) in counter.iter() { if *count > max { max = *count; mode = Some(**val); } } mode } } } pub fn average_deviation(v: &[T], mean: Option) -> T where T: Float { let mean = mean.unwrap_or_else(|| stats::mean(v)); let dev = v.iter().map(|&x| (x-mean).abs()).fold(T::zero(), |acc, elem| acc + elem); dev / num::cast(v.len()).unwrap() } pub fn pearson_skewness(mean: T, mode: T, stdev: T) -> T where T: Float { (mean - mode) / stdev } pub fn skewness(v: &[T], mean: Option, pstdev: Option) -> T where T: Float { let m = stats::std_moment(v, stats::Degree::Three, mean, pstdev); let n = num::cast(v.len()).unwrap(); let skew = m / n; let k = ( n * ( n - T::one())).sqrt()/( n - num::cast(2).unwrap()); skew * k } pub fn pskewness(v: &[T], mean: Option, pstdev: Option) -> T where T: Float { let m = stats::std_moment(v, stats::Degree::Three, mean, pstdev); m / num::cast(v.len()).unwrap() } pub fn kurtosis(v: &[T], mean: Option, pstdev: Option) -> T where T: Float { let two = num::cast::(2.0).unwrap(); let three = num::cast::(3.0).unwrap(); let m = stats::std_moment(v, stats::Degree::Four, mean, pstdev); let n = num::cast(v.len()).unwrap(); let q = (n - T::one())/((n-two)*(n-three)); let gamma2 = m / n; let kurt = q * (( ( n + T::one() ) * gamma2) - ( (n-T::one()) * three )); kurt } pub fn pkurtosis(v: &[T], mean: Option, pstdev: Option) -> T where T: Float { let m = stats::std_moment(v, stats::Degree::Four, mean, pstdev); m / num::cast(v.len()).unwrap() - num::cast(3).unwrap() } pub fn standard_error_mean(stdev: T, sample_size: T, population_size: Option) -> T where T: Float { let mut err = stdev / sample_size.sqrt(); if let Some(p) = population_size { err = err * ((p - sample_size) / (p - T::one())).sqrt() } err } pub fn standard_error_skewness(sample_size: T) -> U where T: PrimInt, U: Float { (num::cast::(6.0).unwrap() / num::cast(sample_size).unwrap()).sqrt() } pub fn standard_error_kurtosis(sample_size: T) -> U where T: PrimInt, U: Float { (num::cast::(24.0).unwrap() / num::cast(sample_size).unwrap()).sqrt() } #[cfg(test)] mod test { use super::*; #[test] fn test_harmonic_mean() { let vec = vec![0.25, 0.5, 1.0, 1.0]; assert_eq!(harmonic_mean(&vec), 0.5); let vec = vec![0.5, 0.5, 0.5]; assert_eq!(harmonic_mean(&vec), 0.5); let vec = vec![1.0,2.0,4.0]; assert_eq!(harmonic_mean(&vec), 12.0/7.0); } #[test] fn test_geometric_mean() { let vec = vec![1.0, 2.0, 6.125, 12.25]; assert_eq!(geometric_mean(&vec), 3.5); } #[test] fn test_quadratic_mean() { let vec = vec![-3.0, -2.0, 0.0, 2.0, 3.0]; assert_eq!(quadratic_mean(&vec), 2.280350850198276); } #[test] fn test_mode() { let vec = vec![2,4,3,5,4,6,1,1,6,4,0,0]; assert_eq!(mode(&vec), Some(4)); let vec = vec![1]; assert_eq!(mode(&vec), Some(1)); } #[test] fn test_average_deviation() { let vec = vec![2.0, 2.25, 2.5, 2.5, 3.25]; assert_eq!(average_deviation(&vec, None), 0.3); assert_eq!(average_deviation(&vec, Some(2.75)), 0.45); } #[test] fn test_pearson_skewness() { assert_eq!(pearson_skewness(2.5, 2.25, 2.5), 0.1); assert_eq!(pearson_skewness(2.5, 5.75, 0.5), -6.5); } #[test] fn test_skewness() { let vec = vec![1.25, 1.5, 1.5, 1.75, 1.75, 2.5, 2.75, 4.5]; assert_eq!(skewness(&vec, None, None), 1.7146101353987853); let vec = vec![1.25, 1.5, 1.5, 1.75, 1.75, 2.5, 2.75, 4.5]; assert_eq!(skewness(&vec, Some(2.25), Some(1.0)), 1.4713288161532945); } #[test] fn test_pskewness() { let vec = vec![1.25, 1.5, 1.5, 1.75, 1.75, 2.5, 2.75, 4.5]; assert_eq!(pskewness(&vec, None, None), 1.3747465025469285); } #[test] fn test_kurtosis() { let vec = vec![1.25, 1.5, 1.5, 1.75, 1.75, 2.5, 2.75, 4.5]; assert_eq!(kurtosis(&vec, None, None), 3.036788927335642); let vec = vec![1.25, 1.5, 1.5, 1.75, 1.75, 2.5, 2.75, 4.5]; assert_eq!(kurtosis(&vec, Some(2.25), Some(1.0)), 2.3064453125); } #[test] fn test_pkurtosis() { let vec = vec![1.25, 1.5, 1.5, 1.75, 1.75, 2.5, 2.75, 4.5]; assert_eq!(pkurtosis(&vec, None, None), 0.7794232987312579); } #[test] fn test_standard_error_mean() { assert_eq!(standard_error_mean(2.0, 16.0, None), 0.5); } #[test] fn test_standard_error_skewness() { assert_eq!(standard_error_skewness::(15), 0.63245553203); } #[test] fn test_standard_error_kurtosis() { assert_eq!(standard_error_kurtosis::(15), 1.2649110640); } } statistical-1.0.0/.cargo_vcs_info.json0000644000000001120000000000000134070ustar00{ "git": { "sha1": "15576bca96aa1da28eba536e0c4ef20b9d47d618" } }