linreg-0.2.0/.github/workflows/ci.yml010060000017500001750000000015031357322340400157170ustar0000000000000000on: [push] name: CI jobs: build_and_test: name: Build and test lib runs-on: ubuntu-latest steps: - uses: actions/checkout@master - run: rustup component add clippy - name: Check formatting uses: actions-rs/cargo@v1 with: command: fmt args: -- --check - name: Check code uses: actions-rs/cargo@v1 with: command: check # TODO: Add RUSTFLAGS="-D warnings" somehow args: --all-features - name: Run clippy uses: actions-rs/clippy-check@v1 with: token: ${{ secrets.GITHUB_TOKEN }} args: --all-features -- -Dwarnings -Drust-2018-idioms - name: Build release uses: actions-rs/cargo@v1 with: command: build args: --release --all-features linreg-0.2.0/.gitignore010060000017500001750000000000371330673706600132050ustar0000000000000000 /target **/*.rs.bk Cargo.lock linreg-0.2.0/Cargo.toml.orig010060000017500001750000000007671357322344300141110ustar0000000000000000[package] edition = "2018" authors = ["49nord GmbH "] description = "Calculates linear regresssions of two-dimensional data. Does not use stdlib, only depends on numeric traits." documentation = "https://docs.rs/linreg" license = "MIT" name = "linreg" readme = "README.md" repository = "https://github.com/49nord/linreg-rs.git" version = "0.2.0" [dependencies] num-traits = "0.2.4" [dependencies.displaydoc] version = "0.1.5" default-features = false # Do not pull in `std` feature. linreg-0.2.0/Cargo.toml0000644000000017520000000000000103550ustar00# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "linreg" version = "0.2.0" authors = ["49nord GmbH "] description = "Calculates linear regresssions of two-dimensional data. Does not use stdlib, only depends on numeric traits." documentation = "https://docs.rs/linreg" readme = "README.md" license = "MIT" repository = "https://github.com/49nord/linreg-rs.git" [dependencies.displaydoc] version = "0.1.5" default-features = false [dependencies.num-traits] version = "0.2.4" linreg-0.2.0/LICENSE010060000017500001750000000020371330676432400122210ustar0000000000000000Copyright (c) 2018 49nord GmbH Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. linreg-0.2.0/README.md010060000017500001750000000006541357275740300125030ustar0000000000000000linreg-rs ========= `linreg` is a small crate that calculates linear regressions. It works without stdlib or memory allocation and has few dependencies. Example: ```rust let xs: Vec = vec![1.0, 2.0, 3.0, 4.0, 5.0]; let ys: Vec = vec![2.0, 4.0, 5.0, 4.0, 5.0]; assert_eq!(Some((0.6, 2.2)), linear_regression(&xs, &ys)); ``` It supports tuples, separate vectors for x and y values and template floating point types. linreg-0.2.0/src/lib.rs010060000017500001750000000242551357322340400131200ustar0000000000000000//! Linear regression //! //! `linreg` calculates linear regressions for two dimensional measurements, also known as //! [simple linear regression](https://en.wikipedia.org/wiki/Simple_linear_regression). //! //! Base for all calculations of linear regression is the simple model found in //! https://en.wikipedia.org/wiki/Ordinary_least_squares#Simple_linear_regression_model. //! //! ## Example use //! //! ```rust //! use linreg::{linear_regression, linear_regression_of}; //! //! // Example 1: x and y values stored in two different vectors //! let xs: Vec = vec![1.0, 2.0, 3.0, 4.0, 5.0]; //! let ys: Vec = vec![2.0, 4.0, 5.0, 4.0, 5.0]; //! //! assert_eq!(Ok((0.6, 2.2)), linear_regression(&xs, &ys)); //! //! //! // Example 2: x and y values stored as tuples //! let tuples: Vec<(f32, f32)> = vec![(1.0, 2.0), //! (2.0, 4.0), //! (3.0, 5.0), //! (4.0, 4.0), //! (5.0, 5.0)]; //! //! assert_eq!(Ok((0.6, 2.2)), linear_regression_of(&tuples)); //! //! //! // Example 3: directly operating on integer (converted to float as required) //! let xs: Vec = vec![1, 2, 3, 4, 5]; //! let ys: Vec = vec![2, 4, 5, 4, 5]; //! //! assert_eq!(Ok((0.6, 2.2)), linear_regression(&xs, &ys)); //! ``` #![no_std] extern crate num_traits; use num_traits::float::FloatCore; #[cfg(test)] #[macro_use] extern crate std; use core::iter::Iterator; use core::iter::Sum; use displaydoc::Display; /// The kinds of errors that can occur when calculating a linear regression. #[derive(Copy, Clone, Display, Debug, PartialEq)] pub enum Error { /// The slope is too steep to represent, approaching infinity. TooSteep, /// Failed to calculate mean. /// /// This means the input was empty or had too many elements. Mean, /// Lengths of the inputs are different. InputLenDif, /// Can't compute linear regression of zero elements NoElements, } /// Single-pass simple linear regression. /// /// Similar to `lin_reg`, but does not require a mean value to be computed in advance and thus /// does not require a second pass over the input data. /// /// Returns `Ok((slope, intercept))` of the regression line. /// /// # Errors /// /// Errors if the number of elements is too large to be represented as `F` or /// the slope is too steep to represent, approaching infinity. pub fn lin_reg_imprecise(xys: I) -> Result<(F, F), Error> where F: FloatCore, I: Iterator, { details::lin_reg_imprecise_components(xys)?.finish() } /// A module containing the building parts of the main API. /// You can use these if you want to have more control over the linear regression mod details { use super::Error; use num_traits::float::FloatCore; /// Low level linear regression primitive for pushing values instead of fetching them /// from an iterator #[derive(Debug)] pub struct Accumulator { x_mean: F, y_mean: F, x_mul_y_mean: F, x_squared_mean: F, n: usize, } impl Default for Accumulator { fn default() -> Self { Self::new() } } impl Accumulator { pub fn new() -> Self { Self { x_mean: F::zero(), y_mean: F::zero(), x_mul_y_mean: F::zero(), x_squared_mean: F::zero(), n: 0, } } pub fn push(&mut self, x: F, y: F) { self.x_mean = self.x_mean + x; self.y_mean = self.y_mean + y; self.x_mul_y_mean = self.x_mul_y_mean + x * y; self.x_squared_mean = self.x_squared_mean + x * x; self.n += 1; } pub fn normalize(&mut self) -> Result<(), Error> { match self.n { 1 => return Ok(()), 0 => return Err(Error::NoElements), _ => {} } let n = F::from(self.n).ok_or(Error::Mean)?; self.n = 1; self.x_mean = self.x_mean / n; self.y_mean = self.y_mean / n; self.x_mul_y_mean = self.x_mul_y_mean / n; self.x_squared_mean = self.x_squared_mean / n; Ok(()) } pub fn parts(mut self) -> Result<(F, F, F, F), Error> { self.normalize()?; let Self { x_mean, y_mean, x_mul_y_mean, x_squared_mean, .. } = self; Ok((x_mean, y_mean, x_mul_y_mean, x_squared_mean)) } pub fn finish(self) -> Result<(F, F), Error> { let (x_mean, y_mean, x_mul_y_mean, x_squared_mean) = self.parts()?; let slope = (x_mul_y_mean - x_mean * y_mean) / (x_squared_mean - x_mean * x_mean); let intercept = y_mean - slope * x_mean; if slope.is_nan() { return Err(Error::TooSteep); } Ok((slope, intercept)) } } pub fn lin_reg_imprecise_components(xys: I) -> Result, Error> where F: FloatCore, I: Iterator, { let mut acc = Accumulator::new(); for (x, y) in xys { acc.push(x, y); } acc.normalize()?; Ok(acc) } } /// Calculates a linear regression with a known mean. /// /// Lower-level linear regression function. Assumes that `x_mean` and `y_mean` /// have already been calculated. Returns `Error::DivByZero` if /// /// * the slope is too steep to represent, approaching infinity. /// /// Since there is a mean, this function assumes that `xs` and `ys` are both non-empty. /// /// Returns `Ok((slope, intercept))` of the regression line. pub fn lin_reg(xys: I, x_mean: F, y_mean: F) -> Result<(F, F), Error> where I: Iterator, F: FloatCore, { // SUM (x-mean(x))^2 let mut xxm2 = F::zero(); // SUM (x-mean(x)) (y-mean(y)) let mut xmym2 = F::zero(); for (x, y) in xys { xxm2 = xxm2 + (x - x_mean) * (x - x_mean); xmym2 = xmym2 + (x - x_mean) * (y - y_mean); } let slope = xmym2 / xxm2; // we check for divide-by-zero after the fact if slope.is_nan() { return Err(Error::TooSteep); } let intercept = y_mean - slope * x_mean; Ok((slope, intercept)) } /// Two-pass simple linear regression from slices. /// /// Calculates the linear regression from two slices, one for x- and one for y-values, by /// calculating the mean and then calling `lin_reg`. /// /// Returns `Ok(slope, intercept)` of the regression line. /// /// # Errors /// /// Returns an error if /// /// * `xs` and `ys` differ in length /// * `xs` or `ys` are empty /// * the slope is too steep to represent, approaching infinity /// * the number of elements cannot be represented as an `F` /// pub fn linear_regression(xs: &[X], ys: &[Y]) -> Result<(F, F), Error> where X: Clone + Into, Y: Clone + Into, F: FloatCore + Sum, { if xs.len() != ys.len() { return Err(Error::InputLenDif); } if xs.is_empty() { return Err(Error::Mean); } let x_sum: F = xs.iter().cloned().map(Into::into).sum(); let n = F::from(xs.len()).ok_or(Error::Mean)?; let x_mean = x_sum / n; let y_sum: F = ys.iter().cloned().map(Into::into).sum(); let y_mean = y_sum / n; lin_reg( xs.iter() .map(|i| i.clone().into()) .zip(ys.iter().map(|i| i.clone().into())), x_mean, y_mean, ) } /// Two-pass linear regression from tuples. /// /// Calculates the linear regression from a slice of tuple values by first calculating the mean /// before calling `lin_reg`. /// /// Returns `Ok(slope, intercept)` of the regression line. /// /// # Errors /// /// Returns an error if /// /// * `xys` is empty /// * the slope is too steep to represent, approaching infinity /// * the number of elements cannot be represented as an `F` pub fn linear_regression_of(xys: &[(X, Y)]) -> Result<(F, F), Error> where X: Clone + Into, Y: Clone + Into, F: FloatCore, { if xys.is_empty() { return Err(Error::Mean); } // We're handrolling the mean computation here, because our generic implementation can't handle tuples. // If we ran the generic impl on each tuple field, that would be very cache inefficient let n = F::from(xys.len()).ok_or(Error::Mean)?; let (x_sum, y_sum) = xys .iter() .cloned() .fold((F::zero(), F::zero()), |(sx, sy), (x, y)| { (sx + x.into(), sy + y.into()) }); let x_mean = x_sum / n; let y_mean = y_sum / n; lin_reg( xys.iter() .map(|(x, y)| (x.clone().into(), y.clone().into())), x_mean, y_mean, ) } #[cfg(test)] mod tests { use std::vec::Vec; use super::*; #[test] fn float_slices_regression() { let xs: Vec = vec![1.0, 2.0, 3.0, 4.0, 5.0]; let ys: Vec = vec![2.0, 4.0, 5.0, 4.0, 5.0]; assert_eq!(Ok((0.6, 2.2)), linear_regression(&xs, &ys)); } #[test] fn lin_reg_imprecises_vs_linreg() { let xs: Vec = vec![1.0, 2.0, 3.0, 4.0, 5.0]; let ys: Vec = vec![2.0, 4.0, 5.0, 4.0, 5.0]; let (x1, y1) = lin_reg_imprecise(xs.iter().cloned().zip(ys.iter().cloned())).unwrap(); let (x2, y2): (f64, f64) = linear_regression(&xs, &ys).unwrap(); assert!(f64::abs(x1 - x2) < 0.00001); assert!(f64::abs(y1 - y2) < 0.00001); } #[test] fn int_slices_regression() { let xs: Vec = vec![1, 2, 3, 4, 5]; let ys: Vec = vec![2, 4, 5, 4, 5]; assert_eq!(Ok((0.6, 2.2)), linear_regression(&xs, &ys)); } #[test] fn float_tuples_regression() { let tuples: Vec<(f32, f32)> = vec![(1.0, 2.0), (2.0, 4.0), (3.0, 5.0), (4.0, 4.0), (5.0, 5.0)]; assert_eq!(Ok((0.6, 2.2)), linear_regression_of(&tuples)); } #[test] fn int_tuples_regression() { let tuples: Vec<(u32, u32)> = vec![(1, 2), (2, 4), (3, 5), (4, 4), (5, 5)]; assert_eq!(Ok((0.6, 2.2)), linear_regression_of(&tuples)); } } linreg-0.2.0/.cargo_vcs_info.json0000644000000001120000000000000123440ustar00{ "git": { "sha1": "03e6a9a16f17e961eb82069d7011134ec274e23a" } }