histogram-0.11.0/.cargo_vcs_info.json0000644000000001470000000000100131120ustar { "git": { "sha1": "696f9581171962ad717a8b1eab13d50433c1091d" }, "path_in_vcs": "histogram" }histogram-0.11.0/Cargo.toml0000644000000022560000000000100111130ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "histogram" version = "0.11.0" authors = ["Brian Martin "] description = "A collection of histogram data structures" homepage = "https://github.com/pelikan-io/rustcommon" readme = "README.md" license = "MIT OR Apache-2.0" repository = "https://github.com/pelikan-io/rustcommon" [[bench]] name = "histogram" harness = false [dependencies.schemars] version = "0.8" optional = true [dependencies.serde] version = "1.0.144" features = ["derive"] optional = true [dependencies.thiserror] version = "1.0.47" [dev-dependencies.criterion] version = "0.5.1" [dev-dependencies.rand] version = "0.8.5" [features] schemars = [ "dep:schemars", "serde", ] serde = ["dep:serde"] histogram-0.11.0/Cargo.toml.orig000064400000000000000000000011761046102023000145740ustar 00000000000000[package] name = "histogram" version = "0.11.0" edition = "2021" authors = ["Brian Martin "] license = "MIT OR Apache-2.0" description = "A collection of histogram data structures" homepage = "https://github.com/pelikan-io/rustcommon" repository = "https://github.com/pelikan-io/rustcommon" [dependencies] schemars = { version = "0.8", optional = true } serde = { version = "1.0.144", features = ["derive"], optional = true } thiserror = "1.0.47" [dev-dependencies] criterion = "0.5.1" 
rand = "0.8.5" [features] schemars = ["dep:schemars", "serde"] serde = ["dep:serde"] [[bench]] name = "histogram" harness = false histogram-0.11.0/README.md000064400000000000000000000017031046102023000131600ustar 00000000000000# histogram A collection of histogram data structures which enable counting of occurrences of values and reporting on the distribution of observations. The implementations in this crate store counts for quantized value ranges using a fast indexing algorithm that maps values into either a linear range or a logarithmic range with linear subdivisions. This is similar to HDRHistogram but the indexing algorithm is modified to make increments faster. There are several implementations which target different use-cases. See the documentation for details. ## Documentation See the [API Documentation] on docs.rs ## Support Create a [new issue](https://github.com/pelikan-io/rustcommon/issues/new) on GitHub. ## Authors * Brian Martin A full list of [contributors] can be found on GitHub. [API Documentation]: https://docs.rs/histogram/latest/histogram [contributors]: https://github.com/pelikan-io/rustcommon/graphs/contributors?type=a histogram-0.11.0/benches/histogram.rs000064400000000000000000000017561046102023000156630ustar 00000000000000use criterion::{criterion_group, criterion_main, Criterion, Throughput}; // To reduce duplication, we use this macro. It only works because the API for // all the histogram types is roughly the same for some operations. macro_rules! 
benchmark { ($name:tt, $histogram:ident, $c:ident) => { let mut group = $c.benchmark_group($name); group.throughput(Throughput::Elements(1)); group.bench_function("increment/1", |b| b.iter(|| $histogram.increment(1))); group.bench_function("increment/max", |b| { b.iter(|| $histogram.increment(u64::MAX)) }); group.finish(); }; } fn histogram(c: &mut Criterion) { let mut histogram = histogram::Histogram::new(7, 64).unwrap(); benchmark!("histogram", histogram, c); } fn atomic(c: &mut Criterion) { let histogram = histogram::AtomicHistogram::new(7, 64).unwrap(); benchmark!("atomic_histogram", histogram, c); } criterion_group!(benches, histogram, atomic); criterion_main!(benches); histogram-0.11.0/src/atomic.rs000064400000000000000000000132611046102023000143140ustar 00000000000000use crate::{Config, Error, Histogram}; use core::sync::atomic::{AtomicU64, Ordering}; /// A histogram that uses atomic 64bit counters for each bucket. /// /// Unlike the non-atomic variant, it cannot be used directly to report /// percentiles. Instead, a snapshot must be taken which captures the state of /// the histogram at a point in time. pub struct AtomicHistogram { config: Config, buckets: Box<[AtomicU64]>, } impl AtomicHistogram { /// Construct a new atomic histogram from the provided parameters. See the /// documentation for [`crate::Config`] to understand their meaning. pub fn new(p: u8, n: u8) -> Result { let config = Config::new(p, n)?; Ok(Self::with_config(&config)) } /// Creates a new atomic histogram using a provided [`crate::Config`]. pub fn with_config(config: &Config) -> Self { let mut buckets = Vec::with_capacity(config.total_buckets()); buckets.resize_with(config.total_buckets(), || AtomicU64::new(0)); Self { config: *config, buckets: buckets.into(), } } /// Increment the bucket that contains the value by one. pub fn increment(&self, value: u64) -> Result<(), Error> { self.add(value, 1) } /// Increment the bucket that contains the value by some count. 
pub fn add(&self, value: u64, count: u64) -> Result<(), Error> { let index = self.config.value_to_index(value)?; self.buckets[index].fetch_add(count, Ordering::Relaxed); Ok(()) } // NOTE: once stabilized, `target_has_atomic_load_store` is more correct. https://github.com/rust-lang/rust/issues/94039 #[cfg(target_has_atomic = "64")] /// Drains the bucket values into a new Histogram /// /// Unlike [`load`](AtomicHistogram::load), this method will reset all bucket values to zero. This uses [`AtomicU64::swap`] and is not available /// on platforms where [`AtomicU64::swap`] is not available. pub fn drain(&self) -> Histogram { let buckets: Vec = self .buckets .iter() .map(|bucket| bucket.swap(0, Ordering::Relaxed)) .collect(); Histogram { config: self.config, buckets: buckets.into(), } } /// Read the bucket values into a new `Histogram` pub fn load(&self) -> Histogram { let buckets: Vec = self .buckets .iter() .map(|bucket| bucket.load(Ordering::Relaxed)) .collect(); Histogram { config: self.config, buckets: buckets.into(), } } } #[cfg(test)] mod test { use crate::*; #[test] fn size() { assert_eq!(std::mem::size_of::(), 48); } #[cfg(target_has_atomic = "64")] #[test] /// Tests that drain properly resets buckets to 0 fn drain() { let histogram = AtomicHistogram::new(7, 64).unwrap(); for i in 0..=100 { let _ = histogram.increment(i); } let percentiles = histogram.drain(); assert_eq!( percentiles.percentile(50.0), Ok(Some(Bucket { count: 1, range: 50..=50, })) ); histogram.increment(1000).unwrap(); // after another load the map is empty let percentiles = histogram.drain(); assert_eq!( percentiles.percentile(50.0), Ok(Some(Bucket { count: 1, range: 1000..=1003, })) ); } #[test] // Tests percentiles fn percentiles() { let histogram = AtomicHistogram::new(7, 64).unwrap(); let percentiles = [25.0, 50.0, 75.0, 90.0, 99.0]; // check empty assert_eq!(histogram.load().percentiles(&percentiles), Ok(None)); for percentile in percentiles { 
assert_eq!(histogram.load().percentile(percentile), Ok(None)); } // populate and check percentiles for i in 0..=100 { let _ = histogram.increment(i); assert_eq!( histogram.load().percentile(0.0), Ok(Some(Bucket { count: 1, range: 0..=0, })) ); assert_eq!( histogram.load().percentile(100.0), Ok(Some(Bucket { count: 1, range: i..=i, })) ); } for percentile in percentiles { assert_eq!( histogram .load() .percentile(percentile) .map(|b| b.unwrap().end()), Ok(percentile as u64) ); } assert_eq!( histogram.load().percentile(99.9).map(|b| b.unwrap().end()), Ok(100) ); assert_eq!( histogram.load().percentile(-1.0), Err(Error::InvalidPercentile) ); assert_eq!( histogram.load().percentile(101.0), Err(Error::InvalidPercentile) ); let percentiles: Vec<(f64, u64)> = histogram .load() .percentiles(&[50.0, 90.0, 99.0, 99.9]) .unwrap() .unwrap() .iter() .map(|(p, b)| (*p, b.end())) .collect(); assert_eq!( percentiles, vec![(50.0, 50), (90.0, 90), (99.0, 99), (99.9, 100)] ); let _ = histogram.increment(1024); assert_eq!( histogram.load().percentile(99.9), Ok(Some(Bucket { count: 1, range: 1024..=1031, })) ); } } histogram-0.11.0/src/bucket.rs000064400000000000000000000014241046102023000143130ustar 00000000000000use core::ops::RangeInclusive; /// A bucket represents a quantized range of values and a count of observations /// that fall into that range. #[derive(Clone, Debug, PartialEq)] pub struct Bucket { pub(crate) count: u64, pub(crate) range: RangeInclusive, } impl Bucket { /// Returns the number of observations within the bucket's range. pub fn count(&self) -> u64 { self.count } /// Returns the range for the bucket. pub fn range(&self) -> RangeInclusive { self.range.clone() } /// Returns the inclusive lower bound for the bucket. pub fn start(&self) -> u64 { *self.range.start() } /// Returns the inclusive upper bound for the bucket. 
pub fn end(&self) -> u64 { *self.range.end() } } histogram-0.11.0/src/config.rs000064400000000000000000000256461046102023000143170ustar 00000000000000use crate::Error; use core::ops::RangeInclusive; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; /// The configuration of a histogram which determines the bucketing strategy and /// therefore the relative error and memory utilization of a histogram. /// * `grouping_power` - controls the number of buckets that are used to span /// consecutive powers of two. Lower values result in less memory usage since /// fewer buckets will be created. However, this will result in larger /// relative error as each bucket represents a wider range of values. /// * `max_value_power` - controls the largest value which can be stored in the /// histogram. `2^(max_value_power) - 1` is the inclusive upper bound for the /// representable range of values. /// /// # How to choose parameters for your data /// Please see for an /// in-depth discussion about the bucketing strategy and an interactive /// calculator that lets you explore how these parameters result in histograms /// with varying error guarantees and memory utilization requirements. /// /// # The short version /// ## Grouping Power /// `grouping_power` should be set such that `2^(-1 * grouping_power)` is an /// acceptable relative error. Rephrased, we can plug-in the acceptable /// relative error into `grouping_power = ceil(log2(1/e))`. For example, if we /// want to limit the error to 0.1% (0.001) we should set `grouping_power = 7`. /// /// ## Max Value Power /// `max_value_power` should be the closest power of 2 that is larger than the /// largest value you expect in your data. If your only guarantee is that the /// values are all `u64`, then setting this to `64` may be reasonable if you /// can tolerate a bit of relative error. 
/// /// ## Resulting size /// /// If we want to allow any value in a range of unsigned types, the amount of /// memory for the histogram is approximately: /// /// | power | error | u16 | u32 | u64 | /// |-------|-------|---------|---------|---------| /// | 2 | 25% | 0.6 KiB | 1 KiB | 2 KiB | /// | 3 | 12.5% | 1 KiB | 2 KiB | 4 KiB | /// | 4 | 6.25% | 2 KiB | 4 KiB | 8 KiB | /// | 5 | 3.13% | 3 KiB | 7 KiB | 15 KiB | /// | 6 | 1.56% | 6 KiB | 14 KiB | 30 KiB | /// | 7 | .781% | 10 KiB | 26 KiB | 58 KiB | /// | 8 | .391% | 18 KiB | 50 KiB | 114 KiB | /// | 9 | .195% | 32 KiB | 96 KiB | 224 KiB | /// | 10 | .098% | 56 KiB | 184 KiB | 440 KiB | /// | 11 | .049% | 96 KiB | 352 KiB | 864 KiB | /// | 12 | .025% | 160 KiB | 672 KiB | 1.7 MiB | /// /// # Constraints: /// * `max_value_power` must be in the range `0..=64` /// * `max_value_power` must be greater than `grouping_power #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] pub struct Config { max: u64, grouping_power: u8, max_value_power: u8, cutoff_power: u8, cutoff_value: u64, lower_bin_count: u32, upper_bin_divisions: u32, upper_bin_count: u32, } impl Config { /// Create a new histogram `Config` from the parameters. See the struct /// documentation [`crate::Config`] for the meaning of the parameters and /// their constraints. pub const fn new(grouping_power: u8, max_value_power: u8) -> Result { // we only allow values up to 2^64 if max_value_power > 64 { return Err(Error::MaxPowerTooHigh); } // check that the other parameters make sense together if grouping_power >= max_value_power { return Err(Error::MaxPowerTooLow); } // the cutoff is the point at which the linear range divisions and the // logarithmic range subdivisions diverge. // // for example: // when a = 0, the linear range has bins with width 1. // if b = 7 the logarithmic range has 128 subdivisions. 
// this means that for 0..128 we must be representing the values exactly // but we also represent 128..256 exactly since the subdivisions divide // that range into bins with the same width as the linear portion. // // therefore our cutoff power = a + b + 1 // note: because a + b must be less than n which is a u8, a + b + 1 must // be less than or equal to u8::MAX. This means our cutoff power will // always fit in a u8 let cutoff_power = grouping_power + 1; let cutoff_value = 2_u64.pow(cutoff_power as u32); let lower_bin_width = 2_u32.pow(0); let upper_bin_divisions = 2_u32.pow(grouping_power as u32); let max = if max_value_power == 64 { u64::MAX } else { 2_u64.pow(max_value_power as u32) }; let lower_bin_count = (cutoff_value / lower_bin_width as u64) as u32; let upper_bin_count = (max_value_power - cutoff_power) as u32 * upper_bin_divisions; Ok(Self { max, grouping_power, max_value_power, cutoff_power, cutoff_value, lower_bin_count, upper_bin_divisions, upper_bin_count, }) } /// Returns the grouping power that was used to create this configuration. pub const fn grouping_power(&self) -> u8 { self.grouping_power } /// Returns the max value power that was used to create this configuration. pub const fn max_value_power(&self) -> u8 { self.max_value_power } /// Returns the relative error (in percentage) of this configuration. This /// only applies to the logarithmic bins of the histogram (linear bins have /// a width of 1 and no error). For histograms with no logarithmic bins, /// error for the entire histogram is zero. pub fn error(&self) -> f64 { match self.grouping_power == self.max_value_power - 1 { true => 0.0, false => 100.0 / 2_u64.pow(self.grouping_power as u32) as f64, } } /// Return the total number of buckets needed for this config. pub const fn total_buckets(&self) -> usize { (self.lower_bin_count + self.upper_bin_count) as usize } /// Converts a value to a bucket index. Returns an error if the value is /// outside of the range for the config. 
pub(crate) fn value_to_index(&self, value: u64) -> Result<usize, Error> {
    // Values below the cutoff live in the linear region, where every bucket
    // has a width of one and the bucket index is the value itself.
    if value < self.cutoff_value {
        return Ok(value as usize);
    }

    // `Config::new` sets `cutoff_value` to `2^(grouping_power + 1)`, which is
    // at least 2, so `value` is non-zero here and `leading_zeros()` is at most
    // 62. `power` is therefore `floor(log2(value))`.
    let power = 63 - value.leading_zeros();

    // The representable range is `0..=2^max_value_power - 1`, i.e. every value
    // whose highest set bit is below `max_value_power`. Comparing against
    // `self.max` alone is not enough: for powers below 64 `Config::new` stores
    // `max = 2^max_value_power`, so `value == max` would slip through and map
    // to `index == total_buckets()`, one past the last bucket.
    if power >= u32::from(self.max_value_power) {
        return Err(Error::OutOfRange);
    }

    // Which power-of-two group above the cutoff the value falls into.
    let log_bin = power - self.cutoff_power as u32;
    // Position of the value within that group, in units of the group's
    // bucket width (`2^(power - grouping_power)`).
    let offset = (value - (1 << power)) >> (power - self.grouping_power as u32);

    Ok((self.lower_bin_count + log_bin * self.upper_bin_divisions + offset as u32) as usize)
}

/// Convert a bucket index to a lower bound.
pub(crate) fn index_to_lower_bound(&self, index: usize) -> u64 {
    // `g` is the index divided by `2^grouping_power`, `h` is the remainder.
    let g = index as u64 >> self.grouping_power;
    let h = index as u64 - g * (1 << self.grouping_power);

    if g < 1 {
        // The first `2^grouping_power` buckets hold exactly one value each.
        h
    } else {
        // Group `g` starts at `2^(grouping_power + g - 1)` and is divided into
        // buckets of width `2^(g - 1)`.
        (1 << (self.grouping_power as u64 + g - 1)) + (1 << (g - 1)) * h
    }
}

/// Convert a bucket index to a upper inclusive bound.
pub(crate) fn index_to_upper_bound(&self, index: usize) -> u64 {
    if index as u32 == self.lower_bin_count + self.upper_bin_count - 1 {
        // The last bucket ends at the largest representable value,
        // `2^max_value_power - 1`. It is computed with a shift because the
        // general formula below would need the intermediate value `2^64` when
        // `max_value_power` is 64. `Config::new` guarantees
        // `1 <= max_value_power <= 64`, so the shift amount is in `0..=63`.
        return u64::MAX >> (64 - u32::from(self.max_value_power));
    }

    // Same decomposition as `index_to_lower_bound`, but `h` is advanced by one
    // so the expressions below produce the start of the *next* bucket; the
    // final `- 1` turns that into an inclusive upper bound.
    let g = index as u64 >> self.grouping_power;
    let h = index as u64 - g * (1 << self.grouping_power) + 1;

    if g < 1 {
        h - 1
    } else {
        (1 << (self.grouping_power as u64 + g - 1)) + (1 << (g - 1)) * h - 1
    }
}

/// Convert a bucket index to a range.
pub(crate) fn index_to_range(&self, index: usize) -> RangeInclusive { self.index_to_lower_bound(index)..=self.index_to_upper_bound(index) } } #[cfg(test)] mod tests { use super::*; #[test] fn sizes() { assert_eq!(std::mem::size_of::(), 32); } #[test] // Test that the number of buckets matches the expected count fn total_buckets() { let config = Config::new(2, 64).unwrap(); assert_eq!(config.total_buckets(), 252); let config = Config::new(7, 64).unwrap(); assert_eq!(config.total_buckets(), 7424); let config = Config::new(14, 64).unwrap(); assert_eq!(config.total_buckets(), 835_584); let config = Config::new(2, 4).unwrap(); assert_eq!(config.total_buckets(), 12); } #[test] // Test value to index conversions fn value_to_idx() { let config = Config::new(7, 64).unwrap(); assert_eq!(config.value_to_index(0), Ok(0)); assert_eq!(config.value_to_index(1), Ok(1)); assert_eq!(config.value_to_index(256), Ok(256)); assert_eq!(config.value_to_index(257), Ok(256)); assert_eq!(config.value_to_index(258), Ok(257)); assert_eq!(config.value_to_index(512), Ok(384)); assert_eq!(config.value_to_index(515), Ok(384)); assert_eq!(config.value_to_index(516), Ok(385)); assert_eq!(config.value_to_index(1024), Ok(512)); assert_eq!(config.value_to_index(1031), Ok(512)); assert_eq!(config.value_to_index(1032), Ok(513)); assert_eq!(config.value_to_index(u64::MAX - 1), Ok(7423)); assert_eq!(config.value_to_index(u64::MAX), Ok(7423)); } #[test] // Test index to lower bound conversion fn idx_to_lower_bound() { let config = Config::new(7, 64).unwrap(); assert_eq!(config.index_to_lower_bound(0), 0); assert_eq!(config.index_to_lower_bound(1), 1); assert_eq!(config.index_to_lower_bound(256), 256); assert_eq!(config.index_to_lower_bound(384), 512); assert_eq!(config.index_to_lower_bound(512), 1024); assert_eq!( config.index_to_lower_bound(7423), 18_374_686_479_671_623_680 ); } #[test] // Test index to upper bound conversion fn idx_to_upper_bound() { let config = Config::new(7, 64).unwrap(); 
assert_eq!(config.index_to_upper_bound(0), 0); assert_eq!(config.index_to_upper_bound(1), 1); assert_eq!(config.index_to_upper_bound(256), 257); assert_eq!(config.index_to_upper_bound(384), 515); assert_eq!(config.index_to_upper_bound(512), 1031); assert_eq!(config.index_to_upper_bound(7423), u64::MAX); } #[test] // Test index to range conversion fn idx_to_range() { let config = Config::new(7, 64).unwrap(); assert_eq!(config.index_to_range(0), 0..=0); assert_eq!(config.index_to_range(1), 1..=1); assert_eq!(config.index_to_range(256), 256..=257); assert_eq!(config.index_to_range(384), 512..=515); assert_eq!(config.index_to_range(512), 1024..=1031); assert_eq!( config.index_to_range(7423), 18_374_686_479_671_623_680..=u64::MAX ); } } histogram-0.11.0/src/errors.rs000064400000000000000000000014761046102023000143610ustar 00000000000000use thiserror::Error; /// Errors returned for histogram construction and operations. #[non_exhaustive] #[derive(Error, Debug, PartialEq)] pub enum Error { #[error("max power is too high, check that n <= 64")] MaxPowerTooHigh, #[error("max power is too low, check that a + b < n")] MaxPowerTooLow, #[error("invalid percentile, must be in range 0.0..=100.0")] InvalidPercentile, #[error("the value is outside of the storable range")] OutOfRange, #[error("the histogram parameters are incompatible")] IncompatibleParameters, #[error("the snapshot time ranges do not allow this operation")] IncompatibleTimeRange, #[error("an overflow occurred")] Overflow, #[error("an underflow occurred")] Underflow, #[error("the histogram is not a subset")] InvalidSubset, } histogram-0.11.0/src/lib.rs000064400000000000000000000013501046102023000136020ustar 00000000000000//! This crate provides histogram implementations that are conceptually similar //! to HdrHistogram, with modifications to the bucket construction and indexing //! algorithms that we believe provide a simpler implementation and more //! 
efficient runtime compared to the reference implementation of HdrHistogram. //! //! # Goals //! * simple implementation //! * fine-grained configuration //! * efficient runtime //! //! # Background //! Please see: mod atomic; mod bucket; mod config; mod errors; mod sparse; mod standard; pub use atomic::AtomicHistogram; pub use bucket::Bucket; pub use config::Config; pub use errors::Error; pub use sparse::SparseHistogram; pub use standard::Histogram; histogram-0.11.0/src/sparse.rs000064400000000000000000000413151046102023000143360ustar 00000000000000use crate::{Bucket, Config, Error, Histogram}; /// This histogram is a sparse, columnar representation of the regular /// Histogram. It is significantly smaller than a regular Histogram /// when a large number of buckets are zero, which is a frequent /// occurence. It stores an individual vector for each field /// of non-zero buckets. Assuming index[0] = n, (index[0], count[0]) /// corresponds to the nth bucket. #[derive(Clone, Debug, PartialEq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] pub struct SparseHistogram { /// parameters representing the resolution and the range of /// the histogram tracking request latencies pub config: Config, /// indices for the non-zero buckets in the histogram pub index: Vec, /// histogram bucket counts corresponding to the indices pub count: Vec, } impl SparseHistogram { /// Construct a new histogram from the provided parameters. See the /// documentation for [`crate::Config`] to understand their meaning. pub fn new(grouping_power: u8, max_value_power: u8) -> Result { let config = Config::new(grouping_power, max_value_power)?; Ok(Self::with_config(&config)) } /// Creates a new histogram using a provided [`crate::Config`]. pub fn with_config(config: &Config) -> Self { Self { config: *config, index: Vec::new(), count: Vec::new(), } } /// Helper function to store a bucket in the histogram. 
fn add_bucket(&mut self, idx: usize, n: u64) { if n != 0 { self.index.push(idx); self.count.push(n); } } /// Adds the other histogram to this histogram and returns the result as a /// new histogram. /// /// An error is returned if the two histograms have incompatible parameters. /// Buckets which have values in both histograms are allowed to wrap. #[allow(clippy::comparison_chain)] pub fn wrapping_add(&self, h: &SparseHistogram) -> Result { if self.config != h.config { return Err(Error::IncompatibleParameters); } let mut histogram = SparseHistogram::with_config(&self.config); // Sort and merge buckets from both histograms let (mut i, mut j) = (0, 0); while i < self.index.len() && j < h.index.len() { let (k1, v1) = (self.index[i], self.count[i]); let (k2, v2) = (h.index[j], h.count[j]); if k1 == k2 { histogram.add_bucket(k1, v1 + v2); (i, j) = (i + 1, j + 1); } else if k1 < k2 { histogram.add_bucket(k1, v1); i += 1; } else { histogram.add_bucket(k2, v2); j += 1; } } // Fill remaining values, if any, from the left histogram if i < self.index.len() { histogram.index.extend(&self.index[i..self.index.len()]); histogram.count.extend(&self.count[i..self.count.len()]); } // Fill remaining values, if any, from the right histogram if j < h.index.len() { histogram.index.extend(&h.index[j..h.index.len()]); histogram.count.extend(&h.count[j..h.count.len()]); } Ok(histogram) } /// Subtracts the other histogram to this histogram and returns the result as a /// new histogram. The other histogram is expected to be a subset of the current /// histogram, i.e., for every bucket in the other histogram should have a /// count less than or equal to the corresponding bucket in this histogram. /// /// An error is returned if the two histograms have incompatible parameters /// or if the other histogram is not a subset of this histogram. 
#[allow(clippy::comparison_chain)] pub fn checked_sub(&self, h: &SparseHistogram) -> Result { if self.config != h.config { return Err(Error::IncompatibleParameters); } let mut histogram = SparseHistogram::with_config(&self.config); // Sort and merge buckets from both histograms let (mut i, mut j) = (0, 0); while i < self.index.len() && j < h.index.len() { let (k1, v1) = (self.index[i], self.count[i]); let (k2, v2) = (h.index[j], h.count[j]); if k1 == k2 { let v = v1.checked_sub(v2).ok_or(Error::Underflow)?; if v != 0 { histogram.add_bucket(k1, v); } (i, j) = (i + 1, j + 1); } else if k1 < k2 { histogram.add_bucket(k1, v1); i += 1; } else { // Other histogram has a bucket not present in this histogram, // i.e., it is not a subset of this histogram return Err(Error::InvalidSubset); } } // Check that the subset histogram has been consumed if j < h.index.len() { return Err(Error::InvalidSubset); } // Fill remaining bucets, if any, from the superset histogram if i < self.index.len() { histogram.index.extend(&self.index[i..self.index.len()]); histogram.count.extend(&self.count[i..self.count.len()]); } Ok(histogram) } /// Return a collection of percentiles from this histogram. /// /// Each percentile should be in the inclusive range `0.0..=100.0`. For /// example, the 50th percentile (median) can be found using `50.0`. /// /// The results will be sorted by the percentile. 
pub fn percentiles(&self, percentiles: &[f64]) -> Result>, Error> { // validate all the percentiles if percentiles.is_empty() { return Err(Error::InvalidPercentile); } for percentile in percentiles { if !(0.0..=100.0).contains(percentile) { return Err(Error::InvalidPercentile); } } let total: u128 = self.count.iter().map(|v| *v as u128).sum(); // empty histogram, no percentiles available if total == 0 { return Ok(None); } // sort the requested percentiles so we can find them in a single pass let mut percentiles = percentiles.to_vec(); percentiles.sort_by(|a, b| a.partial_cmp(b).unwrap()); let searches: Vec = percentiles .iter() .map(|p| ((total as f64) * *p / 100.0).ceil() as usize) .collect(); let mut search_idx = 0; let mut result: Vec<(f64, Bucket)> = Vec::with_capacity(percentiles.len()); let mut seen: usize = 0; for (idx, count) in self.index.iter().zip(self.count.iter()) { seen += *count as usize; while search_idx < searches.len() && seen >= searches[search_idx] { result.push(( percentiles[search_idx], Bucket { count: *count, range: self.config.index_to_range(*idx), }, )); search_idx += 1; } } Ok(Some(result)) } /// Return a single percentile from this histogram. /// /// The percentile should be in the inclusive range `0.0..=100.0`. For /// example, the 50th percentile (median) can be found using `50.0`. pub fn percentile(&self, percentile: f64) -> Result, Error> { self.percentiles(&[percentile]) .map(|v| v.map(|x| x.first().unwrap().1.clone())) } /// Returns a new histogram with a reduced grouping power. The reduced /// grouping power should lie in the range (0..existing grouping power). /// /// This works by iterating over every bucket in the existing histogram /// and inserting the contained values into the new histogram. While we /// do not know the exact values of the data points (only that they lie /// within the bucket's range), it does not matter since the bucket is /// not split during downsampling and any value can be used. 
pub fn downsample(&self, grouping_power: u8) -> Result { if grouping_power >= self.config.grouping_power() { return Err(Error::MaxPowerTooLow); } let config = Config::new(grouping_power, self.config.max_value_power())?; let mut histogram = SparseHistogram::with_config(&config); // Multiple buckets in the old histogram will map to the same bucket // in the new histogram, so we have to aggregate bucket values from the // old histogram before inserting a bucket into the new downsampled // histogram. However, mappings between the histograms monotonically // increase, so once a bucket in the old histogram maps to a higher // bucket in the new histogram than is currently being aggregated, // the bucket can be sealed and inserted into the new histogram. let mut aggregating_idx: usize = 0; let mut aggregating_count: u64 = 0; for (idx, n) in self.index.iter().zip(self.count.iter()) { let new_idx = config.value_to_index(self.config.index_to_lower_bound(*idx))?; // If it maps to the currently aggregating bucket, merge counts if new_idx == aggregating_idx { aggregating_count += n; continue; } // Does not map to the aggregating bucket, so seal and store that bucket histogram.add_bucket(aggregating_idx, aggregating_count); // Start tracking this bucket as the current aggregating bucket aggregating_idx = new_idx; aggregating_count = *n; } // Add the final aggregated bucket histogram.add_bucket(aggregating_idx, aggregating_count); Ok(histogram) } } impl<'a> IntoIterator for &'a SparseHistogram { type Item = Bucket; type IntoIter = Iter<'a>; fn into_iter(self) -> Self::IntoIter { Iter { index: 0, histogram: self, } } } /// An iterator across the histogram buckets. 
pub struct Iter<'a> { index: usize, histogram: &'a SparseHistogram, } impl<'a> Iterator for Iter<'a> { type Item = Bucket; fn next(&mut self) -> Option<::Item> { if self.index >= self.histogram.index.len() { return None; } let bucket = Bucket { count: self.histogram.count[self.index], range: self .histogram .config .index_to_range(self.histogram.index[self.index]), }; self.index += 1; Some(bucket) } } impl From<&Histogram> for SparseHistogram { fn from(histogram: &Histogram) -> Self { let mut index = Vec::new(); let mut count = Vec::new(); for (idx, n) in histogram.as_slice().iter().enumerate() { if *n > 0 { index.push(idx); count.push(*n); } } Self { config: histogram.config(), index, count, } } } #[cfg(test)] mod tests { use rand::Rng; use std::collections::HashMap; use super::*; use crate::standard::Histogram; #[test] fn wrapping_add() { let config = Config::new(7, 32).unwrap(); let h1 = SparseHistogram { config, index: vec![1, 3, 5], count: vec![6, 12, 7], }; let h2 = SparseHistogram::with_config(&config); let h3 = SparseHistogram { config, index: vec![2, 3, 6, 11, 13], count: vec![5, 7, 3, 15, 6], }; let hdiff = SparseHistogram::new(6, 16).unwrap(); let h = h1.wrapping_add(&hdiff); assert_eq!(h, Err(Error::IncompatibleParameters)); let h = h1.wrapping_add(&h2).unwrap(); assert_eq!(h.index, vec![1, 3, 5]); assert_eq!(h.count, vec![6, 12, 7]); let h = h2.wrapping_add(&h3).unwrap(); assert_eq!(h.index, vec![2, 3, 6, 11, 13]); assert_eq!(h.count, vec![5, 7, 3, 15, 6]); let h = h1.wrapping_add(&h3).unwrap(); assert_eq!(h.index, vec![1, 2, 3, 5, 6, 11, 13]); assert_eq!(h.count, vec![6, 5, 19, 7, 3, 15, 6]); } #[test] fn checked_sub() { let config = Config::new(7, 32).unwrap(); let h1 = SparseHistogram { config, index: vec![1, 3, 5], count: vec![6, 12, 7], }; let hparams = SparseHistogram::new(6, 16).unwrap(); let h = h1.checked_sub(&hparams); assert_eq!(h, Err(Error::IncompatibleParameters)); let hempty = SparseHistogram::with_config(&config); let h = 
h1.checked_sub(&hempty).unwrap(); assert_eq!(h.index, vec![1, 3, 5]); assert_eq!(h.count, vec![6, 12, 7]); let hclone = h1.clone(); let h = h1.checked_sub(&hclone).unwrap(); assert!(h.index.is_empty()); assert!(h.count.is_empty()); let hlarger = SparseHistogram { config, index: vec![1, 3, 5], count: vec![4, 13, 7], }; let h = h1.checked_sub(&hlarger); assert_eq!(h, Err(Error::Underflow)); let hmore = SparseHistogram { config, index: vec![1, 5, 7], count: vec![4, 7, 1], }; let h = h1.checked_sub(&hmore); assert_eq!(h, Err(Error::InvalidSubset)); let hdiff = SparseHistogram { config, index: vec![1, 2, 5], count: vec![4, 1, 7], }; let h = h1.checked_sub(&hdiff); assert_eq!(h, Err(Error::InvalidSubset)); let hsubset = SparseHistogram { config, index: vec![1, 3], count: vec![5, 9], }; let h = h1.checked_sub(&hsubset).unwrap(); assert_eq!(h.index, vec![1, 3, 5]); assert_eq!(h.count, vec![1, 3, 7]); } #[test] fn percentiles() { let mut hstandard = Histogram::new(4, 10).unwrap(); let hempty = SparseHistogram::from(&hstandard); for v in 1..1024 { let _ = hstandard.increment(v); } let hsparse = SparseHistogram::from(&hstandard); let percentiles = [1.0, 10.0, 25.0, 50.0, 75.0, 90.0, 99.0, 99.9]; for percentile in percentiles { let bempty = hempty.percentile(percentile).unwrap(); let bstandard = hstandard.percentile(percentile).unwrap(); let bsparse = hsparse.percentile(percentile).unwrap(); assert_eq!(bempty, None); assert_eq!(bsparse, bstandard); } assert_eq!(hempty.percentiles(&percentiles), Ok(None)); assert_eq!( hstandard.percentiles(&percentiles).unwrap(), hsparse.percentiles(&percentiles).unwrap() ); } fn compare_histograms(hstandard: &Histogram, hsparse: &SparseHistogram) { assert_eq!(hstandard.config(), hsparse.config); let mut buckets: HashMap = HashMap::new(); for (idx, count) in hsparse.index.iter().zip(hsparse.count.iter()) { let _ = buckets.insert(*idx, *count); } for (idx, count) in hstandard.as_slice().iter().enumerate() { if *count > 0 { let v = 
buckets.get(&idx).unwrap(); assert_eq!(*v, *count); } } } #[test] fn snapshot() { let mut hstandard = Histogram::new(5, 10).unwrap(); for v in 1..1024 { let _ = hstandard.increment(v); } // Convert to sparse and store buckets in a hash for random lookup let hsparse = SparseHistogram::from(&hstandard); compare_histograms(&hstandard, &hsparse); } #[test] fn downsample() { let mut histogram = Histogram::new(8, 32).unwrap(); let mut rng = rand::thread_rng(); // Generate 10,000 values to store in a sorted array and a histogram for _ in 0..10000 { let v: u64 = rng.gen_range(1..2_u64.pow(histogram.config.max_value_power() as u32)); let _ = histogram.increment(v); } let hsparse = SparseHistogram::from(&histogram); compare_histograms(&histogram, &hsparse); // Downsample and compare heck the percentiles lie within error margin let grouping_power = histogram.config.grouping_power(); for factor in 1..grouping_power { let reduced_gp = grouping_power - factor; let h1 = histogram.downsample(reduced_gp).unwrap(); let h2 = hsparse.downsample(reduced_gp).unwrap(); compare_histograms(&h1, &h2); } } } histogram-0.11.0/src/standard.rs000064400000000000000000000407351046102023000146460ustar 00000000000000use crate::{Bucket, Config, Error, SparseHistogram}; /// A histogram that uses plain 64bit counters for each bucket. #[derive(Clone, Debug, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Histogram { pub(crate) config: Config, pub(crate) buckets: Box<[u64]>, } impl Histogram { /// Construct a new histogram from the provided parameters. See the /// documentation for [`crate::Config`] to understand their meaning. pub fn new(grouping_power: u8, max_value_power: u8) -> Result { let config = Config::new(grouping_power, max_value_power)?; Ok(Self::with_config(&config)) } /// Creates a new histogram using a provided [`crate::Config`]. 
pub fn with_config(config: &Config) -> Self { let buckets: Box<[u64]> = vec![0; config.total_buckets()].into(); Self { config: *config, buckets, } } /// Creates a new histogram using a provided [`crate::Config`] and the /// provided collection of buckets. pub fn from_buckets( grouping_power: u8, max_value_power: u8, buckets: Vec, ) -> Result { let config = Config::new(grouping_power, max_value_power)?; if config.total_buckets() != buckets.len() { return Err(Error::IncompatibleParameters); } Ok(Self { config, buckets: buckets.into(), }) } /// Increment the counter for the bucket corresponding to the provided value /// by one. pub fn increment(&mut self, value: u64) -> Result<(), Error> { self.add(value, 1) } /// Add some count to the counter for the bucket corresponding to the /// provided value pub fn add(&mut self, value: u64, count: u64) -> Result<(), Error> { let index = self.config.value_to_index(value)?; self.buckets[index] = self.buckets[index].wrapping_add(count); Ok(()) } /// Get a reference to the raw counters. pub fn as_slice(&self) -> &[u64] { &self.buckets } /// Get a mutable reference to the raw counters. pub fn as_mut_slice(&mut self) -> &mut [u64] { &mut self.buckets } /// Return a collection of percentiles from this histogram. /// /// Each percentile should be in the inclusive range `0.0..=100.0`. For /// example, the 50th percentile (median) can be found using `50.0`. /// /// The results will be sorted by the percentile. 
pub fn percentiles(&self, percentiles: &[f64]) -> Result>, Error> { // get the total count let total_count: u128 = self.buckets.iter().map(|v| *v as u128).sum(); // sort the requested percentiles so we can find them in a single pass let mut percentiles = percentiles.to_vec(); percentiles.sort_by(|a, b| a.partial_cmp(b).unwrap()); // validate all the percentiles for percentile in &percentiles { if !(0.0..=100.0).contains(percentile) { return Err(Error::InvalidPercentile); } } // empty histogram, no percentiles available if total_count == 0 { return Ok(None); } let mut bucket_idx = 0; let mut partial_sum = self.buckets[bucket_idx] as u128; let result: Vec<(f64, Bucket)> = percentiles .iter() .filter_map(|percentile| { let count = (percentile / 100.0 * total_count as f64).ceil() as u128; loop { // found the matching bucket index for this percentile if partial_sum >= count { return Some(( *percentile, Bucket { count: self.buckets[bucket_idx], range: self.config.index_to_range(bucket_idx), }, )); } // check if we have reached the end of the buckets if bucket_idx == (self.buckets.len() - 1) { break; } // otherwise, increment the bucket index, partial sum, and loop bucket_idx += 1; partial_sum += self.buckets[bucket_idx] as u128; } None }) .collect(); Ok(Some(result)) } /// Return a single percentile from this histogram. /// /// The percentile should be in the inclusive range `0.0..=100.0`. For /// example, the 50th percentile (median) can be found using `50.0`. pub fn percentile(&self, percentile: f64) -> Result, Error> { self.percentiles(&[percentile]) .map(|v| v.map(|x| x.first().unwrap().1.clone())) } /// Returns a new histogram with a reduced grouping power. The reduced /// grouping power should lie in the range (0..existing grouping power). 
/// /// The difference in grouping powers determines how much histogram size /// is reduced by, with every step approximately halving the total /// number of buckets (and hence total size of the histogram), while /// doubling the relative error. /// /// This works by iterating over every bucket in the existing histogram /// and inserting the contained values into the new histogram. While we /// do not know the exact values of the data points (only that they lie /// within the bucket's range), it does not matter since the bucket is /// not split during downsampling and any value can be used. pub fn downsample(&self, grouping_power: u8) -> Result { if grouping_power >= self.config.grouping_power() { return Err(Error::MaxPowerTooLow); } let mut histogram = Histogram::new(grouping_power, self.config.max_value_power())?; for (i, n) in self.as_slice().iter().enumerate() { // Skip empty buckets if *n != 0 { let val = self.config.index_to_lower_bound(i); histogram.add(val, *n)?; } } Ok(histogram) } /// Adds the other histogram to this histogram and returns the result as a /// new histogram. /// /// An error is returned if the two histograms have incompatible parameters /// or if there is an overflow. pub fn checked_add(&self, other: &Histogram) -> Result { if self.config != other.config { return Err(Error::IncompatibleParameters); } let mut result = self.clone(); for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) { *this = this.checked_add(*other).ok_or(Error::Overflow)?; } Ok(result) } /// Adds the other histogram to this histogram and returns the result as a /// new histogram. /// /// An error is returned if the two histograms have incompatible parameters. 
pub fn wrapping_add(&self, other: &Histogram) -> Result { if self.config != other.config { return Err(Error::IncompatibleParameters); } let mut result = self.clone(); for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) { *this = this.wrapping_add(*other); } Ok(result) } /// Subtracts the other histogram from this histogram and returns the result /// as a new histogram. /// /// An error is returned if the two histograms have incompatible parameters /// or if there is an overflow. pub fn checked_sub(&self, other: &Histogram) -> Result { if self.config != other.config { return Err(Error::IncompatibleParameters); } let mut result = self.clone(); for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) { *this = this.checked_sub(*other).ok_or(Error::Overflow)?; } Ok(result) } /// Subtracts the other histogram from this histogram and returns the result /// as a new histogram. /// /// An error is returned if the two histograms have incompatible parameters. pub fn wrapping_sub(&self, other: &Histogram) -> Result { if self.config != other.config { return Err(Error::IncompatibleParameters); } let mut result = self.clone(); for (this, other) in result.buckets.iter_mut().zip(other.buckets.iter()) { *this = this.wrapping_sub(*other); } Ok(result) } /// Returns the bucket configuration of the histogram. pub fn config(&self) -> Config { self.config } } impl<'a> IntoIterator for &'a Histogram { type Item = Bucket; type IntoIter = Iter<'a>; fn into_iter(self) -> Self::IntoIter { Iter { index: 0, histogram: self, } } } /// An iterator across the histogram buckets. 
pub struct Iter<'a> { index: usize, histogram: &'a Histogram, } impl<'a> Iterator for Iter<'a> { type Item = Bucket; fn next(&mut self) -> Option<::Item> { if self.index >= self.histogram.buckets.len() { return None; } let bucket = Bucket { count: self.histogram.buckets[self.index], range: self.histogram.config.index_to_range(self.index), }; self.index += 1; Some(bucket) } } impl From<&SparseHistogram> for Histogram { fn from(other: &SparseHistogram) -> Self { let mut histogram = Histogram::with_config(&other.config); for (index, count) in other.index.iter().zip(other.count.iter()) { histogram.buckets[*index] = *count; } histogram } } #[cfg(test)] mod tests { use super::*; use rand::Rng; #[test] fn size() { assert_eq!(std::mem::size_of::(), 48); } #[test] // Tests percentiles fn percentiles() { let mut histogram = Histogram::new(7, 64).unwrap(); assert_eq!(histogram.percentile(50.0).unwrap(), None); assert_eq!( histogram.percentiles(&[50.0, 90.0, 99.0, 99.9]).unwrap(), None ); for i in 0..=100 { let _ = histogram.increment(i); assert_eq!( histogram.percentile(0.0), Ok(Some(Bucket { count: 1, range: 0..=0, })) ); assert_eq!( histogram.percentile(100.0), Ok(Some(Bucket { count: 1, range: i..=i, })) ); } assert_eq!(histogram.percentile(25.0).map(|b| b.unwrap().end()), Ok(25)); assert_eq!(histogram.percentile(50.0).map(|b| b.unwrap().end()), Ok(50)); assert_eq!(histogram.percentile(75.0).map(|b| b.unwrap().end()), Ok(75)); assert_eq!(histogram.percentile(90.0).map(|b| b.unwrap().end()), Ok(90)); assert_eq!(histogram.percentile(99.0).map(|b| b.unwrap().end()), Ok(99)); assert_eq!( histogram.percentile(99.9).map(|b| b.unwrap().end()), Ok(100) ); assert_eq!(histogram.percentile(-1.0), Err(Error::InvalidPercentile)); assert_eq!(histogram.percentile(101.0), Err(Error::InvalidPercentile)); let percentiles: Vec<(f64, u64)> = histogram .percentiles(&[50.0, 90.0, 99.0, 99.9]) .unwrap() .unwrap() .iter() .map(|(p, b)| (*p, b.end())) .collect(); assert_eq!( percentiles, 
vec![(50.0, 50), (90.0, 90), (99.0, 99), (99.9, 100)] ); let _ = histogram.increment(1024); assert_eq!( histogram.percentile(99.9), Ok(Some(Bucket { count: 1, range: 1024..=1031, })) ); } #[test] #[ignore = "this test is flaky (see issue #100)"] // Tests downsampling fn downsample() { let mut histogram = Histogram::new(8, 32).unwrap(); let mut vals: Vec = Vec::with_capacity(10000); let mut rng = rand::thread_rng(); // Generate 10,000 values to store in a sorted array and a histogram for _ in 0..vals.capacity() { let v: u64 = rng.gen_range(1..2_u64.pow(histogram.config.max_value_power() as u32)); vals.push(v); let _ = histogram.increment(v); } vals.sort(); // List of percentiles to query and validate let mut percentiles: Vec = Vec::with_capacity(109); for i in 20..99 { percentiles.push(i as f64); } let mut tail = vec![ 99.1, 99.2, 99.3, 99.4, 99.5, 99.6, 99.7, 99.8, 99.9, 99.99, 100.0, ]; percentiles.append(&mut tail); // Downsample and check the percentiles lie within error margin let h = histogram.clone(); let grouping_power = histogram.config.grouping_power(); for factor in 1..grouping_power { let error = histogram.config.error(); for p in &percentiles { let v = vals[((*p / 100.0 * (vals.len() as f64)) as usize) - 1]; // Value and relative error from full histogram let vhist = histogram.percentile(*p).unwrap().unwrap().end(); let e = (v.abs_diff(vhist) as f64) * 100.0 / (v as f64); assert!(e < error); } histogram = h.downsample(grouping_power - factor).unwrap(); } } // Return four histograms (three with identical configs and one with a // different config) for testing add and subtract. One of the histograms // should be populated with the maximum u64 value to cause overflows. 
fn build_histograms() -> (Histogram, Histogram, Histogram, Histogram) { let mut h1 = Histogram::new(1, 3).unwrap(); let mut h2 = Histogram::new(1, 3).unwrap(); let mut h3 = Histogram::new(1, 3).unwrap(); let h4 = Histogram::new(7, 32).unwrap(); for i in 0..h1.config().total_buckets() { h1.as_mut_slice()[i] = 1; h2.as_mut_slice()[i] = 1; h3.as_mut_slice()[i] = u64::MAX; } (h1, h2, h3, h4) } #[test] // Tests checked add fn checked_add() { let (h, h_good, h_overflow, h_mismatch) = build_histograms(); assert_eq!( h.checked_add(&h_mismatch), Err(Error::IncompatibleParameters) ); let r = h.checked_add(&h_good).unwrap(); assert_eq!(r.as_slice(), &[2, 2, 2, 2, 2, 2]); assert_eq!(h.checked_add(&h_overflow), Err(Error::Overflow)); } #[test] // Tests wrapping add fn wrapping_add() { let (h, h_good, h_overflow, h_mismatch) = build_histograms(); assert_eq!( h.wrapping_add(&h_mismatch), Err(Error::IncompatibleParameters) ); let r = h.wrapping_add(&h_good).unwrap(); assert_eq!(r.as_slice(), &[2, 2, 2, 2, 2, 2]); let r = h.wrapping_add(&h_overflow).unwrap(); assert_eq!(r.as_slice(), &[0, 0, 0, 0, 0, 0]); } #[test] // Tests checked sub fn checked_sub() { let (h, h_good, h_overflow, h_mismatch) = build_histograms(); assert_eq!( h.checked_sub(&h_mismatch), Err(Error::IncompatibleParameters) ); let r = h.checked_sub(&h_good).unwrap(); assert_eq!(r.as_slice(), &[0, 0, 0, 0, 0, 0]); assert_eq!(h.checked_add(&h_overflow), Err(Error::Overflow)); } #[test] // Tests wrapping sub fn wrapping_sub() { let (h, h_good, h_overflow, h_mismatch) = build_histograms(); assert_eq!( h.wrapping_sub(&h_mismatch), Err(Error::IncompatibleParameters) ); let r = h.wrapping_sub(&h_good).unwrap(); assert_eq!(r.as_slice(), &[0, 0, 0, 0, 0, 0]); let r = h.wrapping_sub(&h_overflow).unwrap(); assert_eq!(r.as_slice(), &[2, 2, 2, 2, 2, 2]); } #[test] // Test creating the histogram from buckets fn from_buckets() { let mut histogram = Histogram::new(8, 32).unwrap(); for i in 0..=100 { let _ = histogram.increment(i); } 
let buckets = histogram.as_slice(); let constructed = Histogram::from_buckets(8, 32, buckets.to_vec()).unwrap(); assert!(constructed == histogram); } }