mini-histogram-0.1.3/0000755000175100017510000000000013637637604013507 5ustar pravipravimini-histogram-0.1.3/.gitignore0000644000175100017510000000012713637637604015477 0ustar pravipravi/.bundle/ /.yardoc /_yardoc/ /coverage/ /doc/ /pkg/ /spec/reports/ /tmp/ Gemfile.lock mini-histogram-0.1.3/Rakefile0000644000175100017510000000136613637637604015162 0ustar pravipravirequire "bundler/gem_tasks" require "rake/testtask" $LOAD_PATH.unshift File.expand_path("./lib", __dir__) Rake::TestTask.new(:test) do |t| t.libs << "test" t.libs << "lib" t.test_files = FileList["test/**/*_test.rb"] end task :default => :test task :bench do require 'benchmark/ips' require 'enumerable/statistics' require 'mini_histogram' array = 1000.times.map { rand } histogram = MiniHistogram.new(array) my_weights = histogram.weights puts array.histogram.weights == my_weights puts array.histogram.weights.inspect puts my_weights.inspect Benchmark.ips do |x| x.report("enumerable stats") { array.histogram } x.report("mini histogram ") { MiniHistogram.new(array).weights } x.compare! end end mini-histogram-0.1.3/.github/0000755000175100017510000000000013637637604015047 5ustar pravipravimini-histogram-0.1.3/.github/workflows/0000755000175100017510000000000013637637604017104 5ustar pravipravimini-histogram-0.1.3/.github/workflows/check_changelog.yml0000644000175100017510000000056013637637604022714 0ustar pravipraviname: Check Changelog on: [pull_request] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - name: Check that CHANGELOG is touched run: | cat $GITHUB_EVENT_PATH | jq .pull_request.title | grep -i '\[\(\(changelog skip\)\|\(ci skip\)\)\]' || git diff remotes/origin/${{ github.base_ref }} --name-only | grep CHANGELOG.md mini-histogram-0.1.3/bin/0000755000175100017510000000000013637637604014257 5ustar pravipravimini-histogram-0.1.3/bin/console0000755000175100017510000000053513637637604015652 0ustar pravipravi#!/usr/bin/env ruby require "bundler/setup" require "mini_histogram" # You can add fixtures and/or initialization code here to make experimenting # with your gem easier. You can also use a different console, if you like. # (If you use this, don't forget to add pry to your Gemfile!) # require "pry" # Pry.start require "irb" IRB.start(__FILE__) mini-histogram-0.1.3/bin/setup0000755000175100017510000000020313637637604015340 0ustar pravipravi#!/usr/bin/env bash set -euo pipefail IFS=$'\n\t' set -vx bundle install # Do any other automated setup that you need to do here mini-histogram-0.1.3/mini_histogram.gemspec0000644000175100017510000000262513637637604020072 0ustar pravipravirequire_relative 'lib/mini_histogram/version' Gem::Specification.new do |spec| spec.name = "mini_histogram" spec.version = MiniHistogram::VERSION spec.authors = ["schneems"] spec.email = ["richard.schneeman+foo@gmail.com"] spec.summary = %q{A small gem for building histograms out of Ruby arrays} spec.description = %q{It makes histograms out of Ruby data. How cool is that!? Pretty cool if you ask me.} spec.homepage = "https://github.com/zombocom/mini_histogram" spec.license = "MIT" spec.required_ruby_version = Gem::Requirement.new(">= 2.1.0") spec.metadata["homepage_uri"] = spec.homepage # spec.metadata["source_code_uri"] = "blerg" # spec.metadata["changelog_uri"] = "blerg" # Specify which files should be added to the gem when it is released. # The `git ls-files -z` loads the files in the RubyGem that have been added into git. spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } end spec.bindir = "exe" spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } spec.require_paths = ["lib"] spec.add_development_dependency "m" # Used for comparison testing, but only supports Ruby 2.4+ # spec.add_development_dependency "enumerable-statistics" spec.add_development_dependency "benchmark-ips" end mini-histogram-0.1.3/CHANGELOG.md0000644000175100017510000000043113637637604015316 0ustar pravipravi## Master ## 0.1.3 - Handle edge cases (https://github.com/zombocom/mini_histogram/pull/2) ## 0.1.2 - Add `edge` as alias to `edges` ## 0.1.1 - Fix multi histogram weights, with set_average_edges! method (https://github.com/zombocom/mini_histogram/pull/1) ## 0.1.0 - First mini-histogram-0.1.3/lib/0000755000175100017510000000000013637637604014255 5ustar pravipravimini-histogram-0.1.3/lib/mini_histogram.rb0000644000175100017510000001303713637637604017617 0ustar pravipravirequire "mini_histogram/version" # A class for building histogram info # # Given an array, this class calculates the "edges" of a histogram # these edges mark the boundries for "bins" # # array = [1,1,1, 5, 5, 5, 5, 10, 10, 10] # histogram = MiniHistogram.new(array) # puts histogram.edges # # => [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0] # # It also finds the weights (aka count of values) that would go in each bin: # # puts histogram.weights # # => [3, 0, 4, 0, 0, 3] # # This means that the `array` here had three items between 0.0 and 2.0. # class MiniHistogram class Error < StandardError; end attr_reader :array, :left_p, :max def initialize(array, left_p: true, edges: nil) @array = array @left_p = left_p @edges = edges @weights = nil @min, @max = array.minmax end def edges_min edges.min end def edges_max edges.max end def histogram(*_) self end def closed @left_p ? :left : :right end # Sets the edge value to something new, # also clears any previously calculated values def update_values(edges:, max: ) @edges = edges @max = max @weights = nil # clear memoized value end def bin_size return 0 if edges.length <= 1 edges[1] - edges[0] end # Weird name, right? There are multiple ways to # calculate the number of "bins" a histogram should have, one # of the most common is the "sturges" method # # Here are some alternatives from numpy: # https://github.com/numpy/numpy/blob/d9b1e32cb8ef90d6b4a47853241db2a28146a57d/numpy/lib/histograms.py#L489-L521 def sturges len = array.length return 1.0 if len == 0 # return (long)(ceil(Math.log2(n)) + 1); return Math.log2(len).ceil + 1 end # Given an array of edges and an array we want to generate a histogram from # return the counts for each "bin" # # Example: # # a = [1,1,1, 5, 5, 5, 5, 10, 10, 10] # edges = [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0] # # MiniHistogram.new(a).weights # # => [3, 0, 4, 0, 0, 3] # # This means that the `a` array has 3 values between 0.0 and 2.0 # 4 values between 4.0 and 6.0 and three values between 10.0 and 12.0 def weights return @weights if @weights return @weights = [] if array.empty? lo = edges.first step = edges[1] - edges[0] max_index = ((@max - lo) / step).floor @weights = Array.new(max_index + 1, 0) array.each do |x| index = ((x - lo) / step).floor @weights[index] += 1 end return @weights end # Finds the "edges" of a given histogram that will mark the boundries # for the histogram's "bins" # # Example: # # a = [1,1,1, 5, 5, 5, 5, 10, 10, 10] # MiniHistogram.new(a).edges # # => [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0] # # There are multiple ways to find edges, this was taken from # https://github.com/mrkn/enumerable-statistics/issues/24 # # Another good set of implementations is in numpy # https://github.com/numpy/numpy/blob/d9b1e32cb8ef90d6b4a47853241db2a28146a57d/numpy/lib/histograms.py#L222 def edges return @edges if @edges return @edges = [0.0] if array.empty? lo = @min hi = @max nbins = sturges.to_f if hi == lo start = lo step = 1.0 divisor = 1.0 len = 1 else bw = (hi - lo) / nbins lbw = Math.log10(bw) if lbw >= 0 step = 10 ** lbw.floor * 1.0 r = bw/step if r <= 1.1 # do nothing elsif r <= 2.2 step *= 2.0 elsif r <= 5.5 step *= 5.0 else step *= 10 end divisor = 1.0 start = step * (lo/step).floor len = ((hi - start)/step).ceil else divisor = 10 ** - lbw.floor r = bw * divisor if r <= 1.1 # do nothing elsif r <= 2.2 divisor /= 2.0 elsif r <= 5.5 divisor /= 5.0 else divisor /= 10.0 end step = 1.0 start = (lo * divisor).floor len = (hi * divisor - start).ceil end end if left_p while (lo < start/divisor) start -= step end while (start + (len - 1)*step)/divisor <= hi len += 1 end else while lo <= start/divisor start -= step end while (start + (len - 1)*step)/divisor < hi len += 1 end end @edges = [] len.times.each do @edges << start/divisor start += step end return @edges end alias :edge :edges # Given an array of Histograms this function calcualtes # an average edge size along with the minimum and maximum # edge values. It then updates the edge value on all inputs # # The main pourpose of this method is to be able to chart multiple # distributions against a similar axis # # See for more context: https://github.com/schneems/derailed_benchmarks/pull/169 def self.set_average_edges!(*array_of_histograms) array_of_histograms.each { |x| raise "Input expected to be a histogram but is #{x.inspect}" unless x.is_a?(MiniHistogram) } steps = array_of_histograms.map(&:bin_size) avg_step_size = steps.inject(&:+).to_f / steps.length max_value = array_of_histograms.map(&:max).max max_edge = array_of_histograms.map(&:edges_max).max min_edge = array_of_histograms.map(&:edges_min).min average_edges = [min_edge] while average_edges.last < max_edge average_edges << average_edges.last + avg_step_size end array_of_histograms.each {|h| h.update_values(edges: average_edges, max: max_value) } return array_of_histograms end end mini-histogram-0.1.3/lib/mini_histogram/0000755000175100017510000000000013637637604017266 5ustar pravipravimini-histogram-0.1.3/lib/mini_histogram/version.rb0000644000175100017510000000005413637637604021277 0ustar pravipraviclass MiniHistogram VERSION = "0.1.3" end mini-histogram-0.1.3/README.md0000644000175100017510000000606413637637604014774 0ustar pravipravi# MiniHistogram [![Build Status](https://travis-ci.org/zombocom/mini_histogram.svg?branch=master)](https://travis-ci.org/zombocom/mini_histogram) What's a histogram and why should you care? First read [Lies, Damned Lies, and Averages: Perc50, Perc95 explained for Programmers](https://schneems.com/2020/03/17/lies-damned-lies-and-averages-perc50-perc95-explained-for-programmers/). This library lets you build histograms in pure Ruby. ## Installation Add this line to your application's Gemfile: ```ruby gem 'mini_histogram' ``` And then execute: $ bundle install Or install it yourself as: $ gem install mini_histogram ## Usage Given an array, this class calculates the "edges" of a histogram these edges mark the boundries for "bins" ```ruby array = [1,1,1, 5, 5, 5, 5, 10, 10, 10] histogram = MiniHistogram.new(array) puts histogram.edges # => [0.0, 2.0, 4.0, 6.0, 8.0, 10.0, 12.0] ``` It also finds the weights (aka count of values) that would go in each bin: ``` puts histogram.weights # => [3, 0, 4, 0, 0, 3] ``` This means that the `array` here had three items between 0.0 and 2.0, four items between 4.0 and 6.0 and three items between 10.0 and 12.0 Alternatives to this gem include https://github.com/mrkn/enumerable-statistics/. I needed this gem to be able to calculate a "shared" or "average" edge value as seen in this PR https://github.com/mrkn/enumerable-statistics/pull/23. So that I could add histograms to derailed benchmarks: https://github.com/schneems/derailed_benchmarks/pull/169. This gem provides a `MiniHistogram.set_average_edges!` method to help there. Also this gem does not require a native extension compilation (faster to install, but performance is slower), and this gem does not extend or monkeypatch an core classes. [MiniHistogram API Docs](https://rubydoc.info/github/zombocom/mini_histogram/master/MiniHistogram) ## Development After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). ## Contributing Bug reports and pull requests are welcome on GitHub at https://github.com/zombocom/mini_histogram. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/zombocom/mini_histogram/blob/master/CODE_OF_CONDUCT.md). ## License The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT). ## Code of Conduct Everyone interacting in the MiniHistogram project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/zombocom/mini_histogram/blob/master/CODE_OF_CONDUCT.md). mini-histogram-0.1.3/.travis.yml0000644000175100017510000000012113637637604015612 0ustar pravipravi--- language: ruby cache: bundler rvm: - 2.1 - 2.2 - 2.5 - 2.6 - 2.7.0 mini-histogram-0.1.3/Gemfile0000644000175100017510000000022313637637604014777 0ustar pravipravisource "https://rubygems.org" # Specify your gem's dependencies in mini_histogram.gemspec gemspec gem "rake", "~> 12.0" gem "minitest", "~> 5.0" mini-histogram-0.1.3/CODE_OF_CONDUCT.md0000644000175100017510000000625213637637604016313 0ustar pravipravi# Contributor Covenant Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. ## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at richard.schneeman+foo@gmail.com. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [https://contributor-covenant.org/version/1/4][version] [homepage]: https://contributor-covenant.org [version]: https://contributor-covenant.org/version/1/4/ mini-histogram-0.1.3/LICENSE.txt0000644000175100017510000000206313637637604015333 0ustar pravipraviThe MIT License (MIT) Copyright (c) 2020 schneems Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.