Statistics-Test-RandomWalk-0.02/0000755000175000017500000000000011507357477015241 5ustar tseetseeStatistics-Test-RandomWalk-0.02/Changes0000644000175000017500000000032011507357277016525 0ustar tseetseeRevision history for Perl extension Statistics::Test::RandomWalk 0.02 Fri Dec 31 19:00 2010 - Example for testing an integer RNG - Add rescaling factor 0.01 Sat Jan 6 18:33 2007 - original version Statistics-Test-RandomWalk-0.02/Makefile.PL0000644000175000017500000000136311507357232017203 0ustar tseetseeuse 5.006; use ExtUtils::MakeMaker; # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. WriteMakefile( NAME => 'Statistics::Test::RandomWalk', VERSION_FROM => 'lib/Statistics/Test/RandomWalk.pm', # finds $VERSION LICENSE => 'perl', PREREQ_PM => { Params::Util => '0', Math::BigFloat => '0', Statistics::Test::Sequence => '0', Class::XSAccessor => '1.05', }, # e.g., Module::Name => 1.1 ($] >= 5.005 ? ## Add these new keywords supported since 5.005 (ABSTRACT_FROM => 'lib/Statistics/Test/RandomWalk.pm', # retrieve abstract from module AUTHOR => 'Steffen Mueller ') : ()), ); Statistics-Test-RandomWalk-0.02/t/0000755000175000017500000000000011507357477015504 5ustar tseetseeStatistics-Test-RandomWalk-0.02/t/20tests.t0000644000175000017500000000155211332011633017151 0ustar tseetseeuse strict; use warnings; use Test::More tests => 16; use Params::Util qw/_ARRAY/; BEGIN { use_ok('Statistics::Test::RandomWalk') }; my $t = Statistics::Test::RandomWalk->new(); isa_ok($t, 'Statistics::Test::RandomWalk'); eval { $t->set_data( [map rand(), 1..10000] ); }; ok(!$@); ok(ref($t->{data}) eq 'ARRAY'); ok(@{$t->{data}} == 10000); my ($alpha, $got, $exp); eval { ($alpha, $got, $exp) = $t->test(10); }; ok(!$@); ok(_ARRAY($alpha)); ok(_ARRAY($got)); ok(_ARRAY($exp)); my $str = $t->data_to_report($alpha, $got, $exp); ok(defined $str and length($str)); eval { $t->set_data( sub { map rand(), 1..100 }, 100 ); }; ok(!$@); eval { ($alpha, 
$got, $exp) = $t->test(20); }; ok(!$@); ok(_ARRAY($alpha)); ok(_ARRAY($got)); ok(_ARRAY($exp)); $str = $t->data_to_report($alpha, $got, $exp); ok(defined $str and length($str)); Statistics-Test-RandomWalk-0.02/t/00podcover.t0000644000175000017500000000024211332011633017621 0ustar tseetseeuse Test::More; eval "use Test::Pod::Coverage 1.00"; plan skip_all => "Test::Pod::Coverage 1.00 required for testing POD coverage" if $@; all_pod_coverage_ok(); Statistics-Test-RandomWalk-0.02/t/00pod.t0000644000175000017500000000020211332011633016556 0ustar tseetseeuse Test::More; eval "use Test::Pod 1.00"; plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; all_pod_files_ok(); Statistics-Test-RandomWalk-0.02/t/11n_over_k.t0000644000175000017500000000132211332011633017604 0ustar tseetseeuse strict; use warnings; use Test::More tests => 231; BEGIN { use_ok('Statistics::Test::RandomWalk') }; use Math::BigFloat; use Statistics::Test::Sequence; sub n_over_k { my $n = Math::BigFloat->new(shift); my $k = Math::BigFloat->new(shift); return( Statistics::Test::Sequence::faculty($n) / ( Statistics::Test::Sequence::faculty($k) * Statistics::Test::Sequence::faculty($n-$k) ) ); } foreach my $n ( 1..20 ) { foreach my $k (0..$n) { my $str = Statistics::Test::RandomWalk::n_over_k($n, $k); my $test = n_over_k($n, $k); ok( $str == $test, "n_over_k($n, $k) = $str | $test" ); } } Statistics-Test-RandomWalk-0.02/MANIFEST0000644000175000017500000000042411507357477016372 0ustar tseetseeChanges examples/comparison.pl examples/comparison_integer.pl lib/Statistics/Test/RandomWalk.pm Makefile.PL MANIFEST This list of files README t/00pod.t t/00podcover.t t/11n_over_k.t t/20tests.t META.yml Module meta-data (added by MakeMaker) Statistics-Test-RandomWalk-0.02/README0000644000175000017500000000667711507357316016131 0ustar tseetseeNAME Statistics::Test::RandomWalk - Random Walk test for random numbers SYNOPSIS use Statistics::Test::RandomWalk; my $tester = Statistics::Test::RandomWalk->new(); 
$tester->set_data( [map {rand()} 1..1000000] ); my $no_bins = 10; my ($quant, $got, $expected) = $tester->test($no_bins); print $tester->data_to_report($quant, $got, $expected); DESCRIPTION This module implements a Random Walk test of a random number generator as outlined in Blobel et al (Refer to the SEE ALSO section). Basically, it tests that the numbers "[0,1]" generated by a random number generator are distributed evenly. It divides "[0,1]" into "n" evenly sized bins and calculates the number of expected and actual random numbers in the bin. (In fact, this counts the cumulated numbers, but that works the same.) METHODS new Creates a new random number tester. set_rescale_factor The default range of the random numbers [0, 1) can be rescaled by a constant factor. This method is the setter for that factor. rescale_factor Returns the current rescaling factor. set_data Sets the random numbers to operate on. First argument must be either an array reference to an array of random numbers or a code reference. If the first argument is a code reference, the second argument must be an integer "n". The code reference is called "n" times and its return values are used as random numbers. The code reference semantics are particularly useful if you do not want to store all random numbers in memory at the same time. You can write a subroutine that, for example, generates and returns batches of 100 random numbers so no more than 101 of these numbers will be in memory at the same time. Note that if you return 100 numbers at once and pass in "n=50", you will have a sequence of 5000 random numbers. test Runs the Random Walk test on the data that was previously set using "set_data". First argument must be the number of bins. Returns three array references. First is an array of quantiles. If the number of bins was ten, this (and all other returned arrays) will hold ten items. Second are the determined numbers of random numbers below the quantiles. Third are the expected counts. 
data_to_report From the data returned by the "test()" method, this method creates a textual report and returns it as a string. Do not forget to pass in all three array references that were returned by "test()". SUBROUTINES n_over_k Computes "n" over "k". Uses Perl's big number support and returns a Math::BigFloat object. This sub is memoized. SEE ALSO Math::BigFloat, Memoize, Params::Util Random number generators: Math::Random::MT, Math::Random, Math::Random::OO, Math::TrulyRandom, "/dev/random" where available Statistics::Test::Sequence The algorithm was taken from: (German) Blobel, V., and Lohrmann, E. *Statistische und numerische Methoden der Datenanalyse*. Stuttgart, Leipzig: Teubner, 1998 AUTHOR Steffen Mueller, <smueller@cpan.org> COPYRIGHT AND LICENSE Copyright (C) 2007-2010 by Steffen Mueller This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.6 or, at your option, any later version of Perl 5 you may have available. 
Statistics-Test-RandomWalk-0.02/META.yml0000644000175000017500000000124211507357477016511 0ustar tseetsee--- #YAML:1.0 name: Statistics-Test-RandomWalk version: 0.02 abstract: Random Walk test for random numbers author: - Steffen Mueller license: perl distribution_type: module configure_requires: ExtUtils::MakeMaker: 0 build_requires: ExtUtils::MakeMaker: 0 requires: Class::XSAccessor: 1.05 Math::BigFloat: 0 Params::Util: 0 Statistics::Test::Sequence: 0 no_index: directory: - t - inc generated_by: ExtUtils::MakeMaker version 6.55_02 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 Statistics-Test-RandomWalk-0.02/lib/0000755000175000017500000000000011507357477016007 5ustar tseetseeStatistics-Test-RandomWalk-0.02/lib/Statistics/0000755000175000017500000000000011507357477020141 5ustar tseetseeStatistics-Test-RandomWalk-0.02/lib/Statistics/Test/0000755000175000017500000000000011507357477021060 5ustar tseetseeStatistics-Test-RandomWalk-0.02/lib/Statistics/Test/RandomWalk.pm0000644000175000017500000001620311507356323023444 0ustar tseetseepackage Statistics::Test::RandomWalk; use 5.006; use strict; use warnings; our $VERSION = '0.02'; use Carp qw/croak/; use Params::Util qw/_POSINT _ARRAY _CODE/; use Memoize; use Math::BigFloat; use Statistics::Test::Sequence; use Class::XSAccessor { constructor => 'new', getters => { rescale_factor => 'rescale', }, setters => { set_rescale_factor => 'rescale', }, }; =head1 NAME Statistics::Test::RandomWalk - Random Walk test for random numbers =head1 SYNOPSIS use Statistics::Test::RandomWalk; my $tester = Statistics::Test::RandomWalk->new(); $tester->set_data( [map {rand()} 1..1000000] ); my $no_bins = 10; my ($quant, $got, $expected) = $tester->test($no_bins); print $tester->data_to_report($quant, $got, $expected); =head1 DESCRIPTION This module implements a Random Walk test of a random number generator as outlined in Blobel et al (Refer to the SEE ALSO section). 
=pod

Basically, it tests that the numbers C<[0,1]> generated by a
random number generator are distributed evenly. It divides C<[0,1]>
into C<n> evenly sized bins and calculates the number of expected and
actual random numbers in the bin. (In fact, this counts the
cumulated numbers, but that works the same.)

=head1 METHODS

=head2 new

Creates a new random number tester.

=head2 set_rescale_factor

The default range of the random numbers [0, 1) can be rescaled
by a constant factor. This method is the setter for that factor.

=head2 rescale_factor

Returns the current rescaling factor.

=head2 set_data

Sets the random numbers to operate on. First argument
must be either an array reference to an array of random
numbers or a code reference.

If the first argument is a code reference, the second
argument must be an integer C<n>. The code reference is
called C<n> times and its return values are used as
random numbers.

The code reference semantics are particularly useful if
you do not want to store all random numbers in memory at
the same time. You can write a subroutine that, for example,
generates and returns batches of 100 random numbers so no
more than 101 of these numbers will be in memory at the
same time. Note that if you return 100 numbers at once and
pass in C<n=50>, you will have a sequence of 5000 random
numbers.

Returns 1 on success and throws an exception on invalid arguments.
If the arguments are rejected, the data that was set previously
is left untouched.

=cut

sub set_data {
    my $self = shift;
    my $data = shift;

    # Plain (non-empty) array of numbers: store it as is.
    if (_ARRAY($data)) {
        $self->{data} = $data;
        return 1;
    }

    # Generator callback: needs the number of times it is to be called.
    if (_CODE($data)) {
        my $n = shift;

        # Validate before touching the object so that a rejected call
        # cannot leave a code reference stored without a matching
        # call count.
        if (not _POSINT($n)) {
            croak("'set_data' needs an integer as second argument if the first argument is a code reference.");
        }

        $self->{data} = $data;
        $self->{n}    = $n;
        return 1;
    }

    croak("Invalid arguments to 'set_data'.");
}

=head2 test

Runs the Random Walk test on the data that was previously set using
C<set_data>.

First argument must be the number of bins.

Returns three array references. First is an array of quantiles.
If the number of bins was ten, this (and all other returned arrays)
will hold ten items.
=pod

Second are the determined numbers of random numbers below the
quantiles. Third are the expected counts.

=cut

sub test {
    my $self = shift;
    my $bins = shift;
    if (not _POSINT($bins)) {
        croak("Expecting number of bins as argument to 'test'");
    }

    my $rescale_factor = $self->rescale_factor || 1;

    my $data = $self->{data};
    if (not defined $data) {
        croak("Set data using 'set_data' first.");
    }

    # Upper edges of the bins on the rescaled axis. Multiplying before
    # dividing keeps the last edge exactly equal to the rescale factor
    # instead of accumulating the rounding error of a precomputed step.
    my @alpha = map { $_ * $rescale_factor / $bins } 1 .. $bins;

    # $hits[$i] counts the numbers whose first matching edge is
    # $alpha[$i]; the cumulative counts are derived from it afterwards.
    my @hits    = (0) x $bins;
    my $numbers = 0;

    # Registers a single random number. A value that is not below the
    # largest edge is counted in the total but does not land in any bin.
    my $tally = sub {
        my $value = shift;
        $numbers++;
        foreach my $ai (0 .. $#alpha) {
            if ($value < $alpha[$ai]) {
                $hits[$ai]++;
                last;
            }
        }
        return;
    };

    if (_ARRAY($data)) {
        $tally->($_) for @$data;
    }
    else {
        # Code reference: call it the configured number of times and
        # consume each returned batch right away so that only one batch
        # is held in memory at any time.
        my $calls = $self->{n};
        foreach my $call (1 .. $calls) {
            foreach my $value ($data->()) {
                $tally->($value);
            }
        }
    }

    # Turn the per-bin hits into "how many numbers are below this
    # quantile" with a running sum.
    my @cumulated;
    my $running = 0;
    foreach my $count (@hits) {
        $running += $count;
        push @cumulated, $running;
    }

    # For evenly distributed numbers, the fraction expected below a
    # quantile equals the quantile itself.
    my @expected_smaller =
        map { Math::BigFloat->new($numbers) * $_ / $rescale_factor } @alpha;

    return(
        [map { $_ / $rescale_factor } @alpha],
        \@cumulated,
        \@expected_smaller,
    );
}

=head2 data_to_report

From the data returned by the C<test()> method, this
method creates a textual report and returns it as a string.

Do not forget to pass in all three array references that were
returned by C<test()>.
=pod

=cut

sub data_to_report {
    my $self = shift;
    my $alpha = shift;
    my $got = shift;
    my $expected = shift;
    if (grep {not _ARRAY($_)} ($alpha, $got, $expected)) {
        croak("Please pass the data returned from 'test' to the 'data_to_report' method.");
    }

    # Every column is as wide as its longest entry, but never narrower
    # than its heading.
    my $max_a = _max_length($alpha);
    $max_a = length('Quantile') if length('Quantile') > $max_a;
    my $max_g = _max_length($got);
    $max_g = length('Got') if length('Got') > $max_g;
    my $max_e = _max_length($expected);
    $max_e = length('Expected') if length('Expected') > $max_e;

    my $str = '';

    # Heading line followed by a rule of the same width (the heading's
    # trailing newline is not part of the width).
    $str .= sprintf(
        "\%${max_a}s | \%${max_g}s | \%${max_e}s\n",
        qw/Quantile Got Expected/
    );
    $str .= ('-' x (length($str)-1))."\n";

    # One row per quantile. The counts are printed as unsigned integers,
    # which cuts off the fractional part of an expected count.
    foreach my $i (0..$#$alpha) {
        $str .= sprintf(
            "\%${max_a}f | \%${max_g}u | \%${max_e}u\n",
            $alpha->[$i], $got->[$i], $expected->[$i]
        );
    }

    return $str;
}

# Returns the length of the longest stringified element of the array
# referenced by the first argument (0 for an empty array).
sub _max_length {
    my $max = 0;
    foreach (@{$_[0]}) {
        $max = length $_ if length($_) > $max;
    }
    return $max;
}

=head1 SUBROUTINES

=head2 n_over_k

Computes C<n> over C<k>. Uses Perl's big number support and
returns a L<Math::BigFloat> object. The result is C<0> if C<k> is
negative or larger than C<n>.

This sub is memoized.

=cut

memoize('n_over_k');
sub n_over_k {
    # Fractional arguments are truncated towards zero.
    my $n = int(shift);
    my $k = int(shift);

    # There is no way to pick fewer than zero or more than $n items.
    return Math::BigFloat->bzero() if $k < 0 or $k > $n;

    # Math::BigFloat is implemented on top of Math::BigInt; the explicit
    # require merely makes that dependency visible.
    require Math::BigInt;

    # "n over k" equals "n over (n-k)"; walk the shorter of the two.
    my $steps = $k < $n - $k ? $k : $n - $k;

    # After step $i the accumulator holds "(n - steps + i) over i", which
    # is an integer, so each division is exact. Pure integer arithmetic
    # avoids the rounding that limited-precision reciprocals introduce
    # for results with many digits.
    my $o = Math::BigInt->bone();
    foreach my $i (1 .. $steps) {
        $o = $o->bmul($n - $steps + $i)->bdiv($i);
    }

    # Callers get a Math::BigFloat rounded at the decimal point.
    return Math::BigFloat->new($o->bstr())->bfround(0);
}

1;

__END__

=head1 SEE ALSO

L<Math::BigFloat>, L<Memoize>, L<Params::Util>

Random number generators:
L<Math::Random::MT>, L<Math::Random>, L<Math::Random::OO>,
L<Math::TrulyRandom>, C</dev/random> where available

L<Statistics::Test::Sequence>

The algorithm was taken from: (German)

Blobel, V., and Lohrmann, E. I<Statistische und numerische Methoden
der Datenanalyse>. Stuttgart, Leipzig: Teubner, 1998

=head1 AUTHOR

Steffen Mueller, E<lt>smueller@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2007-2010 by Steffen Mueller

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.6 or,
at your option, any later version of Perl 5 you may have available.
=cut Statistics-Test-RandomWalk-0.02/examples/0000755000175000017500000000000011507357477017057 5ustar tseetseeStatistics-Test-RandomWalk-0.02/examples/comparison_integer.pl0000644000175000017500000000200611507357177023276 0ustar tseetsee#!/usr/bin/perl use strict; use warnings; use lib 'lib'; use Statistics::Test::RandomWalk; use Data::Dumper; use Math::Random::MT; my $t = Statistics::Test::RandomWalk->new(); # If you have rng's that provide an integer in the range [0, $nmax), # then this is how you can test them: my $rnd; open my $fh, '<', '/dev/urandom' or die $!; read($fh, $rnd, 32); $rnd = unpack('%L', $rnd); my $gen = Math::Random::MT->new($rnd); my $nmax = 13; # the maximum integer returned $t->set_rescale_factor($nmax); my $num = 20000; foreach ( [ 'rand', sub {map int($nmax*rand()), 1..10000}, int($num/10000)+1 ], [ 'MT', sub {map int($nmax*$gen->rand()), 1..10000}, int($num/10000)+1 ], ) { my $name = shift @$_; $t->set_data(@$_); print "Testing $name...\n"; # If $nmax is too large for your convenience, you can # instead test $nmax/$something. # (the result of which needs to be an integer...) 
my ($alpha, $got, $expected) = $t->test($nmax); print $t->data_to_report($alpha, $got, $expected); } Statistics-Test-RandomWalk-0.02/examples/comparison.pl0000644000175000017500000000152211332011633021537 0ustar tseetsee#!/usr/bin/perl use strict; use warnings; use lib 'lib'; use Statistics::Test::RandomWalk; use Data::Dumper; use Math::Random::MT; my $t = Statistics::Test::RandomWalk->new(); my $rnd; open my $fh, '<', '/dev/random' or die $!; read($fh, $rnd, 32); $rnd = unpack('%L', $rnd); my $gen = Math::Random::MT->new($rnd); { my $x = 4711; my $a = 421; my $c = 64773; my $m = 259200; sub lin_kong { $x = ($a*$x + $c) % $m; return $x/$m; } } my $num = 100000; foreach ( [ 'rand', sub {map rand(), 1..10000}, $num/10000 ], [ 'MT', sub {map $gen->rand(), 1..10000}, $num/10000 ], [ 'lin', \&lin_kong, $num ], ) { my $name = shift @$_; $t->set_data(@$_); print "Testing $name...\n"; my ($alpha, $got, $expected) = $t->test(100); print $t->data_to_report($alpha, $got, $expected); }