energy/0000755000176000001440000000000012423640203011567 5ustar ripleyusersenergy/src/0000755000176000001440000000000012423524166012367 5ustar ripleyusersenergy/src/Ecluster.cc0000644000176000001440000000455512423524166014475 0ustar ripleyusers/* Ecluster.cc: energy package Author: Maria Rizzo Created: Created: 12 Dec 2002 Revised: 4 Jan 2004 for R-1.8.1 energy package Revised: 28 Jan 2004 */ #include "R.h" #include "Rmath.h" #include "ECl.h" extern "C" { double **alloc_matrix(int r, int c); void free_matrix(double **matrix, int r, int c); void Emin_hclust(double *diss, int *en, int *merge, double *height, int *order); void lower2square(double **dst, double *diss, int n); void Emin_hclust(double *diss, int *en, int *merge, double *height, int *order) { // performs hierarchical E-clustering by minimum cluster E-distance // diss lower.tri of n by n distance matrix, column order // en sample size n // merge (n-1) by 2 array as a vector in col order; see hclust object // height vector length n-1; see hclust object // order vector length n; see hclust object int i, step, I, J; int n = (*en); double e; double *E; double **Edst; double **dst; int *m1, *m2; ECl c; //clustering object c.init(n); dst = alloc_matrix(n, n); Edst = alloc_matrix(n, n); //E dist between clusters E = Calloc(n, double); m1 = Calloc(n-1, int); m2 = Calloc(n-1, int); // convert lower.tri in vector form to square matrix lower2square(dst, diss, n); //E-hierarchical clustering E[0] = c.init_Edst(dst, Edst); step = 0; while (c.len() > 1) { e = c.merge_minEdst(dst, Edst); c.last_pair(&I, &J); height[step] = c.ht(I); step = c.last_merge(m1+step, m2+step); E[step] = e; } //compute the return values for merge and order E[n-1] = 0.0; for (i=0; i Created: 12 Dec 2002 Revised: 4 Jan 2004 for R-1.8.1 energy package Revised: 28 Jan 2004 */ #include "ECl.h" #include extern "C" { //implementation of Cl and ECl classes Cl::~Cl() { //destructor int i; if (isinit==1) { Free(size); Free(step); Free(height); Free(w); 
for (i=0; i 0) { I = -m1[0]-1; J = -m2[0]-1; combine(I, J); w[0] = J; w[1] = I; i = 1; while (nclus > k) { I = (m1[i]<0)? -m1[i]-1 : w[m1[i]]; J = (m2[i]<0)? -m2[i]-1 : w[m2[i]]; combine(I, J); i++; w[i] = I; } } nclus = clusters(); return nclus; } int Cl::init(int m, int *G, int base) { //initialize cluster with group membership vector G int g, i; init(m); if (base > 0) for (i=0; i0) { cl[k++] = i; m+= size[i]; } if (k!=nclus) error("nclus error"); if (m!=n) error("total size error"); return nclus; } int Cl::clusters() { //count the number of non-empty clusters, and set nclus int i, k=0; for (i=0; i0) k++; if (k>n || k<1) error("nclus error"); nclus = k; return nclus; } int Cl::combine(int I, int J) { //merge Jth row into Ith row //w is preserved int j, m; if (I==J) error("c:I==J"); if (I<0 || J<0 || I>=n || J>=n) error("c:I,J error"); if (size[I]<=0 || size[J]<=0) error("c:empty cluster"); if (nclus < 2) error("c:1 cluster"); m = size[I]; for (j=0; j 0) { for (j=0; j 0) for (i=0; i 0) for (j=0; j 0) for (i=0; i n) return -1; return 0; } int Cl::proximity(int **p) { //p[i][j] is 1 if (i,j) in same cluster //p[i][j] is 0 if (i,j) in different clusters int a, b, i, j, k; for (i=0; i height[J]) { I=w[1]; J=w[0]; } height[I]=Ed[I][J]; combine(I, J); update_Edst(I, J, dst, Ed); return 0.0; } if (nclus == 1) error("last cluster"); if (nclus < 1) error("nclus<1"); I=J=-1; find_minEdst(Ed, &I, &J); if (I>=0) { if (J < I) { p=I; I=J; J=p; } hI = hJ = 0.0; if (step[I] > 0) hI = height[I]; if (step[J] > 0) hJ = height[J]; if (hJ < hI) { p=I; I=J; J=p; } height[I] = Ed[I][J]; d=combine(I,J); if(!d) error("merge_best_pair error"); pE = E; E = update_Edst(I, J, dst, Ed); } return E; } } //end extern "C" energy/src/utilities.c0000644000176000001440000001402412423524166014547 0ustar ripleyusers/* utilities.c: some utilities for the energy package Author: Maria L. 
Rizzo (see energy package on CRAN or at personal.bgsu.edu/~mrizzo) alloc_matrix, alloc_int_matrix, free_matrix, free_int_matrix: use R (Calloc, Free) instead of C (calloc, free) for memory management permute permutes the first n elements of an integer vector row_order converts arg from column order to row order vector2matrix copies double* arg into double** arg distance computes Euclidean distance matrix from double** Euclidean_distance computes Euclidean distance matrix from double* index_distance computes Euclidean distance matrix D then D^index sumdist sums the distance matrix without creating the matrix Notes: 1. index_distance (declaration and body of the function) revised in energy 1.3-0, 2/2011. */ #include #include double **alloc_matrix(int r, int c); int **alloc_int_matrix(int r, int c); void free_matrix(double **matrix, int r, int c); void free_int_matrix(int **matrix, int r, int c); void permute(int *J, int n); void permute_check(int *J, int *N); void roworder(double *x, int *byrow, int r, int c); void vector2matrix(double *x, double **y, int N, int d, int isroworder); void distance(double **bxy, double **D, int N, int d); void Euclidean_distance(double *x, double **Dx, int n, int d); void index_distance(double **Dx, int n, double index); void sumdist(double *x, int *byrow, int *nrow, int *ncol, double *lowersum); double **alloc_matrix(int r, int c) { /* allocate a matrix with r rows and c columns */ int i; double **matrix; matrix = Calloc(r, double *); for (i = 0; i < r; i++) matrix[i] = Calloc(c, double); return matrix; } int **alloc_int_matrix(int r, int c) { /* allocate an integer matrix with r rows and c columns */ int i; int **matrix; matrix = Calloc(r, int *); for (i = 0; i < r; i++) matrix[i] = Calloc(c, int); return matrix; } void free_matrix(double **matrix, int r, int c) { /* free a matrix with r rows and c columns */ int i; for (i = 0; i < r; i++) Free(matrix[i]); Free(matrix); } void free_int_matrix(int **matrix, int r, int c) { /* free an 
integer matrix with r rows and c columns */ int i; for (i = 0; i < r; i++) Free(matrix[i]); Free(matrix); } void permute(int *J, int n) { /* permute the first n integers of J if n is length(J), equivalent to R: J <- rev(sample(J, length(J), replace=FALSE)) */ int i, j, j0, m=n; for (i=0; i DBL_EPSILON) { for (i=0; i Created: 12 Dec 2002 Revised: 4 Jan 2004 for R-1.8.1 energy package Revised: 28 Jan 2004 */ #define DLLEXPORT #include #include #include #include //declarations for class Cl for hierarchical cluster analysis //and ECl for e-clustering #define EPS DBL_EPSILON*20.0 #define ONE 1.0+DBL_EPSILON*20.0 class DLLEXPORT Cl { //for cluster analysis protected: int n; //number of observations int nclus; //number of clusters int it; //number of changes to clusters int pstep1; int pstep2; int psize1; int psize2; int r1; int r2; int c1; int c2; int temp; int isinit; //is memory allocated for arrays int *size; //sizes of clusters int *step; //step when cluster formed double *height; //distance between merging clusters int *w; int **clus; //indices of observations public: Cl(){isinit=0;}; //no memory is allocated, call init(n) ~Cl(); int init(int m); int init(int n, int *m1, int *m2, int k); int init(int m, int *G, int base); int dim() {return n;} int len() {return nclus;} int len(int i) {return size[i];} int obs(int i, int j) {return clus[i][j];} double ht(int i) {return height[i];} int next_cl(int p) {p++;while(p #include void dCOVtest(double *x, double *y, int *byrow, int *dims, double *index, double *reps, double *DCOV, double *pval); void dCovTest(double *x, double *y, int *byrow, int *dims, double *index, double *reps, double *Dstat, double *pval); void dCOV(double *x, double *y, int *byrow, int *dims, double *index, int *idx, double *DCOV); double Akl(double **akl, double **A, int n); /* functions in utilities.c */ extern double **alloc_matrix(int r, int c); extern int **alloc_int_matrix(int r, int c); extern void free_matrix(double **matrix, int r, int c); 
extern void free_int_matrix(int **matrix, int r, int c); extern void permute(int *J, int n); extern void roworder(double *x, int *byrow, int r, int c); extern void Euclidean_distance(double *x, double **Dx, int n, int d); extern void index_distance(double **Dx, int n, double index); extern void vector2matrix(double *x, double **y, int N, int d, int isroworder); void dCOVtest(double *x, double *y, int *byrow, int *dims, double *index, double *reps, double *DCOV, double *pval) { /* computes dCov(x,y), dCor(x,y), dVar(x), dVar(y) V-statistic is n*dCov^2 where n*dCov^2 --> Q dims[0] = n (sample size) dims[1] = p (dimension of X) dims[2] = q (dimension of Y) dims[3] = dst (logical, TRUE if x, y are distances) dims[4] = R (number of replicates) index : exponent for distance DCOV : vector [dCov, dCor, dVar(x), dVar(y), mean(A), mean(B)] */ int i, j, k, n, n2, p, q, r, J, K, M, R; int dst; int* perm; double **Dx, **Dy, **A, **B; double dcov, V; n = dims[0]; p = dims[1]; q = dims[2]; dst = dims[3]; R = dims[4]; if (*byrow == FALSE) { /* avoid this step: use as.double(t(x)) in R */ roworder(x, byrow, n, p); *byrow = FALSE; /* false for y */ roworder(y, byrow, n, q); } /* critical to pass correct flag dst from R */ Dx = alloc_matrix(n, n); Dy = alloc_matrix(n, n); if (dst) { vector2matrix(x, Dx, n, n, 1); vector2matrix(y, Dy, n, n, 1); } else { Euclidean_distance(x, Dx, n, p); Euclidean_distance(y, Dy, n, q); } index_distance(Dx, n, *index); index_distance(Dy, n, *index); A = alloc_matrix(n, n); B = alloc_matrix(n, n); Akl(Dx, A, n); Akl(Dy, B, n); free_matrix(Dx, n, n); free_matrix(Dy, n, n); n2 = ((double) n) * n; /* compute dCov(x,y), dVar(x), dVar(y) */ for (k=0; k<4; k++) DCOV[k] = 0.0; for (k=0; k 0) DCOV[k] = sqrt(DCOV[k]); else DCOV[k] = 0.0; } /* compute dCor(x, y) */ V = DCOV[2]*DCOV[3]; if (V > DBL_EPSILON) DCOV[1] = DCOV[0] / sqrt(V); else DCOV[1] = 0.0; if (R > 0) { /* compute the replicates */ if (DCOV[1] > 0.0) { perm = Calloc(n, int); M = 0; for (i=0; i= 
DCOV[0]) M++; } *pval = (double) (M+1) / (double) (R+1); PutRNGstate(); Free(perm); } else { *pval = 1.0; } } free_matrix(A, n, n); free_matrix(B, n, n); return; } void dCOV(double *x, double *y, int *byrow, int *dims, double *index, int *idx, double *DCOV) { /* computes dCov(x,y), dCor(x,y), dVar(x), dVar(y) V-statistic is n*dCov^2 where n*dCov^2 --> Q dims[0] = n (sample size) dims[1] = p (dimension of X) dims[2] = q (dimension of Y) dims[3] = dst (logical, TRUE if x, y are distances) index : exponent for distance idx : index vector, a permutation of sample indices DCOV : vector [dCov, dCor, dVar(x), dVar(y)] */ int j, k, n, n2, p, q, dst; double **Dx, **Dy, **A, **B; double V; n = dims[0]; p = dims[1]; q = dims[2]; dst = dims[3]; if (*byrow == FALSE) { /* avoid this step: use as.double(t(x)) in R */ roworder(x, byrow, n, p); *byrow = FALSE; /* false for y */ roworder(y, byrow, n, q); } /* critical to pass correct flag dst from R */ Dx = alloc_matrix(n, n); Dy = alloc_matrix(n, n); if (dst) { vector2matrix(x, Dx, n, n, 1); vector2matrix(y, Dy, n, n, 1); } else { Euclidean_distance(x, Dx, n, p); Euclidean_distance(y, Dy, n, q); } index_distance(Dx, n, *index); index_distance(Dy, n, *index); A = alloc_matrix(n, n); B = alloc_matrix(n, n); Akl(Dx, A, n); Akl(Dy, B, n); free_matrix(Dx, n, n); free_matrix(Dy, n, n); n2 = ((double) n) * n; /* compute dCov(x,y), dVar(x), dVar(y) */ for (k=0; k<4; k++) DCOV[k] = 0.0; for (k=0; k 0) DCOV[k] = sqrt(DCOV[k]); else DCOV[k] = 0.0; } /* compute dCor(x, y) */ V = DCOV[2]*DCOV[3]; if (V > DBL_EPSILON) DCOV[1] = DCOV[0] / sqrt(V); else DCOV[1] = 0.0; free_matrix(A, n, n); free_matrix(B, n, n); return; } double Akl(double **akl, double **A, int n) { /* -computes the A_{kl} or B_{kl} distances from the distance matrix (a_{kl}) or (b_{kl}) for dCov, dCor, dVar dCov = mean(Akl*Bkl), dVar(X) = mean(Akl^2), etc. 
*/ int j, k; double *akbar; double abar; akbar = Calloc(n, double); abar = 0.0; for (k=0; k Q dims[0] = n (sample size) dims[1] = p (dimension of X) dims[2] = q (dimension of Y) dims[3] = B (number of replicates, dimension of reps) index : exponent for distance Dstat : the statistic dCov^2 (V_n^2) and S1, S2, S3 */ int b, i, j, k, n, p , q, B, I, J, M; int *perm; double Cx, Cy, Cxy, C3, S1, S2, S3, n2, n3; double **Dx, **Dy; n = dims[0]; p = dims[1]; q = dims[2]; B = dims[3]; if (*byrow == FALSE) { /* avoid this step: use as.double(t(x)) in R */ roworder(x, byrow, n, p); *byrow = FALSE; /* false for y */ roworder(y, byrow, n, q); } Dx = alloc_matrix(n, n); Dy = alloc_matrix(n, n); Euclidean_distance(x, Dx, n, p); Euclidean_distance(y, Dy, n, q); index_distance(Dx, n, *index); index_distance(Dy, n, *index); Cx = Cy = Cxy = C3 = 0.0; n2 = ((double) n) * n; n3 = n2 * n; /* compute observed test statistic */ for (i=0; i 0) { GetRNGstate(); if (Dstat[0] > 0.0) { perm = Calloc(n, int); M = 0; for (i=0; i= (*Dstat)) M++; } *pval = (double) (M+1) / (double) (B+1); PutRNGstate(); Free(perm); } else { *pval = 1.0; } } /* test statistic (the V-statistic) is nV_n^2 = n*Dstat[0] a normalized version is n*Dstat[0]/Dstat[2] */ free_matrix(Dx, n, n); free_matrix(Dy, n, n); return; } energy/src/energy.c0000644000176000001440000002726012423524166014033 0ustar ripleyusers/* energy.c: energy package Author: Maria Rizzo Created: 4 Jan 2004 Last Updated: 2 April 2008 some functions moved to utilities.c mvnEstat() computes the E-test of multivariate normality ksampleEtest() performs the multivariate E-test for equal distributions, complete version, from data matrix E2sample() computes the 2-sample E-statistic without creating distance poisMstat() computes the mean distance test of Poissonity */ #include #include void mvnEstat(double *y, int *byrow, int *nobs, int *dim, double *stat); void poisMstat(int *x, int *nx, double *stat); void ksampleEtest(double *x, int *byrow, int *nsamples, 
int *sizes, int *dim, int *R, double *e0, double *e, double *pval); void E2sample(double *x, int *sizes, int *dim, double *stat); double edist(double **D, int m, int n); double multisampleE(double **D, int nsamples, int *sizes, int *perm); double twosampleE(double **D, int m, int n, int *xrows, int *yrows); double E2(double **x, int *sizes, int *start, int ncol, int *perm); double Eksample(double *x, int *byrow, int r, int d, int K, int *sizes, int *ix); void distance(double **bxy, double **D, int N, int d); /* utilities.c */ extern double **alloc_matrix(int r, int c); extern int **alloc_int_matrix(int r, int c); extern void free_matrix(double **matrix, int r, int c); extern void free_int_matrix(int **matrix, int r, int c); extern void permute(int *J, int n); extern void roworder(double *x, int *byrow, int r, int c); extern void vector2matrix(double *x, double **y, int N, int d, int isroworder); extern void distance(double **bxy, double **D, int N, int d); extern void Euclidean_distance(double *x, double **Dx, int n, int d); extern void index_distance(double *x, double **Dx, int n, int d, double index); extern void sumdist(double *x, int *byrow, int *nrow, int *ncol, double *lowersum); void mvnEstat(double *y, int *byrow, int *nobs, int *dim, double *stat) { /* compute E test statistic for multivariate normality y is *standardized* multivariate sample best to have y in row order: e.g. 
y=as.double(t(y)) */ int d=(*dim), n=(*nobs); int i, j, k, p, maxterms=2000; double D=(double)(*dim); double meanyy, meanyz, meanzz; double delta, eps=1.0e-7; double normy, yy, dif, sum, sum0, term; double lg0, lg1,logak, loggk; if (*byrow == FALSE) roworder(y, byrow, n, d); lg0 = lgammafn(D/2.0); lg1 = lgammafn((D+1.0)/2.0); meanzz = 2.0 * exp(lg1 - lg0); /* second mean */ meanyz = 0.0; /* computing the first mean as series */ for (i=0; i eps && k < maxterms) { sum0 = sum; logak = (k+1)*log(yy) - lgammafn(k+1) - k*M_LN2 - log(2*k+1) - log(2*k+2); loggk = lg1 + lgammafn(k+1.5) - lgammafn(k+D/2+1); term = exp(logak + loggk); if (k % 2 == 0) sum += term; else sum -= term; delta = fabs(sum - sum0); k++; } if (delta < eps) meanyz += meanzz/M_SQRT2 + M_SQRT_2dPI * sum; else { meanyz += normy; Rf_warning("E|y-Z| did not converge, replaced by %f", normy); } } meanyz /= (double) n; sumdist(y, byrow, nobs, dim, &meanyy); /* computing third mean */ meanyy *= (2.0/(double)(n*n)); *stat = ((double) n)*(2.0*meanyz - meanzz - meanyy); return; } void poisMstat(int *x, int *nx, double *stat) { /* computes the Poisson mean distance statistic */ int i, j, k, n=(*nx); double eps=1.0e-10; double cvm, d, lambda, m, q; double Mcdf1, Mcdf0, Mpdf1, cdf1, cdf0; lambda = 0; for (i=0; i 1) Mcdf1 = 1.0; cdf1 = ppois(i, lambda, TRUE, FALSE); /* MLE of F(i) */ d = Mcdf1 - cdf1; cvm += d * d * (cdf1 - cdf0); cdf0 = cdf1; Mcdf0 = Mcdf1; } cvm *= n; *stat = cvm; } void E2sample(double *x, int *sizes, int *dim, double *stat) { /* compute test statistic *stat for testing H:F=G does not store distance matrix x must be in row order: x=as.double(t(x)) where x is pooled sample in matrix sum(en) by dim */ int m=sizes[0], n=sizes[1], d=(*dim); int i, j, k, p, q; double dif, dsum, sumxx, sumxy, sumyy, w; sumxy = 0.0; for (i=0; i 0) { data = alloc_matrix(N, d); /* sample matrix */ vector2matrix(x, data, N, d, *byrow); distance(data, D, N, d); free_matrix(data, N, d); } else vector2matrix(x, D, N, N, 
*byrow); *e0 = multisampleE(D, K, sizes, perm); /* bootstrap */ if (B > 0) { ek = 0; GetRNGstate(); for (b=0; b Created: June 15, 2004 (development) Last Modified: April 5, 2008 */ #include #include void indepE(double *x, double *y, int *byrow, int *dims, double *Istat); void indepEtest(double *x, double *y, int *byrow, int *dims, double *Istat, double *reps, double *pval); void squared_distance(double *x, double **D, int n, int d); extern double **alloc_matrix(int r, int c); extern int **alloc_int_matrix(int r, int c); extern void free_matrix(double **matrix, int r, int c); extern void free_int_matrix(int **matrix, int r, int c); extern void permute(int *J, int n); extern void roworder(double *x, int *byrow, int r, int c); extern void Euclidean_distance(double *x, double **D, int n, int d); void indepE(double *x, double *y, int *byrow, int *dims, double *Istat) { /* E statistic for multiv. indep. of X in R^p and Y in R^q statistic returned is I_n^2 [nI_n^2 has a limit dist under indep] dims[0] = n (sample size) dims[1] = p (dimension of X) dims[2] = q (dimension of Y) Istat : the statistic I_n (normalized) */ int i, j, k, m, n, p, q; double Cx, Cy, Cz, C3, C4, n2, n3, n4, v; double **D2x, **D2y; n = dims[0]; p = dims[1]; q = dims[2]; if (*byrow == FALSE) { /* avoid this step: use as.double(t(x)) in R */ roworder(x, byrow, n, p); *byrow = FALSE; /* false for y */ roworder(y, byrow, n, q); } D2x = alloc_matrix(n, n); D2y = alloc_matrix(n, n); Euclidean_distance(x, D2x, n, p); Euclidean_distance(y, D2y, n, q); Cx = Cy = Cz = C3 = C4 = 0.0; n2 = ((double) n) * n; n3 = n2 * n; n4 = n2 * n2; /* compute observed test statistic */ for (i=0; i Q dims[0] = n (sample size) dims[1] = p (dimension of X) dims[2] = q (dimension of Y) dims[3] = B (number of replicates, dimension of reps) Istat : the statistic I_n (normalized) */ int b, i, j, k, m, n, p, q, B, M; int *perm; double Cx, Cy, Cz, C3, C4, n2, n3, n4, v; double **D2x, **D2y; n = dims[0]; p = dims[1]; q = dims[2]; B = 
dims[3]; if (*byrow == FALSE) { /* avoid this step: use as.double(t(x)) in R */ roworder(x, byrow, n, p); *byrow = FALSE; /* false for y */ roworder(y, byrow, n, q); } D2x = alloc_matrix(n, n); D2y = alloc_matrix(n, n); squared_distance(x, D2x, n, p); squared_distance(y, D2y, n, q); Cx = Cy = Cz = C3 = C4 = 0.0; n2 = ((double) n) * n; n3 = n2 * n; n4 = n2 * n2; /* compute observed test statistic */ for (i=0; i 0) { GetRNGstate(); perm = Calloc(n, int); for (i=0; i= (*Istat)) M++; } *pval = (double) M / (double) B; PutRNGstate(); Free(perm); } free_matrix(D2x, n, n); free_matrix(D2y, n, n); return; } void squared_distance(double *x, double **D2, int n, int d) { /* interpret x as an n by d matrix, in row order (n vectors in R^d) compute the squared distance matrix D2 */ int i, j, k, p, q; double dsum, dif; for (i=1; i 0) reps <- rep(0, R) pval <- 1 dims <- c(n, ncol(x), ncol(y), dst, R) # dcov = [dCov,dCor,dVar(x),dVar(y)] a <- .C("dCOVtest", x = as.double(t(x)), y = as.double(t(y)), byrow = as.integer(TRUE), dims = as.integer(dims), index = as.double(index), reps = as.double(reps), DCOV = as.double(dcov), pval = as.double(pval), PACKAGE = "energy") # test statistic is n times the square of dCov statistic stat <- n * a$DCOV[1]^2 dcorr <- a$DCOV V <- dcorr[[1]] names(stat) <- "nV^2" names(V) <- "dCov" dataname <- paste("index ", index, ", replicates ", R, sep="") pval <- ifelse (R < 1, NA, a$pval) e <- list( method = paste("dCov test of independence", sep = ""), statistic = stat, estimate = V, estimates = dcorr, p.value = pval, replicates = n* a$reps^2, data.name = dataname) class(e) <- "htest" return(e) } .dcov <- function(x, y, index=1.0) { # distance covariance statistic for independence # dcov = [dCov,dCor,dVar(x),dVar(y)] (vector) # this function provides the fast method for computing dCov # it is called by the dcov and dcor functions if (!(class(x) == "dist")) x <- dist(x) if (!(class(y) == "dist")) y <- dist(y) x <- as.matrix(x) y <- as.matrix(y) dst <- TRUE n 
<- nrow(x) m <- nrow(y) if (n != m) stop("Sample sizes must agree") if (! (all(is.finite(c(x, y))))) stop("Data contains missing or infinite values") dims <- c(n, NCOL(x), NCOL(y), dst) idx <- 1:dims[1] DCOV <- numeric(4) a <- .C("dCOV", x = as.double(t(x)), y = as.double(t(y)), byrow = as.integer(TRUE), dims = as.integer(dims), index = as.double(index), idx = as.double(idx), DCOV = as.double(DCOV), PACKAGE = "energy") return(a$DCOV) } dcov <- function(x, y, index=1.0) { # distance correlation statistic for independence return(.dcov(x, y, index)[1]) } dcor <- function(x, y, index=1.0) { # distance correlation statistic for independence return(.dcov(x, y, index)[2]) } DCOR <- function(x, y, index=1.0) { # distance covariance and correlation statistics # alternate method, implemented in R without .C call # this method is usually slower than the C version if (!(class(x) == "dist")) x <- dist(x) if (!(class(y) == "dist")) y <- dist(y) x <- as.matrix(x) y <- as.matrix(y) n <- nrow(x) m <- nrow(y) if (n != m) stop("Sample sizes must agree") if (! 
(all(is.finite(c(x, y))))) stop("Data contains missing or infinite values") if (index < 0 || index > 2) { warning("index must be in [0,2), using default index=1") index=1.0} stat <- 0 dims <- c(n, ncol(x), ncol(y)) Akl <- function(x) { d <- as.matrix(x)^index m <- rowMeans(d) M <- mean(d) a <- sweep(d, 1, m) b <- sweep(a, 2, m) return(b + M) } A <- Akl(x) B <- Akl(y) dCov <- sqrt(mean(A * B)) dVarX <- sqrt(mean(A * A)) dVarY <- sqrt(mean(B * B)) V <- sqrt(dVarX * dVarY) if (V > 0) dCor <- dCov / V else dCor <- 0 return(list(dCov=dCov, dCor=dCor, dVarX=dVarX, dVarY=dVarY)) } energy/R/Ecluster.R0000644000176000001440000000736412423524166013724 0ustar ripleyusersedist <- function(x, sizes, distance = FALSE, ix = 1:sum(sizes), alpha = 1, method = c("cluster","discoB","discoF")) { # computes the e-dissimilarity matrix between k samples or clusters # x: pooled sample or Euclidean distances # sizes: vector of sample (cluster) sizes # distance: TRUE if x is a distance matrix, otherwise FALSE # ix: a permutation of row indices of x # alpha: distance exponent # method: cluster distances or disco statistics # k <- length(sizes) if (k == 1) return (as.dist(0.0)) if (k < 1) return (NA) e <- matrix(nrow=k, ncol=k) n <- cumsum(sizes) m <- 1 + c(0, n[1:(k-1)]) if (distance == FALSE) { if (is.vector(x)) x <- matrix(x, nrow = length(x), ncol = 1) dst <- as.matrix(dist(x)) } else dst <- as.matrix(x) if (alpha != 1) { if (alpha <= 0 || alpha > 2) warning("exponent alpha should be in (0,2]") dst <- dst^alpha } type <- match.arg(method) if (type == "cluster") { for (i in 1:(k - 1)) { e[i, i] <- 0.0 for (j in (i + 1):k) { n1 <- sizes[i] n2 <- sizes[j] ii <- ix[m[i]:n[i]] jj <- ix[m[j]:n[j]] w <- n1 * n2 / (n1 + n2) m11 <- sum(dst[ii, ii]) / (n1 * n1) m22 <- sum(dst[jj, jj]) / (n2 * n2) m12 <- sum(dst[ii, jj]) / (n1 * n2) e[i, j] <- e[j, i] <- w * ((m12 + m12) - (m11 + m22)) } } } if (type == "discoF" || type == "discoB") { #disco statistics for testing F=G for (i in 1:(k - 1)) { e[i, i] 
<- 0.0 for (j in (i + 1):k) { n1 <- sizes[i] n2 <- sizes[j] ii <- ix[m[i]:n[i]] jj <- ix[m[j]:n[j]] J <- c(ii,jj) d <- dst[J, J] N <- NROW(d) total <- sum(d) / (2*N) trt <- factor(c(rep(1,n1),rep(2,n2))) y <- as.vector(d[,1]) M <- model.matrix(y ~ 0 + trt) G <- t(M) %*% d %*% M withins <- diag(G) / (2*c(n1,n2)) W <- sum(withins) B <- total - W ifelse (type == "discoF", e[i,j] <- e[j,i] <- B / (W/(N-2)), e[i,j] <- e[j,i] <- B) } } } e <- as.dist(e) attr(e,"method") <- paste(method,": index= ", alpha) e } energy.hclust <- function(dst, alpha = 1) { d <- dst if (is.matrix(dst)) { if (nrow(dst) != ncol(dst) || sum(dst != t(dst)) > 0) stop("distance matrix must be square symmetric") d <- as.dist(dst) attr(d, "Labels") <- row.names(dst) } n <- attr(d, "Size") if (is.null(n)) stop("dst argument must be square matrix or dist object") if (alpha != 1) { if (alpha <= 0 || alpha > 2) warning("exponent alpha should be in (0,2]") d <- d^alpha } labels <- attr(d, "Labels") if (is.null(labels)) labels <- paste(1:n) merge <- integer(2 * (n - 1)) height <- double(n - 1) order <- integer(n) ecl <- .C("Emin_hclust", diss = as.double(d), en = as.integer(n), merge = as.integer(merge), height = as.double(height), order = as.integer(order), PACKAGE = "energy") merge <- matrix(ecl$merge, nrow = n - 1, ncol = 2) e <- list(merge = merge, height = ecl$height, order = ecl$order, labels = labels, method = "e-distance", call = match.call(), dist.method = attr(dst, "method")) class(e) <- "hclust" e } energy/R/disco.R0000644000176000001440000001443212423524166013231 0ustar ripleyusers### disco tests - implementation of DIStance COmponents methods in: ### ### Rizzo, M.L. and Szekely, G.J. (2010) "DISCO Analysis: A Nonparametric ### Extension of Analysis of Variance, Annals of Applied Statistics ### Vol. 4, No. 2, 1034-1055. 
### ### Sept 2010 parts of disco package merged into energy package ### this release supports one way models ### this version does not use the C library ### ### disco: computes the decomposition and test using F ratio ### disco.between: statistic and test using between component ### .disco1: internal computations for one factor ### .disco1stat, .disco1Bstat: internal for boot function ### ### disco <- function(x, factors, distance=FALSE, index=1.0, R=0, method=c("disco","discoB","discoF")) { ## x is response or Euclidean distance matrix or dist() object ## factors is a matrix or data frame of group labels ## distance=TRUE if x is distance, otherwise FALSE ## index is the exponent on distance, in (0,2] ## R is number of replicates for test ## method: use F ratio (default) or between component (discoB) ## disco method is currently alias for discoF method <-match.arg(method) factors <- data.frame(factors) if (method=="discoB") return(disco.between(x, factors=factors, distance=distance, index=index, R=R)) nfactors <- NCOL(factors) if (distance) dst <- as.matrix(x) else dst <- as.matrix(dist(x)) N <- NROW(dst) if (NCOL(dst) != N) stop("distance==TRUE but first argument is not distance") if(!isTRUE(all.equal(index, 1))) dst <- dst^index stats <- matrix(0, nfactors, 6) colnames(stats) <- c("Trt","Within","df1","df2","Stat","p-value") for (j in 1:nfactors) { trt <- factors[,j] stats[j, 1:4] <- .disco1(trt=trt, dst=dst) if (R > 0) { b <- boot(data = dst, statistic = .disco1stat, sim = "permutation", R = R, trt = trt) stats[j, 5] <- b$t0 stats[j, 6] <- (sum(b$t > b$t0) + 1) / (R + 1) } else { stats[j, 5] <- .disco1stat(dst, i=1:nrow(dst), trt=trt) stats[j, 6] <- NA } } methodname <- "DISCO (F ratio)" dataname <- deparse(substitute(x)) total <- sum(stats[1,1:2]) within <- total - sum(stats[ ,1]) Df.trt <- stats[, 3] factor.names <- names(factors) factor.levels <- sapply(factors, nlevels) sizes <- sapply(factors, tabulate) e <- list( call = match.call(), method = methodname, 
statistic = stats[ ,5], p.value = stats[ ,6], k = nfactors, N = N, between = stats[ ,1], withins = stats[ ,2], within = within, total = total, Df.trt = Df.trt, Df.e = nrow(dst) - sum(Df.trt) - 1, index = index, factor.names = factor.names, factor.levels = factor.levels, sample.sizes = sizes, stats = stats ) class(e) <- "disco" e } disco.between <- function(x, factors, distance=FALSE, index=1.0, R=0) { ## disco test based on the between-sample component ## similar to disco except that "disco" test is based on the F ratio ## disco.between test for one factor (balanced) is asymptotically ## equivalent to k-sample E test (test statistics are proportional ## in that case but not in general). ## x is response or Euclidean distance matrix or dist() object ## factors is a matrix or data frame of group labels ## distance=TRUE if x is distance, otherwise FALSE ## index is the exponent on distance, in (0,2] factors <- data.frame(factors) nfactors <- NCOL(factors) if (nfactors > 1) stop("More than one factor is not implemented in disco.between") if (distance) dst <- as.matrix(x) else dst <- as.matrix(dist(x)) N <- NROW(dst) if (NCOL(dst) != N) stop("distance==TRUE but first argument is not distance") if(!isTRUE(all.equal(index, 1))) dst <- dst^index trt <- factors[, 1] if (R > 0) { b <- boot(data = dst, statistic = .disco1Bstat, sim = "permutation", R = R, trt = trt) between <- b$t0 reps <- b$t pval <- mean(reps >= between) } else { between <- .disco1Bstat(dst, i=1:nrow(dst), trt=trt) pval <- NA } if (R == 0) return (between) methodname <- "DISCO (Between-sample)" dataname <- deparse(substitute(x)) names(between) <- "DISCO between statistic" e <- list( call = match.call(), method = methodname, statistic = between, p.value = pval, data.name = dataname) class(e) <- "htest" e } .disco1 <- function(trt, dst) { ## dst is Euclidean distance matrix or power of it ## trt is the treatment, a factor trt <- factor(trt) k <- nlevels(trt) n <- tabulate(trt) N <- sum(n) total <- sum(dst) / 
(2*N)
    y <- as.vector(dst[,1])
    M <- model.matrix(y ~ 0 + trt)
    G <- t(M) %*% dst %*% M
    withins <- diag(G) / (2*n)
    W <- sum(withins)
    B <- total - W
    c(B, W, k-1, N-k)
}

.disco1stat <- function(dst, i, trt) {
    ## statistic function for the permutation test
    ## dst is Euclidean distance matrix or power of it
    ## i is permutation vector supplied by bootstrap
    ## trt is the treatment, a factor
    ## returns the disco "F" ratio; .disco1 returns c(B, W, k-1, N-k),
    ## so the ratio is (B / (k-1)) / (W / (N-k))
    idx <- seq_len(nrow(dst))
    d <- .disco1(trt = trt[idx[i]], dst = dst)
    (d[1] / d[3]) / (d[2] / d[4])
}

.disco1Bstat <- function(dst, i, trt) {
    ## statistic function for the permutation test
    ## dst is Euclidean distance matrix or power of it
    ## i is permutation vector supplied by bootstrap
    ## trt is the treatment, a factor
    ## returns the between-sample component (for one factor),
    ## i.e. the first element of the vector returned by .disco1
    idx <- seq_len(nrow(dst))
    .disco1(trt = trt[idx[i]], dst = dst)[1]
}

print.disco <- function(x, ...) {
    ## print method for objects of class "disco"
    ## x is a list with components call, index, k, factor.names, Df.trt,
    ##   Df.e, between, within, total, statistic, p.value, N
    ## prints the call and the distance components table:
    ##   one row per factor, then the Within and Total rows
    ## returns x invisibly, as print methods are expected to do
    k <- x$k
    md1 <- x$between / x$Df.trt     # mean distance, treatments
    md2 <- x$within / x$Df.e        # mean distance, error
    f0 <- x$statistic
    print(x$call)
    cat(sprintf("\nDistance Components: index %5.2f\n", x$index))
    cat(sprintf("%-20s %4s %10s %10s %10s %10s\n", "Source",
        "Df","Sum Dist", "Mean Dist", "F-ratio", "p-value"))
    ## seq_len(k) rather than 1:k, so that k = 0 prints no factor rows
    for (i in seq_len(k)) {
        fname <- x$factor.names[i]
        cat(sprintf("%-20s %4d %10.5f %10.5f %10.3f %10s\n", fname,
            x$Df.trt[i], x$between[i], md1[i], f0[i],
            format.pval(x$p.value[i])))
    }
    cat(sprintf("%-20s %4d %10.5f %10.5f\n", "Within",
        x$Df.e, x$within, md2))
    cat(sprintf("%-20s %4d %10.5f\n", "Total", x$N-1, x$total))
    invisible(x)
}
energy/R/Eindep.R0000644000176000001440000001040312423524166013326 0ustar ripleyusers
indep.test <- function(x, y, method = c("dcov","mvI"), index = 1, R = 199) {
    # two energy tests for multivariate independence
    # method "dcov" dispatches to dcov.test(x, y, index, R)
    # method "mvI"  dispatches to mvI.test(x, y, R); index is not used
    # match.arg() stops on any other method, so one branch always applies
    type <- match.arg(method)
    switch(type,
        dcov = dcov.test(x, y, index, R),
        mvI = mvI.test(x, y, R))
}

mvI <- function(x, y) {
    # energy statistic for multivariate independence
    # x, y are samples with observations in rows (coerced by as.matrix)
    # returns dependence coefficient I_n, the square root of the
    # statistic computed by the C routine "indepE"
    x <- as.matrix(x)
    y <- as.matrix(y)
    n <- nrow(x)
    m <- nrow(y)
    if (n != m || n < 2) stop("Sample sizes must agree")
    if (! (all(is.finite(c(x, y)))))
        stop("Data contains missing or infinite values")
    stat <- 0
    dims <- c(n, ncol(x), ncol(y))
    # the data are passed transposed, with the byrow flag set
    e <- .C("indepE",
        x = as.double(t(x)),
        y = as.double(t(y)),
        byrow = as.integer(TRUE),
        dims = as.integer(dims),
        stat = as.double(stat),
        PACKAGE = "energy")
    sqrt(e$stat)
}

mvI.test <- function(x, y, R = 199) {
    # energy test for multivariate independence
    # x, y are samples with observations in rows (coerced by as.matrix)
    # R is the number of replicates
    # returns an "htest" object: statistic n I_n^2, estimate I_n,
    # the replicates scaled by n, and the p-value from "indepEtest"
    x <- as.matrix(x)
    y <- as.matrix(y)
    n <- nrow(x)
    m <- nrow(y)
    if (n != m || n < 2) stop("Sample sizes must agree")
    if (! (all(is.finite(c(x, y)))))
        stop("Data contains missing or infinite values")
    stat <- 0
    reps <- if (R > 0) rep(0, R) else 0
    pval <- 1
    dims <- c(n, ncol(x), ncol(y), R)
    a <- .C("indepEtest",
        x = as.double(t(x)),
        y = as.double(t(y)),
        byrow = as.integer(TRUE),
        dims = as.integer(dims),
        stat = as.double(stat),
        reps = as.double(reps),
        pval = as.double(pval),
        PACKAGE = "energy")
    stat <- n*a$stat
    est <- sqrt(a$stat)
    names(est) <- "I"
    names(stat) <- "nI^2"
    dataname <- paste("x (",n," by ",ncol(x), "), y(",n," by ", ncol(y),
        "), replicates ", R, sep="")
    # .C returns copies of its arguments; the local 'reps' is never
    # modified, so the replicates must be read from a$reps
    e <- list(
        method = "mvI energy test of independence",
        statistic = stat,
        estimate = est,
        replicates = n*a$reps,
        p.value = a$pval,
        data.name = dataname)
    class(e) <- "htest"
    e
}

indep.e <- function(x, y) {
    # energy statistic for multivariate independence (deprecated)
    # warns, then returns the same coefficient as mvI(x, y)
    .Deprecated(new = "mvI", package = "energy")
    mvI(x, y)
}

indep.etest <- function(x, y, R = 199) {
    # energy test for multivariate independence (deprecated)
    # x, y are samples with observations in rows (coerced by as.matrix)
    # R is the number of replicates
    # returns an "htest" object whose statistic is the coefficient I
    # (square root of the value computed by "indepEtest")
    .Deprecated(new = "indep.test", package = "energy",
        msg = "indep.etest will become defunct in future release. Use indep.test with method mvI.")
    x <- as.matrix(x)
    y <- as.matrix(y)
    n <- nrow(x)
    m <- nrow(y)
    if (n != m || n < 2) stop("Sample sizes must agree")
    if (! (all(is.finite(c(x, y)))))
        stop("Data contains missing or infinite values")
    stat <- 0
    reps <- if (R > 0) rep(0, R) else 0
    pval <- 1
    dims <- c(n, ncol(x), ncol(y), R)
    a <- .C("indepEtest",
        x = as.double(t(x)),
        y = as.double(t(y)),
        byrow = as.integer(TRUE),
        dims = as.integer(dims),
        stat = as.double(stat),
        reps = as.double(reps),
        pval = as.double(pval),
        PACKAGE = "energy")
    stat <- sqrt(a$stat)
    names(stat) <- "I"
    dataname <- paste("x (",n," by ",ncol(x), "), y(",n," by ", ncol(y),
        "), replicates ", R, sep="")
    e <- list(
        method = "Energy test of independence",
        statistic = stat,
        p.value = a$pval,
        data.name = dataname)
    class(e) <- "htest"
    e
}
energy/MD50000644000176000001440000000273112423640203012102 0ustar ripleyusers16d02681c12f3fc1b1cebb17bfcf7465 *DESCRIPTION 059e8192ea9ded88953dc96dc3373553 *NAMESPACE 56e91775e3e09e8d38ff57bf6ea5dc1c *NEWS f38c6e603fdd6d5c1616fb1e4c6d7e1c *R/Ecluster.R c961a1fc1fe587f581de6d2789075a44 *R/Eeqdist.R 4823032fab93b6671c67286c2b414e8b *R/Eindep.R 753c46960af21c6e8f0c58cbfd269771 *R/Emvnorm.R 93d99235131bef02cd900f98a6774db8 *R/Epoisson.R a95cc44081be8924f9fb9cf90155fc2d *R/dcorT.R 8c9f664aa4aff50d4af545dba32f7796 *R/dcov.R f54b11da20b5dc09ac01fd040fedbfb7 *R/disco.R b39bd9bc5419e4f6f6714f62baac4108 *man/dcor.ttest.Rd 30b6783c560071a703fc8590528aa8b5 *man/dcov.Rd c48f845eb9f357f1389cac4aa801415f *man/dcov.test.Rd 01d1550c0307e1146ea8ed8f5cedf725 *man/disco.Rd 74ea7edd6a938e58dd54cc06cc5c9b46 *man/edist.Rd f4b523fbfc9ad1611a257a0a056ae110 *man/energy-deprecated.Rd cf155ebd69899bf543305400811fd946 *man/energy-package.Rd 38b26115e680d4073c6f68c738d3a9f4 *man/energy.hclust.Rd 6a5cde435655fa69be0bf3bde21a1386 *man/eqdist.etest.Rd 842258d5d238e776421f6feb555ea045 *man/indep.test.Rd a720827f5d0e53c550193f02af2707e3 *man/mvI.test.Rd 682acba195e9396a00d091a2fc21f6fa
*man/mvnorm.etest.Rd 75861231b6bc360fd6f311445445c627 *man/poisson.mtest.Rd 727842fcced81550ab308552381b5b1b *src/ECl.cc 60f2661bf08b2295e345cf799b8bb28e *src/ECl.h f93deb94d53b0be60435ac02150a8009 *src/Ecluster.cc 2a0d7e13abeedc9d58e8c8b438d93f50 *src/Eindep.c da0b2398098bc08ae54f647b29cf60b5 *src/dcov.c 808baf42556a58eedb172fe3882a0aa2 *src/energy.c 9ad5c89ba30013de49a6cd0fd94c7396 *src/utilities.c energy/DESCRIPTION0000644000176000001440000000142312423640203013275 0ustar ripleyusersPackage: energy Title: E-statistics (energy statistics) Version: 1.6.2 Date: 2014-10-27 Author: Maria L. Rizzo and Gabor J. Szekely Description: E-statistics (energy) tests and statistics for comparing distributions: multivariate normality, multivariate distance components and k-sample test for equal distributions, hierarchical clustering by e-distances, multivariate independence tests, distance correlation, goodness-of-fit tests. Energy- statistics concept based on a generalization of Newton's potential energy is due to Gabor J. Szekely. Maintainer: Maria Rizzo Imports: boot License: GPL (>= 2) NeedsCompilation: yes Repository: CRAN Packaged: 2014-10-27 20:17:58 UTC; Maria Date/Publication: 2014-10-28 08:06:43 energy/man/0000755000176000001440000000000012423524166012353 5ustar ripleyusersenergy/man/dcov.Rd0000644000176000001440000001323212423524166013576 0ustar ripleyusers\name{distance correlation} \alias{dcor} \alias{dcov} \alias{DCOR} \title{ Distance Correlation and Covariance Statistics} \description{ Computes distance covariance and distance correlation statistics, which are multivariate measures of dependence. } \usage{ dcov(x, y, index = 1.0) dcor(x, y, index = 1.0) DCOR(x, y, index = 1.0) } \arguments{ \item{x}{ data or distances of first sample} \item{y}{ data or distances of second sample} \item{index}{ exponent on Euclidean distance, in (0,2]} } \details{ \code{dcov} and \code{dcor} or \code{DCOR} compute distance covariance and distance correlation statistics. 
\code{DCOR} is a self-contained R function returning a list of statistics. \code{dcor} execution is faster than \code{DCOR} (see examples). The sample sizes (number of rows) of the two samples must agree, and samples must not contain missing values. Arguments \code{x}, \code{y} can optionally be \code{\link{dist}} objects; otherwise these arguments are treated as data. Distance correlation is a new measure of dependence between random vectors introduced by Szekely, Rizzo, and Bakirov (2007). For all distributions with finite first moments, distance correlation \eqn{\mathcal R}{R} generalizes the idea of correlation in two fundamental ways: (1) \eqn{\mathcal R(X,Y)}{R(X,Y)} is defined for \eqn{X} and \eqn{Y} in arbitrary dimension. (2) \eqn{\mathcal R(X,Y)=0}{R(X,Y)=0} characterizes independence of \eqn{X} and \eqn{Y}. Distance correlation satisfies \eqn{0 \le \mathcal R \le 1}{0 \le R \le 1}, and \eqn{\mathcal R = 0}{R = 0} only if \eqn{X} and \eqn{Y} are independent. Distance covariance \eqn{\mathcal V}{V} provides a new approach to the problem of testing the joint independence of random vectors. The formal definitions of the population coefficients \eqn{\mathcal V}{V} and \eqn{\mathcal R}{R} are given in (SRB 2007). The definitions of the empirical coefficients are as follows. The empirical distance covariance \eqn{\mathcal{V}_n(\mathbf{X,Y})}{V_n(X,Y)} with index 1 is the nonnegative number defined by \deqn{ \mathcal{V}^2_n (\mathbf{X,Y}) = \frac{1}{n^2} \sum_{k,\,l=1}^n A_{kl}B_{kl} }{ V^2_n (X,Y) = (1/n^2) sum_{k,l=1:n} A_{kl}B_{kl} } where \eqn{A_{kl}} and \eqn{B_{kl}} are \deqn{ A_{kl} = a_{kl}-\bar a_{k.}- \bar a_{.l} + \bar a_{..} } \deqn{ B_{kl} = b_{kl}-\bar b_{k.}- \bar b_{.l} + \bar b_{..}. } Here \deqn{ a_{kl} = \|X_k - X_l\|_p, \quad b_{kl} = \|Y_k - Y_l\|_q, \quad k,l=1,\dots,n, }{ a_{kl} = ||X_k - X_l||_p, b_{kl} = ||Y_k - Y_l||_q, k,l=1,\dots,n, } and the subscript \code{.} denotes that the mean is computed for the index that it replaces. 
Similarly, \eqn{\mathcal{V}_n(\mathbf{X})}{V_n(X)} is the nonnegative number defined by \deqn{ \mathcal{V}^2_n (\mathbf{X}) = \mathcal{V}^2_n (\mathbf{X,X}) = \frac{1}{n^2} \sum_{k,\,l=1}^n A_{kl}^2. }{ V^2_n (X) = V^2_n (X,X) = (1/n^2) sum_{k,l=1:n} A_{kl}^2. } The empirical distance correlation \eqn{\mathcal{R}_n(\mathbf{X,Y})}{R_n(X,Y)} is the square root of \deqn{ \mathcal{R}^2_n(\mathbf{X,Y})= \frac {\mathcal{V}^2_n(\mathbf{X,Y})} {\sqrt{ \mathcal{V}^2_n (\mathbf{X}) \mathcal{V}^2_n(\mathbf{Y})}}. }{ R^2_n(X,Y)= V^2_n(X,Y) / sqrt(V^2_n (X) V^2_n(Y)). } See \code{\link{dcov.test}} for a test of multivariate independence based on the distance covariance statistic. } \value{ \code{dcov} returns the sample distance covariance and \code{dcor} returns the sample distance correlation. \code{DCOR} returns a list with elements \item{dCov}{sample distance covariance} \item{dCor}{sample distance correlation} \item{dVarX}{distance variance of x sample} \item{dVarY}{distance variance of y sample} } \note{ Two methods of computing the statistics are provided. \code{DCOR} is a stand-alone R function that returns a list of statistics. \code{dcov} and \code{dcor} provide R interfaces to the C implementation, which is usually faster. \code{dcov} and \code{dcor} call an internal function \code{.dcov}. Note that it is inefficient to compute dCor by: square root of \code{dcov(x,y)/sqrt(dcov(x,x)*dcov(y,y))} because the individual calls to \code{dcov} involve unnecessary repetition of calculations. For this reason, both \code{.dcov} and \code{DCOR} compute and return all four statistics. } \seealso{ \code{\link{dcov.test}} \code{\link{dcor.ttest}} } \references{ Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007), Measuring and Testing Dependence by Correlation of Distances, \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794. \cr \url{http://dx.doi.org/10.1214/009053607000000505} Szekely, G.J. and Rizzo, M.L.
(2009), Brownian Distance Covariance, \emph{Annals of Applied Statistics}, Vol. 3, No. 4, 1236-1265. \cr \url{http://dx.doi.org/10.1214/09-AOAS312} Szekely, G.J. and Rizzo, M.L. (2009), Rejoinder: Brownian Distance Covariance, \emph{Annals of Applied Statistics}, Vol. 3, No. 4, 1303-1308. } \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J. Szekely } \examples{ x <- iris[1:50, 1:4] y <- iris[51:100, 1:4] dcov(x, y) dcov(dist(x), dist(y)) #same thing ## C implementation dcov(x, y, 1.5) dcor(x, y, 1.5) .dcov(dist(x), dist(y), 1.5) ## R implementation DCOR(x, y, 1.5) \dontrun{ ## compare speed of R version and C version set.seed(111) ## R version system.time(replicate(1000, DCOR(x, y))) set.seed(111) ## C version system.time(replicate(1000, .dcov(x, y))) } } \keyword{ multivariate } \concept{ independence } \concept{ distance correlation } \concept{ distance covariance } \concept{ energy statistics } energy/man/eqdist.etest.Rd0000644000176000001440000001407712423524166015267 0ustar ripleyusers\name{eqdist.etest} \alias{eqdist.etest} \alias{eqdist.e} \alias{ksample.e} \title{Multisample E-statistic (Energy) Test of Equal Distributions} \description{ Performs the nonparametric multisample E-statistic (energy) test for equality of multivariate distributions. } \usage{ eqdist.etest(x, sizes, distance = FALSE, method=c("original","discoB","discoF"), R = 999) eqdist.e(x, sizes, distance = FALSE, method=c("original","discoB","discoF")) ksample.e(x, sizes, distance = FALSE, method=c("original","discoB","discoF"), ix = 1:sum(sizes)) } \arguments{ \item{x}{ data matrix of pooled sample} \item{sizes}{ vector of sample sizes} \item{distance}{logical: if TRUE, first argument is a distance matrix} \item{method}{ use original (default) or distance components (discoB, discoF)} \item{R}{ number of bootstrap replicates } \item{ix}{ a permutation of the row indices of x } } \details{ The k-sample multivariate \eqn{\mathcal{E}}{E}-test of equal distributions is performed. 
The statistic is computed from the original pooled samples, stacked in matrix \code{x} where each row is a multivariate observation, or the corresponding distance matrix. The first \code{sizes[1]} rows of \code{x} are the first sample, the next \code{sizes[2]} rows of \code{x} are the second sample, etc. The test is implemented by nonparametric bootstrap, an approximate permutation test with \code{R} replicates. The function \code{eqdist.e} returns the test statistic only; it simply passes the arguments through to \code{eqdist.etest} with \code{R = 0}. The k-sample multivariate \eqn{\mathcal{E}}{E}-statistic for testing equal distributions is returned. The statistic is computed from the original pooled samples, stacked in matrix \code{x} where each row is a multivariate observation, or from the distance matrix \code{x} of the original data. The first \code{sizes[1]} rows of \code{x} are the first sample, the next \code{sizes[2]} rows of \code{x} are the second sample, etc. The two-sample \eqn{\mathcal{E}}{E}-statistic proposed by Szekely and Rizzo (2004) is the e-distance \eqn{e(S_i,S_j)}, defined for two samples \eqn{S_i, S_j} of size \eqn{n_i, n_j} by \deqn{e(S_i,S_j)=\frac{n_i n_j}{n_i+n_j}[2M_{ij}-M_{ii}-M_{jj}], }{e(S_i, S_j) = (n_i n_j)/(n_i+n_j)[2M_(ij)-M_(ii)-M_(jj)],} where \deqn{M_{ij}=\frac{1}{n_i n_j}\sum_{p=1}^{n_i} \sum_{q=1}^{n_j} \|X_{ip}-X_{jq}\|,}{ M_{ij} = 1/(n_i n_j) sum[1:n_i, 1:n_j] ||X_(ip) - X_(jq)||,} \eqn{\|\cdot\|}{|| ||} denotes Euclidean norm, and \eqn{X_{ip}}{ X_(ip)} denotes the p-th observation in the i-th sample. The original (default method) k-sample \eqn{\mathcal{E}}{E}-statistic is defined by summing the pairwise e-distances over all \eqn{k(k-1)/2} pairs of samples: \deqn{\mathcal{E}=\sum_{1 \leq i < j \leq k} e(S_i,S_j). }{\emph{E} = sum[i=0] (\hat F(j) - F(j; \hat \lambda))^2 f(j; \hat \lambda).} The test is implemented by parametric bootstrap with \code{R} replicates.
} \value{ The function \code{poisson.m} returns the test statistic. The function \code{poisson.mtest} returns a list with class \code{htest} containing \item{method}{Description of test} \item{statistic}{observed value of the test statistic} \item{p.value}{approximate p-value of the test} \item{data.name}{description of data} \item{estimate}{sample mean} } \references{ Szekely, G. J. and Rizzo, M. L. (2004) Mean Distance Test of Poisson Distribution, \emph{Statistics and Probability Letters}, 67/3, 241-247. \url{http://dx.doi.org/10.1016/j.spl.2004.01.005}. } \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J. Szekely } \examples{ x <- rpois(20, 1) poisson.m(x) poisson.mtest(x, R = 199) } \keyword{ htest } energy/man/energy-deprecated.Rd0000644000176000001440000000414012423524166016230 0ustar ripleyusers\name{indep.etest} \alias{indep.e} \alias{indep.etest} \title{ Energy Statistic Test of Independence} \description{Deprecated: use \code{indep.test} with \code{method = "mvI"}. Computes a multivariate nonparametric E-statistic and test of independence.} \usage{ indep.e(x, y) indep.etest(x, y, R=199) } \arguments{ \item{x}{ matrix: first sample, observations in rows} \item{y}{ matrix: second sample, observations in rows} \item{R}{ number of replicates} } \details{ Computes the coefficient \eqn{\mathcal I_n}{I_n} and performs a nonparametric \eqn{\mathcal E}{E}-test of independence. The test decision is obtained via bootstrap, with \code{R} replicates. The sample sizes (number of rows) of the two samples must agree, and samples must not contain missing values. The statistic \eqn{\mathcal E = n \mathcal I_n^2}{E = n I_n^2} is a ratio of V-statistics based on interpoint distances \eqn{\|x_{i}-y_{j}\|}{||x_{i}-y_{j}||}. See the reference below for details. } \value{ The sample coefficient \eqn{\mathcal I_n}{I_n} is returned by \code{indep.e}.
The function \code{indep.etest} returns a list with class \code{htest} containing \item{method}{description of test} \item{statistic}{observed value of the coefficient \eqn{\mathcal I}{I}} \item{p.value}{approximate p-value of the test} \item{data.name}{description of data} } \references{ Bakirov, N.K., Rizzo, M.L., and Szekely, G.J. (2006), A Multivariate Nonparametric Test of Independence, \emph{Journal of Multivariate Analysis} 97/8, 1742-1756, \cr \url{http://dx.doi.org/10.1016/j.jmva.2005.10.005} } \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J. Szekely } \examples{ \dontrun{ ## independent univariate data x <- sin(runif(30, 0, 2*pi) * 2) y <- sin(runif(30, 0, 2*pi) * 4) indep.etest(x, y, R = 99) ## dependent multivariate data Sigma <- matrix(c(1, .1, 0, 0 , 1, 0, 0 ,.1, 1), 3, 3) x <- mvrnorm(30, c(0, 0, 0), diag(3)) y <- mvrnorm(30, c(0, 0, 0), Sigma) * x indep.etest(x, y, R = 99) } } \keyword{ htest } \keyword{ multivariate } \concept{ energy statistics } energy/man/indep.test.Rd0000644000176000001440000001123212423524166014716 0ustar ripleyusers\name{indep.test} \alias{indep.test} \title{ Energy Statistic Tests of Independence} \description{ Computes a multivariate nonparametric test of independence. The default method implements the distance covariance test \code{\link{dcov.test}}. } \usage{ indep.test(x, y, method = c("dcov","mvI"), index = 1, R = 199) } \arguments{ \item{x}{ matrix: first sample, observations in rows} \item{y}{ matrix: second sample, observations in rows} \item{method}{ a character string giving the name of the test} \item{index}{ exponent on Euclidean distances} \item{R}{ number of replicates} } \details{ \code{indep.test} with the default \code{method = "dcov"} computes the distance covariance test of independence. \code{index} is an exponent on the Euclidean distances. Valid choices for \code{index} are in (0,2], with default value 1 (Euclidean distance). The arguments are passed to the \code{dcov.test} function.
See the help topic \code{\link{dcov.test}} for the description and documentation and also see the references below. \code{indep.test} with \code{method = "mvI"} computes the coefficient \eqn{\mathcal I_n}{I_n} and performs a nonparametric \eqn{\mathcal E}{E}-test of independence. The arguments are passed to \code{mvI.test}. The \code{index} argument is ignored (\code{index = 1} is applied). See the help topic \code{\link{mvI.test}} and also see the reference (2006) below for details. The test decision is obtained via bootstrap, with \code{R} replicates. The sample sizes (number of rows) of the two samples must agree, and samples must not contain missing values. These energy tests of independence are based on related theoretical results, but different test statistics. The \code{dcov} method is faster than \code{mvI} method by approximately a factor of O(n). } \value{ \code{indep.test} returns a list with class \code{htest} containing \item{ method}{description of test} \item{ statistic}{observed value of the test statistic \eqn{n \mathcal V_n^2}{n V_n^2} or \eqn{n \mathcal I_n^2}{n I_n^2}} \item{ estimate}{ \eqn{\mathcal V_n}{V_n} or \eqn{\mathcal I_n}{I_n}} \item{ estimates}{ a vector [dCov(x,y), dCor(x,y), dVar(x), dVar(y)] (method dcov)} \item{ replicates}{ replicates of the test statistic} \item{ p.value}{approximate p-value of the test} \item{ data.name}{description of data} } \note{As of energy-1.1-0, \code{indep.etest} is deprecated and replaced by \code{indep.test}, which has methods for two different energy tests of independence. \code{indep.test} applies the distance covariance test (see \code{dcov.test}) by default (\code{method = "dcov"}). The original \code{indep.etest} applied the independence coefficient \eqn{\mathcal I_n}{I_n}, which is now obtained by \code{method = "mvI"}. } \seealso{ \code{ \link{dcov.test} } \code{ \link{mvI.test} } \code{ \link{dcov} } \code{ \link{mvI} } } \references{ Szekely, G.J. and Rizzo, M.L. 
(2009), Brownian Distance Covariance, \emph{Annals of Applied Statistics}, Vol. 3 No. 4, pp. 1236-1265. (Also see discussion and rejoinder.) \cr \url{http://dx.doi.org/10.1214/09-AOAS312} Szekely, G.J., Rizzo, M.L., and Bakirov, N.K. (2007), Measuring and Testing Dependence by Correlation of Distances, \emph{Annals of Statistics}, Vol. 35 No. 6, pp. 2769-2794. \cr \url{http://dx.doi.org/10.1214/009053607000000505} Bakirov, N.K., Rizzo, M.L., and Szekely, G.J. (2006), A Multivariate Nonparametric Test of Independence, \emph{Journal of Multivariate Analysis} 97/8, 1742-1756, \cr \url{http://dx.doi.org/10.1016/j.jmva.2005.10.005} } \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J. Szekely } \examples{ ## independent multivariate data x <- matrix(rnorm(60), nrow=20, ncol=3) y <- matrix(rnorm(40), nrow=20, ncol=2) indep.test(x, y, method = "dcov", R = 99) indep.test(x, y, method = "mvI", R = 99) \dontrun{ ## dependent multivariate data if (require(MASS)) { Sigma <- matrix(c(1, .1, 0, 0 , 1, 0, 0 ,.1, 1), 3, 3) x <- mvrnorm(30, c(0, 0, 0), diag(3)) y <- mvrnorm(30, c(0, 0, 0), Sigma) * x indep.test(x, y, R = 99) #dcov method indep.test(x, y, method = "mvI", R = 99) } } \dontrun{ ## compare the computing time x <- mvrnorm(50, c(0, 0, 0), diag(3)) y <- mvrnorm(50, c(0, 0, 0), Sigma) * x set.seed(123) system.time(indep.test(x, y, method = "dcov", R = 1000)) set.seed(123) system.time(indep.test(x, y, method = "mvI", R = 1000)) } } \keyword{ htest } \keyword{ multivariate } \keyword{ nonparametric } \concept{ independence } \concept{ energy statistics } energy/man/mvnorm.etest.Rd0000644000176000001440000000574312423524166015314 0ustar ripleyusers\name{mvnorm.etest} \alias{mvnorm.etest} \alias{mvnorm.e} \alias{normal.e} \title{E-statistic (Energy) Test of Multivariate Normality} \description{ Performs the E-statistic (energy) test of multivariate or univariate normality.
} \usage{ mvnorm.etest(x, R = 999) mvnorm.e(x) normal.e(x) } \arguments{ \item{x}{ data matrix of multivariate sample, or univariate data vector} \item{R}{ number of bootstrap replicates } } \details{ If \code{x} is a matrix, each row is a multivariate observation. The data will be standardized to zero mean and identity covariance matrix using the sample mean vector and sample covariance matrix. If \code{x} is a vector, the univariate statistic \code{normal.e(x)} is returned. If the data contains missing values or the sample covariance matrix is singular, NA is returned. The \eqn{\mathcal{E}}{E}-test of multivariate normality was proposed and implemented by Szekely and Rizzo (2005). The test statistic for d-variate normality is given by \deqn{\mathcal{E} = n (\frac{2}{n} \sum_{i=1}^n E\|y_i-Z\| - E\|Z-Z'\| - \frac{1}{n^2} \sum_{i=1}^n \sum_{j=1}^n \|y_i-y_j\|), }{E = n((2/n) sum[1:n] E||y_i-Z|| - E||Z-Z'|| - (1/n^2) sum[1:n,1:n] ||y_i-y_j||),} where \eqn{y_1,\ldots,y_n} is the standardized sample, \eqn{Z, Z'} are iid standard d-variate normal, and \eqn{\| \cdot \|}{|| ||} denotes Euclidean norm. The \eqn{\mathcal{E}}{E}-test of multivariate (univariate) normality is implemented by parametric bootstrap with \code{R} replicates. } \value{ The value of the \eqn{\mathcal{E}}{E}-statistic for univariate normality is returned by \code{normal.e}. The value of the \eqn{\mathcal{E}}{E}-statistic for multivariate normality is returned by \code{mvnorm.e}. \code{mvnorm.etest} returns a list with class \code{htest} containing \item{method}{description of test} \item{statistic}{observed value of the test statistic} \item{p.value}{approximate p-value of the test} \item{data.name}{description of data} } \references{ Szekely, G. J. and Rizzo, M. L. (2005) A New Test for Multivariate Normality, \emph{Journal of Multivariate Analysis}, 93/1, 58-80, \url{http://dx.doi.org/10.1016/j.jmva.2003.12.002}. Rizzo, M. L. (2002). A New Rotation Invariant Goodness-of-Fit Test, Ph.D. 
dissertation, Bowling Green State University. Szekely, G. J. (1989) Potential and Kinetic Energy in Statistics, Lecture Notes, Budapest Institute of Technology (Technical University). } \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J. Szekely } \examples{ ## compute normality test statistics for iris Setosa data data(iris) mvnorm.e(iris[1:50, 1:4]) normal.e(iris[1:50, 1]) ## test if the iris Setosa data has multivariate normal distribution mvnorm.etest(iris[1:50,1:4], R = 199) ## test a univariate sample for normality x <- runif(50, 0, 10) mvnorm.etest(x, R = 199) } \keyword{ multivariate } \keyword{ htest } \concept{ energy statistics } energy/man/energy-package.Rd0000644000176000001440000000156212423524166015530 0ustar ripleyusers\name{energy-package} \alias{energy-package} \alias{energy} \docType{package} \title{ E-statistics (energy statistics) } \description{ Description: E-statistics (energy) tests and statistics for comparing distributions: multivariate normality, multivariate distance components and k-sample test for equal distributions, hierarchical clustering by e-distances, multivariate independence tests, distance correlation, goodness-of-fit tests. Energy-statistics concept based on a generalization of Newton's potential energy is due to Gabor J. Szekely. } \author{ Maria L. Rizzo and Gabor J. Szekely } \references{ G. J. Szekely and M. L. Rizzo (2013). Energy statistics: A class of statistics based on distances, \emph{Journal of Statistical Planning and Inference}, \url{http://dx.doi.org/10.1016/j.jspi.2013.03.018} } \keyword{ package } \keyword{ multivariate } energy/man/energy.hclust.Rd0000644000176000001440000001134312423524166015436 0ustar ripleyusers\name{energy.hclust} \alias{energy.hclust} \title{ Hierarchical Clustering by Minimum (Energy) E-distance } \description{ Performs hierarchical clustering by minimum (energy) E-distance method. 
} \usage{ energy.hclust(dst, alpha = 1) } \arguments{ \item{dst}{Euclidean distances in a \code{dist} object, or a distance matrix produced by \code{dist}, or lower triangle of distance matrix as vector in column order. If \code{dst} is a square matrix, the lower triangle is interpreted as a vector of distances.} \item{alpha}{distance exponent} } \details{ Dissimilarities are \eqn{d(x,y) = \|x-y\|^\alpha}{||x-y||^a}, where the exponent \eqn{\alpha}{a} is in the interval (0,2]. This function performs agglomerative hierarchical clustering. Initially, each of the n singletons is a cluster. At each of n-1 steps, the procedure merges the pair of clusters with minimum e-distance. The e-distance between two clusters \eqn{C_i, C_j} of sizes \eqn{n_i, n_j} is given by \deqn{e(C_i, C_j)=\frac{n_i n_j}{n_i+n_j}[2M_{ij}-M_{ii}-M_{jj}], } where \deqn{M_{ij}=\frac{1}{n_i n_j}\sum_{p=1}^{n_i} \sum_{q=1}^{n_j} \|X_{ip}-X_{jq}\|^\alpha,}{ M_{ij} = 1/(n_i n_j) sum[1:n_i, 1:n_j] ||X_(ip) - X_(jq)||^a,} \eqn{\|\cdot\|}{|| ||} denotes Euclidean norm, and \eqn{X_{ip}}{ X_(ip)} denotes the p-th observation in the i-th cluster. The return value is an object of class \code{hclust}, so \code{hclust} methods such as print or plot methods, \code{plclust}, and \code{cutree} are available. See the documentation for \code{hclust}. The e-distance measures both the heterogeneity between clusters and the homogeneity within clusters. \eqn{\mathcal E}{E}-clustering (\eqn{\alpha=1}{a=1}) is particularly effective in high dimension, and is more effective than some standard hierarchical methods when clusters have equal means (see example below). For other advantages see the references. } \value{ An object of class \code{hclust} which describes the tree produced by the clustering process. The object is a list with components: \item{merge:}{ an n-1 by 2 matrix, where row i of \code{merge} describes the merging of clusters at step i of the clustering. 
If an element j in the row is negative, then observation -j was merged at this stage. If j is positive then the merge was with the cluster formed at the (earlier) stage j of the algorithm.} \item{height:}{the clustering height: a vector of n-1 non-decreasing real numbers (the e-distance between merging clusters)} \item{order:}{ a vector giving a permutation of the indices of original observations suitable for plotting, in the sense that a cluster plot using this ordering and matrix \code{merge} will not have crossings of the branches.} \item{labels:}{ labels for each of the objects being clustered.} \item{call:}{ the call which produced the result.} \item{method:}{ the cluster method that has been used (e-distance).} \item{dist.method:}{ the distance that has been used to create \code{dst}.} } \references{ Szekely, G. J. and Rizzo, M. L. (2005) Hierarchical Clustering via Joint Between-Within Distances: Extending Ward's Minimum Variance Method, \emph{Journal of Classification} 22(2) 151-183. \cr \url{http://dx.doi.org/10.1007/s00357-005-0012-9} Szekely, G. J. and Rizzo, M. L. (2004) Testing for Equal Distributions in High Dimension, \emph{InterStat}, November (5). Szekely, G. J. (2000) Technical Report 03-05: \eqn{\mathcal{E}}{E}-statistics: Energy of Statistical Samples, Department of Mathematics and Statistics, Bowling Green State University. } \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J. 
Szekely } \seealso{ \code{\link{edist}} \code{\link{ksample.e}} \code{\link{eqdist.etest}} \code{hclust}} \examples{ \dontrun{ library(cluster) data(animals) plot(energy.hclust(dist(animals))) } data(USArrests) ecl <- energy.hclust(dist(USArrests)) print(ecl) plot(ecl) cutree(ecl, k=3) cutree(ecl, h=150) ## compare performance of e-clustering, Ward's method, group average method ## when sampled populations have equal means: n=200, d=5, two groups z <- rbind(matrix(rnorm(1000), nrow=200), matrix(rnorm(1000, 0, 5), nrow=200)) g <- c(rep(1, 200), rep(2, 200)) d <- dist(z) e <- energy.hclust(d) a <- hclust(d, method="average") w <- hclust(d^2, method="ward") list("E" = table(cutree(e, k=2) == g), "Ward" = table(cutree(w, k=2) == g), "Avg" = table(cutree(a, k=2) == g)) } \keyword{ multivariate } \keyword{ cluster } \concept{ energy statistics } energy/man/disco.Rd0000644000176000001440000001055612423524166013752 0ustar ripleyusers\name{disco} \alias{disco} \alias{disco.between} \alias{print.disco} \title{ distance components (DISCO)} \description{ E-statistics DIStance COmponents and tests, analogous to variance components } \usage{ disco(x, factors, distance, index=1.0, R=0, method=c("disco","discoB","discoF")) disco.between(x, factors, distance, index=1.0, R=0) } \arguments{ \item{x}{ data matrix or distance matrix} \item{factors}{ matrix of factor labels or integers (not design matrix)} \item{distance}{ logical, TRUE if x is distance matrix} \item{index}{ exponent on Euclidean distance in (0,2]} \item{R}{ number of replicates for a permutation test} \item{method}{ test statistic } } \details{ \code{disco} calculates the distance components decomposition of total dispersion and if R > 0 tests for significance using the test statistic disco "F" ratio (default \code{method="disco"}), or using the between component statistic (\code{method="discoB"}), each implemented by permutation test. 
In the current release \code{disco} computes the decomposition for one-way models only. } \value{ When \code{method="discoF"}, \code{disco} returns a class \code{disco} object, which is a list containing \item{call}{call} \item{method}{method} \item{statistic}{vector of observed statistics} \item{p.value}{vector of p-values} \item{k}{number of factors} \item{N}{number of observations} \item{between}{between-sample distance components} \item{withins}{one-way within-sample distance components} \item{within}{within-sample distance component} \item{total}{total dispersion} \item{Df.trt}{degrees of freedom for treatments} \item{Df.e}{degrees of freedom for error} \item{index}{index (exponent on distance)} \item{factor.names}{factor names} \item{factor.levels}{factor levels} \item{sample.sizes}{sample sizes} \item{stats}{matrix containing decomposition} When \code{method="discoB"}, \code{disco} passes the arguments to \code{disco.between}, which returns a class \code{htest} object. \code{disco.between} returns a class \code{htest} object, where the test statistic is the between-sample statistic (proportional to the numerator of the F ratio of the \code{disco} test). } \references{ M. L. Rizzo and G. J. Szekely (2010). DISCO Analysis: A Nonparametric Extension of Analysis of Variance, \emph{Annals of Applied Statistics}, Vol. 4, No. 2, 1034-1055. \cr \url{http://dx.doi.org/10.1214/09-AOAS245} } \note{ The current version does all calculations via matrix arithmetic and the \code{boot} function. Support for more general additive models and a formula interface is under development. \code{disco} methods have been added to the cluster distance summary function \code{edist}, and energy tests for equality of distribution (see \code{eqdist.etest}). } \seealso{ \code{ \link{edist} } \code{ \link{eqdist.e} } \code{ \link{eqdist.etest} } \code{ \link{ksample.e} } } \author{ Maria L. Rizzo \email{mrizzo @ bgsu.edu} and Gabor J.
Szekely } \examples{ ## warpbreaks one-way decompositions data(warpbreaks) attach(warpbreaks) disco(breaks, factors=wool, R=99) ## When index=2 for univariate data, we get ANOVA decomposition disco(breaks, factors=tension, index=2.0, R=99) aov(breaks ~ tension) ## Multivariate response ## Example on producing plastic film from Krzanowski (1998, p. 381) tear <- c(6.5, 6.2, 5.8, 6.5, 6.5, 6.9, 7.2, 6.9, 6.1, 6.3, 6.7, 6.6, 7.2, 7.1, 6.8, 7.1, 7.0, 7.2, 7.5, 7.6) gloss <- c(9.5, 9.9, 9.6, 9.6, 9.2, 9.1, 10.0, 9.9, 9.5, 9.4, 9.1, 9.3, 8.3, 8.4, 8.5, 9.2, 8.8, 9.7, 10.1, 9.2) opacity <- c(4.4, 6.4, 3.0, 4.1, 0.8, 5.7, 2.0, 3.9, 1.9, 5.7, 2.8, 4.1, 3.8, 1.6, 3.4, 8.4, 5.2, 6.9, 2.7, 1.9) Y <- cbind(tear, gloss, opacity) rate <- factor(gl(2,10), labels=c("Low", "High")) ## test for equal distributions by rate disco(Y, factors=rate, R=99) disco(Y, factors=rate, R=99, method="discoB") ## Just extract the decomposition table disco(Y, factors=rate)$stats ## Compare eqdist.e methods for rate ## disco between stat is half of original when sample sizes equal eqdist.e(Y, sizes=c(10, 10), method="original") eqdist.e(Y, sizes=c(10, 10), method="discoB") ## The between-sample distance component disco.between(Y, factors=rate) } \keyword{ htest } \keyword{ multivariate }