fastICA/0000755000176000001440000000000013510655147011563 5ustar ripleyusersfastICA/inst/0000755000176000001440000000000012131311405012520 5ustar ripleyusersfastICA/inst/README0000644000176000001440000000213611754561777013437 0ustar ripleyusersR Package - fastICA INTRODUCTION This file explains how to install the R package fastICA. I assume that you have a basic knowledge of UNIX/LINUX and that you already have R installed on your machine. INSTALLATION 1. Download the g-zipped tar archive fastICA_*.*-*.tar.gz 2. Install the package using R CMD INSTALL -l /path/to/LIB fastICA_*.*-*.tar.gz /path/to/LIB should be the library directory where you wish to put the package. If you have root access to your machine then use R CMD INSTALL fastICA_*.*-*.tar.gz and the package will be put into the default R library tree. 5. You can then start an R session and load the package using library(fastICA,lib.loc="/path/to/LIB") or library(fastICA) (if you installed the package into the default R library tree) FUNCTIONS The package contains the following functions fastICA - FastICA algorithm ica.R.def - R code for FastICA using a deflation scheme ica.R.par - R code for FastICA using a parallel scheme PROBLEMS/SUGGESTIONS If you have any problems/suggestions using this package, please email ripley@stats.ox.ac.uk fastICA/inst/HISTORY0000644000176000001440000000261311754561777013643 0ustar ripleyusers2001-08-29 - Version 1.0-1 -------------------------- -Cleaned up the help files a bit 2002-01-15 - Version 1.1-1 -------------------------- ** Changes suggested by B D Ripley ** -changed to Lapack SGESDD routine and calculate optimal workspace to increase speed -better error checking within the C code -more transparent C code -FastICA function replaced by fastICA and takes standard (nxp) data matrix as argument 2002-09-23 - Version 1.1-2 -------------------------- -all svd calls now done using La.svd() using method "dgesdd" -configure.win script added to package 2003-03-12 - Version 1.1-3 -------------------------- -fixed memory leak in function calc_K_JM in file ica.c 2003-04-04 - Version 1.1-4 -------------------------- -made code R 1.7.0 compatible 2004-06-08 - Version 1.1-5 -------------------------- -changed detection of blas & lapack, to be MacOS X compatible (no lsame in BLAS). 2004-12-23 - Version 1.1-6 -------------------------- - implemented Brian's changes 2006-05-12 - Version 1.1-7 -------------------------- - implemented change to fastICA function scaling 2006-05-16 - Version 1.1-8 -------------------------- - Brian made changes to separate out slamc.f and ensure it is compiled without optimization 2007-10-10 - Version 1.1-9 -------------------------- - Changes need to avoid looping on 2.6.0 under Windows. - Add NAMESPACE - BDR takes over as maintainer. fastICA/tests/0000755000176000001440000000000012131311405012705 5ustar ripleyusersfastICA/tests/one-component.R0000644000176000001440000000076511754561777015656 0ustar ripleyusers## failed in 1.1-9 library(fastICA) set.seed(49394) S <- matrix(runif(10000), 5000, 2) A <- matrix(c(1, 1, -1, 3), 2, 2, byrow = TRUE) X <- S%*%A a <- fastICA(X, 1, alg.typ = "parallel", fun = "logcosh", alpha = 1, method = "C", row.norm = FALSE, maxit = 200, tol = 0.0001, verbose = TRUE) # good a <- fastICA(X, 1, alg.typ = "parallel", fun = "logcosh", alpha = 1, method = "R", row.norm = FALSE, maxit = 200, tol = 0.0001, verbose = TRUE) # fails fastICA/src/0000755000176000001440000000000012655570600012351 5ustar ripleyusersfastICA/src/Makevars0000644000176000001440000000006012211376653014041 0ustar ripleyusersPKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) fastICA/src/fastICA-win.def0000644000176000001440000000005411754561777015113 0ustar ripleyusersLIBRARY fastICA.dll EXPORTS R_init_fastICA fastICA/src/ica.c0000644000176000001440000004647012655570600013264 0ustar ripleyusers#include #include #include #include #include #include static int min_JM (int *, int *); static int max_JM (int *, int *); static void rowcentre_JM (double *, int, int); static void colstandard_JM (double *, int, int); static void rowstd_JM (double *, int, int, int); static void transpose_mat_JM (double *, int *, int *, double *); static void mmult_JM (double *, int, int, double *, int, int, double *); static void orthog_mat_JM (double *, int, double *); static void gramsch_JM (double *, int, int, int); static void svd_JM (double *, int *, int *, double *, double *, double *); static void Symm_logcosh_JM (double *, int, double *, int, int, double, double *, double *); static void Symm_exp_JM (double *, int, double *, int, int, double, double *, double *); static void Def_logcosh_JM (double *, int, double *, int, int, double, double *); static void Def_exp_JM (double *, int, double *, int, int, double, double *); static void calc_A_JM(double*, double*, double*, int*, int*, int*, double*, double*); static void calc_K_JM(double*, int*, int*, double*); #include static void rowcentre_JM (double *ans, int n, int p) { /* mean centres nxp matrix ans */ double tmp; int i, j; for (i = 0; i < n; i++) { tmp = 0; for (j = 0; j < p; j++) { tmp = tmp + ((double) ans[p * i + j]) / p; } for (j = 0; j < p; j++) { ans[p * i + j] -= (double) tmp; } } } static void colstandard_JM (double *ans, int n, int p) { /* transform columns of nxp matrix ans to have zero mean and unit variance */ double tmp[2]; double tmp1; int i, j; for (i = 0; i < p; i++) { tmp[0] = 0; tmp[1] = 0; for (j = 0; j < n; j++) { tmp[0] += (double) ans[p * j + i]; tmp[1] += ((double) ans[p * j + i]) * ((double) ans[p * j + i]); } tmp[0] = tmp[0] / n; tmp1 = (tmp[1] - n * (tmp[0]) * (tmp[0])) / (n - 1); tmp[1] = sqrt (tmp1); for (j = 0; j < n; j++) { ans[p * j + i] = (double) ((((double) ans[p * j + i]) - tmp[0]) / tmp[1]); } } } static void svd_JM (double *mat, int *n, int *p, double *u, double *d, double *v) { /* calculates svd decomposition of nxp matrix mat */ /* mat is a pointer to an nxp array of doubles */ /* n is a pointer to an integer specifying the no. of rows of mat */ /* p is a pointer to an integer specifying the no. of cols of mat */ /* u is a pointer to a double array of dimension (n,n) */ /* d is a pointer to a double array of dimension min(n,p) */ /* v is a pointer to a double array of dimension (p,p) */ int info, *iwork, lwork, a, b; size_t iwork_size, ilwork, nn = *n, pp = *p, mm; double *work, *mat1, *u1, *v1; char jobz = 'A'; mm = min_JM(n,p); iwork_size = 8 * (size_t) mm; a = max_JM(n,p); b = 4 * mm * mm + 4 * mm; ilwork = 3 * mm * mm + max_JM(&a, &b); if (ilwork > INT_MAX) error("svd on %d x %d exceeds Fortran indexing limits"); work = Calloc (ilwork, double); iwork = Calloc (iwork_size, int); mat1 = Calloc (nn * pp, double); u1 = Calloc (nn * nn, double); v1 = Calloc (pp * pp, double); transpose_mat_JM (mat, n, p, mat1); lwork = ilwork; F77_CALL (dgesdd) (&jobz, n, p, mat1, n, d, u1, n, v1, p, work, &lwork, iwork, &info); transpose_mat_JM (u1, n, n, u); transpose_mat_JM (v1, p, p, v); Free (mat1); Free (u1); Free (v1); Free (work); Free (iwork); } static void transpose_mat_JM (double *mat, int *n, int *p, double *ans) { /* transpose nxp matrix mat */ int i, j; for (i = 0; i < *n; i++) { for (j = 0; j < *p; j++) { *(ans + j * (*n) + i) = *(mat + i * (*p) + j); } } } static int min_JM (int *a, int *b) { /* find minimum of a and b */ int ans; ans = *b; if (*a < *b) ans = *a; return ans; } static int max_JM (int *a, int *b) { /* find maximum of a and b */ int ans; ans = *b; if (*a > *b) ans = *a; return ans; } static void mmult_JM (double *A, int n, int p, double *B, int q, int r, double *C) { /* matrix multiplication using FORTRAN BLAS routine SGEMM */ /* A is (n*p) and B is (q*r), A*B returned to C */ double alpha = 1.0, beta = 0.0; int M, K, N; char transA = 'N', transB = 'N'; if (p != q) { error ("Error, matrices not suitable\nfor multiplication"); } else { M = n; K = p; N = r; F77_CALL (dgemm) (&transA, &transB, &N, &M, &K, &alpha, B, &N, A, &K, &beta, C, &N); } } static void orthog_mat_JM (double *mat, int e, double *orthog) { /* take Wmat, (e*e), and return orthogonalized version to orthog_W */ double *u, *v, *d, *temp; int i; size_t ee = e; u = Calloc (ee * ee, double); d = Calloc (ee, double); v = Calloc (ee * ee, double); temp = Calloc (ee * ee, double); svd_JM (mat, &e, &e, u, d, v); for (i = 0; i < e; i++) { temp[i * e + i] = 1 / (d[i]); } mmult_JM (u, e, e, temp, e, e, v); transpose_mat_JM (u, &e, &e, temp); mmult_JM (v, e, e, temp, e, e, u); mmult_JM (u, e, e, mat, e, e, orthog); Free (u); Free (v); Free (d); Free (temp); } static void Symm_logcosh_JM (double *w_init, int e, double *data, int f, int p, double alpha, double *w_final, double *Tol) { /* Function that carries out Symmetric ICA using a logcosh approximation to the neg. entropy function */ double *mat1, *mat2, *mat3, *mat4, *mat5, *mat6; int i, j; double mean; if (e != f) { error ("error in Symm_logcosh_JM, dims dont match"); } else { size_t es = (size_t)e * (size_t)e; size_t ep = (size_t)e * (size_t)p; mat1 = Calloc (ep, double); mat2 = Calloc (ep, double); mat3 = Calloc (es, double); mat4 = Calloc (es, double); mat5 = Calloc (es, double); mat6 = Calloc (es, double); mmult_JM (w_init, e, e, data, e, p, mat1); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat1[i * p + j] = tanh (alpha * mat1[i * p + j]); } } transpose_mat_JM (data, &e, &p, mat2); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat2[i * p + j] = (mat2[i * p + j]) / p; } } mmult_JM (mat1, e, p, mat2, p, e, mat3); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat1[i * p + j] = (alpha * (1 - (mat1[i * p + j]) * (mat1[i * p + j]))); } } for (i = 0; i < e; i++) { mean = 0; for (j = 0; j < p; j++) { mean += ((mat1[i * p + j]) / p); } mat4[i * e + i] = mean; } mmult_JM (mat4, e, e, w_init, e, e, mat5); for (i = 0; i < e; i++) { for (j = 0; j < e; j++) { mat4[i * e + j] = (mat3[i * e + j] - mat5[i * e + j]); } } transpose_mat_JM (w_init, &e, &e, mat6); orthog_mat_JM (mat4, e, w_final); mmult_JM (w_final, e, e, mat6, e, e, mat5); mean = 0; for (i = 0; i < e; i++) { if (fabs (1 - fabs (mat5[i * e + i])) > mean) { mean = (fabs (1 - fabs (mat5[i * e + i]))); } } *Tol = mean; Free (mat1); Free (mat2); Free (mat3); Free (mat4); Free (mat5); Free (mat6); } } static void Def_logcosh_JM (double *w_init, int e, double *data, int f, int p, double alpha, double *w_final) { /* Function that carries out Deflation ICA using an logcosh approximation to the neg. entropy function */ double *mat1, *mat2, *mat3, *mat4; int i, j; double mean; if (e != f) { error ("error in Def_logcosh_JM, dims dont match"); } else { mat1 = Calloc (p, double); mat2 = Calloc ((size_t)e * (size_t)p, double); mat3 = Calloc (e, double); mat4 = Calloc (e, double); mmult_JM (w_init, 1, e, data, e, p, mat1); for (i = 0; i < p; i++) { mat1[i] = tanh (alpha * mat1[i]); } transpose_mat_JM (data, &e, &p, mat2); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat2[i * p + j] = (mat2[i * p + j]) / p; } } mmult_JM (mat1, 1, p, mat2, p, e, mat3); for (i = 0; i < p; i++) { mat1[i] = (alpha * (1 - (mat1[i]) * (mat1[i]))); } mean = 0; for (j = 0; j < p; j++) { mean += ((mat1[j]) / p); } for (i = 0; i < e; i++) { mat4[i] = (w_init[i]) * mean; } for (i = 0; i < e; i++) { w_final[i] = (mat3[i] - mat4[i]); } Free (mat1); Free (mat2); Free (mat3); Free (mat4); } } static void Symm_exp_JM (double *w_init, int e, double *data, int f, int p, double alpha, double *w_final, double *Tol) { /* Function that carries out Symmetric ICA using a exponential approximation to the neg. entropy function */ double *mat1, *mat2, *mat3, *mat4, *mat5, *mat0, *mat6; int i, j; double mean; if (e != f) { error ("error in Symm_exp_JM, dims dont match"); } else { size_t ep = (size_t)e * (size_t)p; size_t ee = (size_t)e * (size_t)e; mat0 = Calloc (ep, double); mat1 = Calloc (ep, double); mat2 = Calloc (ep, double); mat3 = Calloc (ee, double); mat4 = Calloc (ee, double); mat5 = Calloc (ee, double); mat6 = Calloc (ee, double); mmult_JM (w_init, e, e, data, e, p, mat1); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat0[i * p + j] = (mat1[i * p + j]) * exp (-0.5 * (mat1[i * p + j]) * (mat1[i * p + j])); } } transpose_mat_JM (data, &e, &p, mat2); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat2[i * p + j] = (mat2[i * p + j]) / p; } } mmult_JM (mat0, e, p, mat2, p, e, mat3); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat1[i * p + j] = ((1 - (mat1[i * p + j]) * (mat1[i * p + j])) * exp (-0.5 * (mat1 [i * p + j]) * (mat1 [i * p + j]))); } } for (i = 0; i < e; i++) { mean = 0; for (j = 0; j < p; j++) { mean += ((mat1[i * p + j]) / p); } mat4[i * e + i] = mean; } mmult_JM (mat4, e, e, w_init, e, e, mat5); for (i = 0; i < e; i++) { for (j = 0; j < e; j++) { mat4[i * e + j] = (mat3[i * e + j] - mat5[i * e + j]); } } transpose_mat_JM (w_init, &e, &e, mat6); orthog_mat_JM (mat4, e, w_final); mmult_JM (w_final, e, e, mat6, e, e, mat5); mean = 0; for (i = 0; i < e; i++) { if (fabs (1 - fabs (mat5[i * e + i])) > mean) { mean = (fabs (1 - fabs (mat5[i * e + i]))); } } *Tol = mean; Free (mat1); Free (mat2); Free (mat3); Free (mat4); Free (mat5); Free (mat0); Free (mat6); } } static void Def_exp_JM (double *w_init, int e, double *data, int f, int p, double alpha, double *w_final) { /* Function that carries out Deflation ICA using an exponential approximation to the neg. entropy function */ double *mat1, *mat2, *mat3, *mat4; int i, j; double mean; if (e != f) { error ("error in Def_exp_JM, dims dont match"); } else { mat1 = Calloc (p, double); mat2 = Calloc ((size_t)e * (size_t)p, double); mat3 = Calloc (e, double); mat4 = Calloc (e, double); mmult_JM (w_init, 1, e, data, e, p, mat1); for (i = 0; i < p; i++) { mat1[i] = ((mat1[i]) * exp (-0.5 * (mat1[i]) * (mat1[i]))); } transpose_mat_JM (data, &e, &p, mat2); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat2[i * p + j] = (mat2[i * p + j]) / p; } } mmult_JM (mat1, 1, p, mat2, p, e, mat3); mmult_JM (w_init, 1, e, data, e, p, mat1); for (i = 0; i < p; i++) { mat1[i] = ((1 - (mat1[i]) * (mat1[i])) * exp (-.5 * (mat1[i]) * (mat1[i]))); } mean = 0; for (j = 0; j < p; j++) { mean += ((mat1[j]) / p); } for (i = 0; i < e; i++) { mat4[i] = (w_init[i]) * mean; } for (i = 0; i < e; i++) { w_final[i] = (mat3[i] - mat4[i]); } Free (mat1); Free (mat2); Free (mat3); Free (mat4); } } static void gramsch_JM (double *ww, int n, int m, int k) { int ip, jp; double tmp; /* do Gram-Schmidt on row k of (n*m) matrix ww */ k -= 1; if (k > n) { error ("Error in gramsch"); } else { for (ip = 0; ip < k; ip++) { tmp = 0; for (jp = 0; jp < m; jp++) { tmp += ((ww[m * ip + jp]) * (ww[m * k + jp])); } for (jp = 0; jp < m; jp++) { ww[m * k + jp] = (ww[m * k + jp] - ((ww[m * ip + jp]) * tmp)); } } } } static void rowstd_JM (double *ww, int n, int m, int k) { /* for ww (n*m), make ||ww[k, ]|| equal 1 */ double tmp = 0; int i; k -= 1; if (k > n) { error ("Error in rowstd"); } else { for (i = 0; i < m; i++) { tmp += ((ww[k * m + i]) * (ww[k * m + i])); } tmp = sqrt (tmp); for (i = 0; i < m; i++) { ww[k * m + i] = ((ww[k * m + i]) / tmp); } } } static void calc_K_JM(double *x, int *n, int *p, double *K) { int i, j; double *xxt, *xt, *u, *d, *v, *temp1, *temp2; size_t nn = *n, pp = *p; xxt = Calloc (nn * nn, double); xt = Calloc (nn * pp, double); /* transpose x matrix */ transpose_mat_JM (x, n, p, xt); /* calculate sample covariance matrix xxt */ mmult_JM (x, *n, *p, xt, *p, *n, xxt); for (i = 0; i < *n; i++) { for (j = 0; j < *n; j++) { xxt[*n * i + j] = xxt[*n * i + j] / *p; } } Free (xt); /* calculate svd decomposition of xxt */ u = Calloc (nn * nn, double); d = Calloc (nn, double); v = Calloc (nn * nn, double); svd_JM (xxt, n, n, u, d, v); /* calculate K matrix*/ temp1 = Calloc (nn * nn, double); temp2 = Calloc (nn * nn, double); for (i = 0; i < *n; i++) { temp1[*n * i + i] = 1 / sqrt (d[i]); } transpose_mat_JM (u, n, n, temp2); mmult_JM (temp1, *n, *n, temp2, *n, *n, K); Free (temp1); Free (temp2); Free(xxt); Free(u); Free(d); Free(v); } static void calc_A_JM(double *w, double *k, double *data, int *e, int *n, int *p, double *A, double *unmixed_data) { /* calculate un-mixing matrix A */ int i; double *um, *umt, *umumt, *uu, *dd, *vv, *temp1, *temp2, *temp3; size_t nn = *n, ee = *e; um = Calloc (ee * nn, double); umt = Calloc (nn * ee, double); mmult_JM (w, *e, *e, k, *e, *n, um); mmult_JM (um, *e, *n, data, *n, *p, unmixed_data); transpose_mat_JM (um, e, n, umt); umumt = Calloc (ee * ee, double); mmult_JM (um, *e, *n, umt, *n, *e, umumt); uu = Calloc (ee * ee, double); dd = Calloc (ee, double); vv = Calloc (ee * ee, double); svd_JM (umumt, e, e, uu, dd, vv); temp1 = Calloc (ee * ee, double); for (i = 0; i < *e; i++) { temp1[*e * i + i] = 1 / (dd[i]); } temp2 = Calloc (ee * ee, double); temp3 = Calloc (ee * ee, double); transpose_mat_JM (vv, e, e, temp3); mmult_JM (temp3, *e, *e, temp1, *e, *e, temp2); transpose_mat_JM (uu, e, e, vv); mmult_JM (temp2, *e, *e, vv, *e, *e, uu); mmult_JM (umt, *n, *e, uu, *e, *e, A); Free(um); Free(umt); Free(umumt); Free(uu); Free(dd); Free(vv); Free(temp1); Free(temp2); Free(temp3); } static void icainc_JM (double *data_matrix, double *w_matrix, int *nn, int *pp, int *ee, double *alpha, int *rowflag, int *colflag, int *funflag, int *maxit, double *lim, int *defflag, int *verbose, double *data_pre, double *Kmat1, double *w_final, double *ansa, double *ansx2) { /* main ICA function */ int i, j, k; size_t n = *nn, p = *pp, e = *ee; double tol; double *temp_w1, *temp_w2; double *data1, *Kmat, *temp1, *w_init; /* make a copy of the data matrix*/ data1 = Calloc (n * p, double); for (i = 0; i < n; i++) { for (j = 0; j < p; j++) { data_pre[i * p + j] = data_matrix[i * p + j]; } } /* row center data matrix if required*/ if (*rowflag == 1) { rowcentre_JM (data_pre, n, p); if (*verbose == 1) Rprintf ("Centering\n"); } /* standardize columns of data matrix if required*/ if (*colflag == 1) { colstandard_JM (data_pre, n, p); Rprintf("colstandard\n"); } /* calculate pre-whitening matrix Kmat */ if (*verbose == 1) Rprintf ("Whitening\n"); Kmat = Calloc (n * n, double); calc_K_JM(data_pre, nn, pp, Kmat); /* pre-whiten data and reduce dimension from size n to size e */ for (i = 0; i < e; i++) { for (j = 0; j < n; j++) { Kmat1[i * n + j] = Kmat[i * n + j]; } } mmult_JM (Kmat1, e, n, data_pre, n, p, data1); /* calculate initial (orthogonal) unmixing matrix w */ temp1 = Calloc (e * e, double); w_init = Calloc (e * e, double); for (i = 0; i < e; i++) { for (j = 0; j < e; j++) { temp1[i * e + j] = w_matrix[i * e + j]; } } orthog_mat_JM (temp1, e, w_init); /* Main ICA code */ if (*defflag == 0) { if (*funflag == 1) { if (*verbose == 1) Rprintf("Symmetric FastICA using logcosh approx. to neg-entropy function\n"); i = 1; Symm_logcosh_JM (w_init, e, data1, e, p, *alpha, w_final, &tol); if (*verbose == 1) Rprintf ("Iteration %d tol=%f\n", i, tol); i = 2; while ((tol > (*lim)) && (i < (*maxit))) { Symm_logcosh_JM (w_final, e, data1, e, p, *alpha, w_final, &tol); if (*verbose == 1) Rprintf ("Iteration %d tol=%f\n", i, tol); i += 1; } } if (*funflag == 2) { if (*verbose == 1) Rprintf("Symmetric FastICA using exponential approx. to neg-entropy function\n"); i = 1; Symm_exp_JM (w_init, e, data1, e, p, *alpha, w_final, &tol); if (*verbose == 1) Rprintf ("Iteration %d tol=%f\n", i, tol); i = 2; while ((tol > (*lim)) && (i < (*maxit))) { Symm_exp_JM (w_final, e, data1, e, p, *alpha, w_final, &tol); if (*verbose == 1) Rprintf ("Iteration %d tol=%f\n", i, tol); i += 1; } } } if (*defflag == 1) { temp_w1 = Calloc (e, double); temp_w2 = Calloc (e, double); if (*funflag == 1) { if (*verbose == 1) Rprintf ("Deflation FastICA using logcosh approx. to neg-entropy function\n"); for (i = 0; i < e; i++) { k = 0; gramsch_JM (w_init, e, e, i + 1); rowstd_JM (w_init, e, e, i + 1); tol = 1; while ((tol > (*lim)) && (k < (*maxit))) { for (j = 0; j < e; j++) { temp_w1[j] = w_init[i * e + j]; } Def_logcosh_JM (temp_w1, e, data1, e, p, *alpha, temp_w2); for (j = 0; j < e; j++) { w_init[i * e + j] = temp_w2[j]; } gramsch_JM (w_init, e, e, i + 1); rowstd_JM (w_init, e, e, i + 1); tol = 0; for (j = 0; j < e; j++) { tol += ((temp_w1[j]) * (w_init[i * e + j])); } tol = (fabs (fabs (tol) - 1)); k += 1; } if (*verbose == 1) Rprintf ("Component %d needed %d iterations tol=%f\n", i + 1, k, tol); } } if (*funflag == 2) { if (*verbose == 1) Rprintf ("Deflation FastICA using exponential approx. to neg-entropy function\n"); for (i = 0; i < e; i++) { k = 0; gramsch_JM (w_init, e, e, i + 1); rowstd_JM (w_init, e, e, i + 1); tol = 1; while ((tol > (*lim)) && (k < (*maxit))) { for (j = 0; j < e; j++) { temp_w1[j] = w_init[i * e + j]; } Def_exp_JM (temp_w1, e, data1, e, p, *alpha, temp_w2); for (j = 0; j < e; j++) { w_init[i * e + j] = temp_w2[j]; } gramsch_JM (w_init, e, e, i + 1); rowstd_JM (w_init, e, e, i + 1); tol = 0; for (j = 0; j < e; j++) { tol += ((temp_w1[j]) * (w_init[i * e + j])); } tol = (fabs (fabs (tol) - 1)); k += 1; } if (*verbose == 1) Rprintf ("Component %d needed %d iterations tol=%f\n", i + 1, k, tol); } } for (i = 0; i < e; i++) { for (j = 0; j < e; j++) { w_final[i * e + j] = w_init[i * e + j]; } } Free (temp_w1); Free (temp_w2); } /* calculate mixing matrix ansa */ calc_A_JM(w_final, Kmat1, data_pre, ee, nn, pp, ansa, ansx2); Free (data1); Free (Kmat); Free (temp1); Free (w_init); } #include static const R_CMethodDef CEntries[] = { {"icainc_JM", (DL_FUNC) &icainc_JM, 18}, {NULL, NULL, 0} }; void R_init_fastICA(DllInfo *dll) { R_registerRoutines(dll, CEntries, NULL, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } fastICA/NAMESPACE0000644000176000001440000000015613117443056013001 0ustar ripleyusersuseDynLib(fastICA, .registration = TRUE) importFrom("stats", "rnorm") export(fastICA, ica.R.def, ica.R.par) fastICA/R/0000755000176000001440000000000012421140531011746 5ustar ripleyusersfastICA/R/fastICA.R0000644000176000001440000001664212421140531013354 0ustar ripleyusersfastICA <- function (X, n.comp, alg.typ = c("parallel","deflation"), fun = c("logcosh", "exp"), alpha = 1, method = c("R", "C"), row.norm = FALSE, maxit = 200, tol = 1e-04, verbose = FALSE, w.init=NULL) { dd <- dim(X) d <- dd[dd != 1L] if (length(d) != 2L) stop("data must be matrix-conformal") X <- if (length(d) != length(dd)) matrix(X, d[1L], d[2L]) else as.matrix(X) if (alpha < 1 || alpha > 2) stop("alpha must be in range [1,2]") method <- match.arg(method) alg.typ <- match.arg(alg.typ) fun <- match.arg(fun) n <- nrow(X) p <- ncol(X) if (n.comp > min(n, p)) { message("'n.comp' is too large: reset to ", min(n, p)) n.comp <- min(n, p) } if(is.null(w.init)) w.init <- matrix(rnorm(n.comp^2),n.comp,n.comp) else { if(!is.matrix(w.init) || length(w.init) != (n.comp^2)) stop("w.init is not a matrix or is the wrong size") } if (method == "R") { if (verbose) message("Centering") X <- scale(X, scale = FALSE) X <- if (row.norm) t(scale(X, scale=row.norm)) else t(X) if (verbose) message("Whitening") V <- X %*% t(X)/n s <- La.svd(V) D <- diag(c(1/sqrt(s$d))) K <- D %*% t(s$u) K <- matrix(K[1:n.comp, ], n.comp, p) X1 <- K %*% X a <- if (alg.typ == "deflation") ica.R.def(X1, n.comp, tol = tol, fun = fun, alpha = alpha, maxit = maxit, verbose = verbose, w.init = w.init) else if (alg.typ == "parallel") ica.R.par(X1, n.comp, tol = tol, fun = fun, alpha = alpha, maxit = maxit, verbose = verbose, w.init = w.init) w <- a %*% K S <- w %*% X A <- t(w) %*% solve(w %*% t(w)) return(list(X = t(X), K = t(K), W = t(a), A = t(A), S = t(S))) } else if (method == "C") { a <- .C(icainc_JM, as.double(X), as.double(w.init), as.integer(p), as.integer(n), as.integer(n.comp), as.double(alpha), as.integer(1), as.integer(row.norm), as.integer(1L + (fun == "exp")), as.integer(maxit), as.double(tol), as.integer(alg.typ != "parallel"), as.integer(verbose), X = double(p * n), K = double(n.comp * p), W = double(n.comp * n.comp), A = double(p * n.comp), S = double(n.comp * n)) X1 <- matrix(a$X, n, p) K <- matrix(a$K, p, n.comp) W <- matrix(a$W, n.comp, n.comp) A <- matrix(a$A, n.comp, p) S <- matrix(a$S, n, n.comp) list(X = X1, K = K, W = W, A = A, S = S) } } ica.R.def <- function (X, n.comp, tol, fun, alpha, maxit, verbose, w.init) { if (verbose && fun == "logcosh") message("Deflation FastICA using logcosh approx. to neg-entropy function") if (verbose && fun =="exp") message("Deflation FastICA using exponential approx. to neg-entropy function") p <- ncol(X) W <- matrix(0, n.comp, n.comp) for (i in 1:n.comp) { if (verbose) message("Component ", i) w <- matrix(w.init[i,], n.comp, 1) if (i > 1) { t <- w t[1:length(t)] <- 0 for (u in 1:(i - 1)) { k <- sum(w * W[u, ]) t <- t + k * W[u, ] } w <- w - t } w <- w/sqrt(sum(w^2)) lim <- rep(1000, maxit) it <- 1 if (fun == "logcosh") { while (lim[it] > tol && it < maxit) { wx <- t(w) %*% X gwx <- tanh(alpha * wx) gwx <- matrix(gwx, n.comp, p, byrow = TRUE) xgwx <- X * gwx v1 <- apply(xgwx, 1, FUN = mean) g.wx <- alpha * (1 - (tanh(alpha * wx))^2) v2 <- mean(g.wx) * w w1 <- v1 - v2 w1 <- matrix(w1, n.comp, 1) it <- it + 1 if (i > 1) { t <- w1 t[1:length(t)] <- 0 for (u in 1:(i - 1)) { k <- sum(w1 * W[u, ]) t <- t + k * W[u, ] } w1 <- w1 - t } w1 <- w1/sqrt(sum(w1^2)) lim[it] <- Mod(Mod(sum((w1 * w))) - 1) if (verbose) message("Iteration ", it - 1, " tol = ", format(lim[it])) w <- matrix(w1, n.comp, 1) } } if (fun == "exp") { while (lim[it] > tol && it < maxit) { wx <- t(w) %*% X gwx <- wx * exp(-(wx^2)/2) gwx <- matrix(gwx, n.comp, p, byrow = TRUE) xgwx <- X * gwx v1 <- apply(xgwx, 1, FUN = mean) g.wx <- (1 - wx^2) * exp(-(wx^2)/2) v2 <- mean(g.wx) * w w1 <- v1 - v2 w1 <- matrix(w1, n.comp, 1) it <- it + 1 if (i > 1) { t <- w1 t[1:length(t)] <- 0 for (u in 1:(i - 1)) { k <- sum(w1 * W[u, ]) t <- t + k * W[u, ] } w1 <- w1 - t } w1 <- w1/sqrt(sum(w1^2)) lim[it] <- Mod(Mod(sum((w1 * w))) - 1) if (verbose) message("Iteration ", it - 1, " tol = ", format(lim[it])) w <- matrix(w1, n.comp, 1) } } W[i, ] <- w } W } ica.R.par <- function (X, n.comp, tol, fun, alpha, maxit, verbose, w.init) { Diag <- function(d) if(length(d) > 1L) diag(d) else as.matrix(d) p <- ncol(X) W <- w.init sW <- La.svd(W) W <- sW$u %*% Diag(1/sW$d) %*% t(sW$u) %*% W W1 <- W lim <- rep(1000, maxit) it <- 1 if (fun == "logcosh") { if (verbose) message("Symmetric FastICA using logcosh approx. to neg-entropy function") while (lim[it] > tol && it < maxit) { wx <- W %*% X gwx <- tanh(alpha * wx) v1 <- gwx %*% t(X)/p g.wx <- alpha * (1 - (gwx)^2) v2 <- Diag(apply(g.wx, 1, FUN = mean)) %*% W W1 <- v1 - v2 sW1 <- La.svd(W1) W1 <- sW1$u %*% Diag(1/sW1$d) %*% t(sW1$u) %*% W1 lim[it + 1] <- max(Mod(Mod(diag(W1 %*% t(W))) - 1)) W <- W1 if (verbose) message("Iteration ", it, " tol = ", format(lim[it + 1])) it <- it + 1 } } if (fun == "exp") { if (verbose) message("Symmetric FastICA using exponential approx. to neg-entropy function") while (lim[it] > tol && it < maxit) { wx <- W %*% X gwx <- wx * exp(-(wx^2)/2) v1 <- gwx %*% t(X)/p g.wx <- (1 - wx^2) * exp(-(wx^2)/2) v2 <- Diag(apply(g.wx, 1, FUN = mean)) %*% W W1 <- v1 - v2 sW1 <- La.svd(W1) W1 <- sW1$u %*% Diag(1/sW1$d) %*% t(sW1$u) %*% W1 lim[it + 1] <- max(Mod(Mod(diag(W1 %*% t(W))) - 1)) W <- W1 if (verbose) message("Iteration ", it, " tol = ", format(lim[it + 1])) it <- it + 1 } } W } fastICA/MD50000644000176000001440000000110513510655147012070 0ustar ripleyusers6c752e7f3a853bad90be3d19e2024ce8 *DESCRIPTION 42f8cf9076b983c5611fc628f614a114 *NAMESPACE 066745157170b756421854be7fbabb10 *R/fastICA.R 40621dbca246a248256ae059e627a613 *inst/HISTORY 3337902acc88f55c46cc1dc68be752ef *inst/README be438fa6cf3cbc59a652739d24e97ff4 *man/fastICA.Rd 6af3bdac63e29da7ca67a0b8a520a74f *man/ica.R.def.Rd e4cfec0c6d0723b8f7f70436baeae932 *man/ica.R.par.Rd 2a6f9e9e044a78154d3cfda5936d6f48 *src/Makevars ec2c3597b1c080f123eafb0f3130f153 *src/fastICA-win.def 6bba5c044b6e2bd8425223f5f2e39b8b *src/ica.c 03b612d8ff2a470ae415f68ea2e55afa *tests/one-component.R fastICA/DESCRIPTION0000644000176000001440000000104013510655147013264 0ustar ripleyusersPackage: fastICA Version: 1.2-2 Date: 2019-07-08 Title: FastICA Algorithms to Perform ICA and Projection Pursuit Author: J L Marchini, C Heaton and B D Ripley Maintainer: Brian Ripley Depends: R (>= 3.0.0) Suggests: MASS Description: Implementation of FastICA algorithm to perform Independent Component Analysis (ICA) and Projection Pursuit. License: GPL-2 | GPL-3 NeedsCompilation: yes Packaged: 2019-07-08 14:58:13 UTC; ripley Repository: CRAN Date/Publication: 2019-07-08 14:59:51 UTC fastICA/man/0000755000176000001440000000000012211376653012335 5ustar ripleyusersfastICA/man/ica.R.par.Rd0000644000176000001440000000223211754561777014356 0ustar ripleyusers\name{ica.R.par} \alias{ica.R.par} \title{R code for FastICA using a parallel scheme} \description{R code for FastICA using a parallel scheme in which the components are estimated simultaneously. This function is called by the fastICA function. } \usage{ ica.R.par(X, n.comp, tol, fun, alpha, maxit, verbose, w.init) } \arguments{ \item{X}{data matrix.} \item{n.comp}{number of components to be extracted.} \item{tol}{a positive scalar giving the tolerance at which the un-mixing matrix is considered to have converged.} \item{fun}{the functional form of the \eqn{G} function used in the approximation to negentropy.} \item{alpha}{constant in range [1,2] used in approximation to negentropy when \code{fun == "logcosh"}.} \item{maxit}{maximum number of iterations to perform.} \item{verbose}{a logical value indicating the level of output as the algorithm runs.} \item{w.init}{Initial value of un-mixing matrix.} } \value{The estimated un-mixing matrix W.} \details{ See the help on \code{\link{fastICA}} for details. } \author{ J L Marchini and C Heaton } \seealso{\code{\link{fastICA}}, \code{\link{ica.R.def}}} \keyword{utilities} fastICA/man/ica.R.def.Rd0000644000176000001440000000222311754561777014332 0ustar ripleyusers\name{ica.R.def} \alias{ica.R.def} \title{R code for FastICA using a deflation scheme} \description{R code for FastICA using a deflation scheme in which the components are estimated one by one. This function is called by the fastICA function. } \usage{ ica.R.def(X, n.comp, tol, fun, alpha, maxit, verbose, w.init) } \arguments{ \item{X}{data matrix} \item{n.comp}{number of components to be extracted} \item{tol}{a positive scalar giving the tolerance at which the un-mixing matrix is considered to have converged.} \item{fun}{the functional form of the \eqn{G} function used in the approximation to negentropy.} \item{alpha}{constant in range [1,2] used in approximation to negentropy when \code{fun == "logcosh"}} \item{maxit}{maximum number of iterations to perform} \item{verbose}{a logical value indicating the level of output as the algorithm runs.} \item{w.init}{Initial value of un-mixing matrix.} } \value{The estimated un-mixing matrix W.} \details{ See the help on \code{\link{fastICA}} for details. } \author{ J L Marchini and C Heaton } \seealso{\code{\link{fastICA}}, \code{\link{ica.R.par}}} \keyword{utilities} fastICA/man/fastICA.Rd0000644000176000001440000001624012211376653014101 0ustar ripleyusers\name{fastICA} \alias{fastICA} \title{FastICA algorithm} \description{ This is an R and C code implementation of the FastICA algorithm of Aapo Hyvarinen et al. (\url{http://www.cs.helsinki.fi/u/ahyvarin/}) to perform Independent Component Analysis (ICA) and Projection Pursuit. } \usage{ fastICA(X, n.comp, alg.typ = c("parallel","deflation"), fun = c("logcosh","exp"), alpha = 1.0, method = c("R","C"), row.norm = FALSE, maxit = 200, tol = 1e-04, verbose = FALSE, w.init = NULL) } \arguments{ \item{X}{a data matrix with \code{n} rows representing observations and \code{p} columns representing variables.} \item{n.comp}{number of components to be extracted} \item{alg.typ}{if \code{alg.typ == "parallel"} the components are extracted simultaneously (the default). if \code{alg.typ == "deflation"} the components are extracted one at a time.} \item{fun}{the functional form of the \eqn{G} function used in the approximation to neg-entropy (see \sQuote{details}).} \item{alpha}{constant in range [1, 2] used in approximation to neg-entropy when \code{fun == "logcosh"}} \item{method}{if \code{method == "R"} then computations are done exclusively in \R (default). The code allows the interested \R user to see exactly what the algorithm does. if \code{method == "C"} then C code is used to perform most of the computations, which makes the algorithm run faster. During compilation the C code is linked to an optimized BLAS library if present, otherwise stand-alone BLAS routines are compiled.} \item{row.norm}{a logical value indicating whether rows of the data matrix \code{X} should be standardized beforehand.} \item{maxit}{maximum number of iterations to perform.} \item{tol}{a positive scalar giving the tolerance at which the un-mixing matrix is considered to have converged.} \item{verbose}{a logical value indicating the level of output as the algorithm runs.} \item{w.init}{Initial un-mixing matrix of dimension \code{c(n.comp, n.comp)}. If \code{NULL} (default) then a matrix of normal r.v.'s is used.} } \details{ \bold{Independent Component Analysis (ICA)} The data matrix X is considered to be a linear combination of non-Gaussian (independent) components i.e. X = SA where columns of S contain the independent components and A is a linear mixing matrix. In short ICA attempts to \sQuote{un-mix} the data by estimating an un-mixing matrix W where XW = S. Under this generative model the measured \sQuote{signals} in X will tend to be `more Gaussian' than the source components (in S) due to the Central Limit Theorem. Thus, in order to extract the independent components/sources we search for an un-mixing matrix W that maximizes the non-gaussianity of the sources. In FastICA, non-gaussianity is measured using approximations to neg-entropy (\eqn{J}) which are more robust than kurtosis-based measures and fast to compute. The approximation takes the form \deqn{J(y) = [E\{G(y)\}-E\{G(v)\}]^2}{J(y) = [E G(y) - E G(v)]^2} where \eqn{v} is a N(0,1) r.v. The following choices of G are included as options \eqn{G(u)=\frac{1}{\alpha} \log \cosh (\alpha u)}{G(u) = 1/alpha log cosh (alpha u)} and \eqn{G(u)=-\exp(u^2/2)}{G(u) = -exp(-u^2/2)}. \bold{Algorithm} First, the data are centered by subtracting the mean of each column of the data matrix X. The data matrix is then \sQuote{whitened} by projecting the data onto its principal component directions i.e. X -> XK where K is a pre-whitening matrix. The number of components can be specified by the user. The ICA algorithm then estimates a matrix W s.t XKW = S . W is chosen to maximize the neg-entropy approximation under the constraints that W is an orthonormal matrix. This constraint ensures that the estimated components are uncorrelated. The algorithm is based on a fixed-point iteration scheme for maximizing the neg-entropy. \bold{Projection Pursuit} In the absence of a generative model for the data the algorithm can be used to find the projection pursuit directions. Projection pursuit is a technique for finding `interesting' directions in multi-dimensional datasets. These projections and are useful for visualizing the dataset and in density estimation and regression. Interesting directions are those which show the least Gaussian distribution, which is what the FastICA algorithm does. } \value{A list containing the following components \item{X}{pre-processed data matrix} \item{K}{pre-whitening matrix that projects data onto the first \code{n.comp} principal components.} \item{W}{estimated un-mixing matrix (see definition in details)} \item{A}{estimated mixing matrix} \item{S}{estimated source matrix} } \references{ A. Hyvarinen and E. Oja (2000) Independent Component Analysis: Algorithms and Applications, \emph{Neural Networks}, \bold{13(4-5)}:411-430 } \author{ J L Marchini and C Heaton } \seealso{\code{\link{ica.R.def}}, \code{\link{ica.R.par}}} \examples{ #--------------------------------------------------- #Example 1: un-mixing two mixed independent uniforms #--------------------------------------------------- S <- matrix(runif(10000), 5000, 2) A <- matrix(c(1, 1, -1, 3), 2, 2, byrow = TRUE) X <- S \%*\% A a <- fastICA(X, 2, alg.typ = "parallel", fun = "logcosh", alpha = 1, method = "C", row.norm = FALSE, maxit = 200, tol = 0.0001, verbose = TRUE) par(mfrow = c(1, 3)) plot(a$X, main = "Pre-processed data") plot(a$X \%*\% a$K, main = "PCA components") plot(a$S, main = "ICA components") #-------------------------------------------- #Example 2: un-mixing two independent signals #-------------------------------------------- S <- cbind(sin((1:1000)/20), rep((((1:200)-100)/100), 5)) A <- matrix(c(0.291, 0.6557, -0.5439, 0.5572), 2, 2) X <- S \%*\% A a <- fastICA(X, 2, alg.typ = "parallel", fun = "logcosh", alpha = 1, method = "R", row.norm = FALSE, maxit = 200, tol = 0.0001, verbose = TRUE) par(mfcol = c(2, 3)) plot(1:1000, S[,1 ], type = "l", main = "Original Signals", xlab = "", ylab = "") plot(1:1000, S[,2 ], type = "l", xlab = "", ylab = "") plot(1:1000, X[,1 ], type = "l", main = "Mixed Signals", xlab = "", ylab = "") plot(1:1000, X[,2 ], type = "l", xlab = "", ylab = "") plot(1:1000, a$S[,1 ], type = "l", main = "ICA source estimates", xlab = "", ylab = "") plot(1:1000, a$S[, 2], type = "l", xlab = "", ylab = "") #----------------------------------------------------------- #Example 3: using FastICA to perform projection pursuit on a # mixture of bivariate normal distributions #----------------------------------------------------------- if(require(MASS)){ x <- mvrnorm(n = 1000, mu = c(0, 0), Sigma = matrix(c(10, 3, 3, 1), 2, 2)) x1 <- mvrnorm(n = 1000, mu = c(-1, 2), Sigma = matrix(c(10, 3, 3, 1), 2, 2)) X <- rbind(x, x1) a <- fastICA(X, 2, alg.typ = "deflation", fun = "logcosh", alpha = 1, method = "R", row.norm = FALSE, maxit = 200, tol = 0.0001, verbose = TRUE) par(mfrow = c(1, 3)) plot(a$X, main = "Pre-processed data") plot(a$X \%*\% a$K, main = "PCA components") plot(a$S, main = "ICA components") } } \keyword{multivariate}