fastICA/0000755000176000001440000000000014726261631011564 5ustar ripleyusersfastICA/tests/0000755000176000001440000000000013013621652012715 5ustar ripleyusersfastICA/tests/one-component.R0000644000176000001440000000076513013621652015631 0ustar ripleyusers## failed in 1.1-9 library(fastICA) set.seed(49394) S <- matrix(runif(10000), 5000, 2) A <- matrix(c(1, 1, -1, 3), 2, 2, byrow = TRUE) X <- S%*%A a <- fastICA(X, 1, alg.typ = "parallel", fun = "logcosh", alpha = 1, method = "C", row.norm = FALSE, maxit = 200, tol = 0.0001, verbose = TRUE) # good a <- fastICA(X, 1, alg.typ = "parallel", fun = "logcosh", alpha = 1, method = "R", row.norm = FALSE, maxit = 200, tol = 0.0001, verbose = TRUE) # fails fastICA/MD50000644000176000001440000000110514726261631012071 0ustar ripleyusers69089c69665cd2b9270157091af65a74 *DESCRIPTION 42f8cf9076b983c5611fc628f614a114 *NAMESPACE 066745157170b756421854be7fbabb10 *R/fastICA.R 40621dbca246a248256ae059e627a613 *inst/HISTORY 3337902acc88f55c46cc1dc68be752ef *inst/README e2e9a06c7266d7de120f4644c0c07cdf *man/fastICA.Rd 6af3bdac63e29da7ca67a0b8a520a74f *man/ica.R.def.Rd e4cfec0c6d0723b8f7f70436baeae932 *man/ica.R.par.Rd 2a6f9e9e044a78154d3cfda5936d6f48 *src/Makevars ec2c3597b1c080f123eafb0f3130f153 *src/fastICA-win.def 250a9c08ba76dd13d102d4b750ec1dcf *src/ica.c 03b612d8ff2a470ae415f68ea2e55afa *tests/one-component.R fastICA/R/0000755000176000001440000000000013013621652011754 5ustar ripleyusersfastICA/R/fastICA.R0000644000176000001440000001664213013621652013362 0ustar ripleyusersfastICA <- function (X, n.comp, alg.typ = c("parallel","deflation"), fun = c("logcosh", "exp"), alpha = 1, method = c("R", "C"), row.norm = FALSE, maxit = 200, tol = 1e-04, verbose = FALSE, w.init=NULL) { dd <- dim(X) d <- dd[dd != 1L] if (length(d) != 2L) stop("data must be matrix-conformal") X <- if (length(d) != length(dd)) matrix(X, d[1L], d[2L]) else as.matrix(X) if (alpha < 1 || alpha > 2) stop("alpha must be in range [1,2]") method <- match.arg(method) alg.typ <- match.arg(alg.typ) fun <- match.arg(fun) n <- nrow(X) p <- ncol(X) if (n.comp > min(n, p)) { message("'n.comp' is too large: reset to ", min(n, p)) n.comp <- min(n, p) } if(is.null(w.init)) w.init <- matrix(rnorm(n.comp^2),n.comp,n.comp) else { if(!is.matrix(w.init) || length(w.init) != (n.comp^2)) stop("w.init is not a matrix or is the wrong size") } if (method == "R") { if (verbose) message("Centering") X <- scale(X, scale = FALSE) X <- if (row.norm) t(scale(X, scale=row.norm)) else t(X) if (verbose) message("Whitening") V <- X %*% t(X)/n s <- La.svd(V) D <- diag(c(1/sqrt(s$d))) K <- D %*% t(s$u) K <- matrix(K[1:n.comp, ], n.comp, p) X1 <- K %*% X a <- if (alg.typ == "deflation") ica.R.def(X1, n.comp, tol = tol, fun = fun, alpha = alpha, maxit = maxit, verbose = verbose, w.init = w.init) else if (alg.typ == "parallel") ica.R.par(X1, n.comp, tol = tol, fun = fun, alpha = alpha, maxit = maxit, verbose = verbose, w.init = w.init) w <- a %*% K S <- w %*% X A <- t(w) %*% solve(w %*% t(w)) return(list(X = t(X), K = t(K), W = t(a), A = t(A), S = t(S))) } else if (method == "C") { a <- .C(icainc_JM, as.double(X), as.double(w.init), as.integer(p), as.integer(n), as.integer(n.comp), as.double(alpha), as.integer(1), as.integer(row.norm), as.integer(1L + (fun == "exp")), as.integer(maxit), as.double(tol), as.integer(alg.typ != "parallel"), as.integer(verbose), X = double(p * n), K = double(n.comp * p), W = double(n.comp * n.comp), A = double(p * n.comp), S = double(n.comp * n)) X1 <- matrix(a$X, n, p) K <- matrix(a$K, p, n.comp) W <- matrix(a$W, n.comp, n.comp) A <- matrix(a$A, n.comp, p) S <- matrix(a$S, n, n.comp) list(X = X1, K = K, W = W, A = A, S = S) } } ica.R.def <- function (X, n.comp, tol, fun, alpha, maxit, verbose, w.init) { if (verbose && fun == "logcosh") message("Deflation FastICA using logcosh approx. to neg-entropy function") if (verbose && fun =="exp") message("Deflation FastICA using exponential approx. to neg-entropy function") p <- ncol(X) W <- matrix(0, n.comp, n.comp) for (i in 1:n.comp) { if (verbose) message("Component ", i) w <- matrix(w.init[i,], n.comp, 1) if (i > 1) { t <- w t[1:length(t)] <- 0 for (u in 1:(i - 1)) { k <- sum(w * W[u, ]) t <- t + k * W[u, ] } w <- w - t } w <- w/sqrt(sum(w^2)) lim <- rep(1000, maxit) it <- 1 if (fun == "logcosh") { while (lim[it] > tol && it < maxit) { wx <- t(w) %*% X gwx <- tanh(alpha * wx) gwx <- matrix(gwx, n.comp, p, byrow = TRUE) xgwx <- X * gwx v1 <- apply(xgwx, 1, FUN = mean) g.wx <- alpha * (1 - (tanh(alpha * wx))^2) v2 <- mean(g.wx) * w w1 <- v1 - v2 w1 <- matrix(w1, n.comp, 1) it <- it + 1 if (i > 1) { t <- w1 t[1:length(t)] <- 0 for (u in 1:(i - 1)) { k <- sum(w1 * W[u, ]) t <- t + k * W[u, ] } w1 <- w1 - t } w1 <- w1/sqrt(sum(w1^2)) lim[it] <- Mod(Mod(sum((w1 * w))) - 1) if (verbose) message("Iteration ", it - 1, " tol = ", format(lim[it])) w <- matrix(w1, n.comp, 1) } } if (fun == "exp") { while (lim[it] > tol && it < maxit) { wx <- t(w) %*% X gwx <- wx * exp(-(wx^2)/2) gwx <- matrix(gwx, n.comp, p, byrow = TRUE) xgwx <- X * gwx v1 <- apply(xgwx, 1, FUN = mean) g.wx <- (1 - wx^2) * exp(-(wx^2)/2) v2 <- mean(g.wx) * w w1 <- v1 - v2 w1 <- matrix(w1, n.comp, 1) it <- it + 1 if (i > 1) { t <- w1 t[1:length(t)] <- 0 for (u in 1:(i - 1)) { k <- sum(w1 * W[u, ]) t <- t + k * W[u, ] } w1 <- w1 - t } w1 <- w1/sqrt(sum(w1^2)) lim[it] <- Mod(Mod(sum((w1 * w))) - 1) if (verbose) message("Iteration ", it - 1, " tol = ", format(lim[it])) w <- matrix(w1, n.comp, 1) } } W[i, ] <- w } W } ica.R.par <- function (X, n.comp, tol, fun, alpha, maxit, verbose, w.init) { Diag <- function(d) if(length(d) > 1L) diag(d) else as.matrix(d) p <- ncol(X) W <- w.init sW <- La.svd(W) W <- sW$u %*% Diag(1/sW$d) %*% t(sW$u) %*% W W1 <- W lim <- rep(1000, maxit) it <- 1 if (fun == "logcosh") { if (verbose) message("Symmetric FastICA using logcosh approx. to neg-entropy function") while (lim[it] > tol && it < maxit) { wx <- W %*% X gwx <- tanh(alpha * wx) v1 <- gwx %*% t(X)/p g.wx <- alpha * (1 - (gwx)^2) v2 <- Diag(apply(g.wx, 1, FUN = mean)) %*% W W1 <- v1 - v2 sW1 <- La.svd(W1) W1 <- sW1$u %*% Diag(1/sW1$d) %*% t(sW1$u) %*% W1 lim[it + 1] <- max(Mod(Mod(diag(W1 %*% t(W))) - 1)) W <- W1 if (verbose) message("Iteration ", it, " tol = ", format(lim[it + 1])) it <- it + 1 } } if (fun == "exp") { if (verbose) message("Symmetric FastICA using exponential approx. to neg-entropy function") while (lim[it] > tol && it < maxit) { wx <- W %*% X gwx <- wx * exp(-(wx^2)/2) v1 <- gwx %*% t(X)/p g.wx <- (1 - wx^2) * exp(-(wx^2)/2) v2 <- Diag(apply(g.wx, 1, FUN = mean)) %*% W W1 <- v1 - v2 sW1 <- La.svd(W1) W1 <- sW1$u %*% Diag(1/sW1$d) %*% t(sW1$u) %*% W1 lim[it + 1] <- max(Mod(Mod(diag(W1 %*% t(W))) - 1)) W <- W1 if (verbose) message("Iteration ", it, " tol = ", format(lim[it + 1])) it <- it + 1 } } W } fastICA/src/0000755000176000001440000000000014726056110012345 5ustar ripleyusersfastICA/src/fastICA-win.def0000644000176000001440000000005413013621652015066 0ustar ripleyusersLIBRARY fastICA.dll EXPORTS R_init_fastICA fastICA/src/Makevars0000644000176000001440000000006013013621652014032 0ustar ripleyusersPKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) fastICA/src/ica.c0000644000176000001440000004731314661373667013276 0ustar ripleyusers#define USE_FC_LEN_T 1 #include #include #include #include #include #include static int min_JM (int, int); static int max_JM (int, int); static void rowcentre_JM (double *, int, int); static void colstandard_JM (double *, int, int); static void rowstd_JM (double *, int, int, int); static void transpose_mat_JM (double *, int, int, double *); static void mmult_JM (double *, int, int, double *, int, int, double *); static void orthog_mat_JM (double *, int, double *); static void gramsch_JM (double *, int, int, int); static void svd_JM (double *, int, int, double *, double *, double *); static void Symm_logcosh_JM (double *, int, double *, int, int, double, double *, double *); static void Symm_exp_JM (double *, int, double *, int, int, double, double *, double *); static void Def_logcosh_JM (double *, int, double *, int, int, double, double *); static void Def_exp_JM (double *, int, double *, int, int, double, double *); static void calc_A_JM(double*, double*, double*, int, int, int, double*, double*); static void calc_K_JM(double*, int, int, double*); #include static void rowcentre_JM (double *ans, int n, int p) { /* mean centres nxp matrix ans */ double tmp; int i, j; for (i = 0; i < n; i++) { tmp = 0; for (j = 0; j < p; j++) { tmp = tmp + ((double) ans[p * i + j]) / p; } for (j = 0; j < p; j++) { ans[p * i + j] -= (double) tmp; } } } static void colstandard_JM (double *ans, int n, int p) { /* transform columns of nxp matrix ans to have zero mean and unit variance */ double tmp[2]; double tmp1; int i, j; for (i = 0; i < p; i++) { tmp[0] = 0; tmp[1] = 0; for (j = 0; j < n; j++) { tmp[0] += (double) ans[p * j + i]; tmp[1] += ((double) ans[p * j + i]) * ((double) ans[p * j + i]); } tmp[0] = tmp[0] / n; tmp1 = (tmp[1] - n * (tmp[0]) * (tmp[0])) / (n - 1); tmp[1] = sqrt (tmp1); for (j = 0; j < n; j++) { ans[p * j + i] = (double) ((((double) ans[p * j + i]) - tmp[0]) / tmp[1]); } } } static void svd_JM (double *mat, int n, int p, double *u, double *d, double *v) { /* calculates svd decomposition of nxp matrix mat */ /* mat is a pointer to an nxp array of doubles */ /* n is a pointer to an integer specifying the no. of rows of mat */ /* p is a pointer to an integer specifying the no. of cols of mat */ /* u is a pointer to a double array of dimension (n,n) */ /* d is a pointer to a double array of dimension min(n,p) */ /* v is a pointer to a double array of dimension (p,p) */ int info, *iwork, lwork, a, b; size_t iwork_size, ilwork, nn = n, pp = p, mm; double *work, *mat1, *u1, *v1; char jobz = 'A'; mm = min_JM(n, p); iwork_size = 8 * (size_t) mm; a = max_JM(n, p); b = (int)(4 * mm * mm + 4 * mm); ilwork = 3 * mm * mm + max_JM(a, b); /* FIXME: Windows objected here are nm, pp are size_t, so long long But they need to be less that int since passed as int* In any case, this is 32-bit rule */ if (ilwork > INT_MAX) Rf_error("svd on %u x %u exceeds Fortran indexing limits", n, p); work = R_Calloc (ilwork, double); iwork = R_Calloc (iwork_size, int); mat1 = R_Calloc (nn * pp, double); u1 = R_Calloc (nn * nn, double); v1 = R_Calloc (pp * pp, double); transpose_mat_JM (mat, n, p, mat1); lwork = (int) ilwork; F77_CALL (dgesdd) (&jobz, &n, &p, mat1, &n, d, u1, &n, v1, &p, work, &lwork, iwork, &info FCONE); transpose_mat_JM (u1, n, n, u); transpose_mat_JM (v1, p, p, v); R_Free (mat1); R_Free (u1); R_Free (v1); R_Free (work); R_Free (iwork); } static void transpose_mat_JM (double *mat, int n, int p, double *ans) { /* transpose nxp matrix mat */ int i, j; for (i = 0; i < n; i++) { for (j = 0; j < p; j++) { *(ans + j * n + i) = *(mat + i * p + j); } } } static int min_JM (int a, int b) { /* find minimum of a and b */ int ans; ans = b; if (a < b) ans = a; return ans; } static int max_JM (int a, int b) { /* find maximum of a and b */ int ans; ans = b; if (a > b) ans = a; return ans; } static void mmult_JM (double *A, int n, int p, double *B, int q, int r, double *C) { /* matrix multiplication using FORTRAN BLAS routine SGEMM */ /* A is (n*p) and B is (q*r), A*B returned to C */ double alpha = 1.0, beta = 0.0; int M, K, N; char transA = 'N', transB = 'N'; if (p != q) { Rf_error ("Error, matrices not suitable\nfor multiplication"); } else { M = n; K = p; N = r; F77_CALL (dgemm) (&transA, &transB, &N, &M, &K, &alpha, B, &N, A, &K, &beta, C, &N FCONE FCONE); } } static void orthog_mat_JM (double *mat, int e, double *orthog) { /* take Wmat, (e*e), and return orthogonalized version to orthog_W */ double *u, *v, *d, *temp; int i; size_t ee = e; u = R_Calloc (ee * ee, double); d = R_Calloc (ee, double); v = R_Calloc (ee * ee, double); temp = R_Calloc (ee * ee, double); svd_JM (mat, e, e, u, d, v); for (i = 0; i < e; i++) { temp[i * e + i] = 1 / (d[i]); } mmult_JM (u, e, e, temp, e, e, v); transpose_mat_JM (u, e, e, temp); mmult_JM (v, e, e, temp, e, e, u); mmult_JM (u, e, e, mat, e, e, orthog); R_Free (u); R_Free (v); R_Free (d); R_Free (temp); } static void Symm_logcosh_JM (double *w_init, int e, double *data, int f, int p, double alpha, double *w_final, double *Tol) { /* Function that carries out Symmetric ICA using a logcosh approximation to the neg. entropy function */ double *mat1, *mat2, *mat3, *mat4, *mat5, *mat6; int i, j; double mean; if (e != f) { Rf_error ("error in Symm_logcosh_JM, dims dont match"); } else { size_t es = (size_t)e * (size_t)e; size_t ep = (size_t)e * (size_t)p; mat1 = R_Calloc (ep, double); mat2 = R_Calloc (ep, double); mat3 = R_Calloc (es, double); mat4 = R_Calloc (es, double); mat5 = R_Calloc (es, double); mat6 = R_Calloc (es, double); mmult_JM (w_init, e, e, data, e, p, mat1); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat1[i * p + j] = tanh (alpha * mat1[i * p + j]); } } transpose_mat_JM (data, e, p, mat2); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat2[i * p + j] = (mat2[i * p + j]) / p; } } mmult_JM (mat1, e, p, mat2, p, e, mat3); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat1[i * p + j] = (alpha * (1 - (mat1[i * p + j]) * (mat1[i * p + j]))); } } for (i = 0; i < e; i++) { mean = 0; for (j = 0; j < p; j++) { mean += ((mat1[i * p + j]) / p); } mat4[i * e + i] = mean; } mmult_JM (mat4, e, e, w_init, e, e, mat5); for (i = 0; i < e; i++) { for (j = 0; j < e; j++) { mat4[i * e + j] = (mat3[i * e + j] - mat5[i * e + j]); } } transpose_mat_JM (w_init, e, e, mat6); orthog_mat_JM (mat4, e, w_final); mmult_JM (w_final, e, e, mat6, e, e, mat5); mean = 0; for (i = 0; i < e; i++) { if (fabs (1 - fabs (mat5[i * e + i])) > mean) { mean = (fabs (1 - fabs (mat5[i * e + i]))); } } *Tol = mean; R_Free (mat1); R_Free (mat2); R_Free (mat3); R_Free (mat4); R_Free (mat5); R_Free (mat6); } } static void Def_logcosh_JM (double *w_init, int e, double *data, int f, int p, double alpha, double *w_final) { /* Function that carries out Deflation ICA using an logcosh approximation to the neg. entropy function */ double *mat1, *mat2, *mat3, *mat4; int i, j; double mean; if (e != f) { Rf_error ("error in Def_logcosh_JM, dims dont match"); } else { mat1 = R_Calloc (p, double); mat2 = R_Calloc ((size_t)e * (size_t)p, double); mat3 = R_Calloc (e, double); mat4 = R_Calloc (e, double); mmult_JM (w_init, 1, e, data, e, p, mat1); for (i = 0; i < p; i++) { mat1[i] = tanh (alpha * mat1[i]); } transpose_mat_JM (data, e, p, mat2); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat2[i * p + j] = (mat2[i * p + j]) / p; } } mmult_JM (mat1, 1, p, mat2, p, e, mat3); for (i = 0; i < p; i++) { mat1[i] = (alpha * (1 - (mat1[i]) * (mat1[i]))); } mean = 0; for (j = 0; j < p; j++) { mean += ((mat1[j]) / p); } for (i = 0; i < e; i++) { mat4[i] = (w_init[i]) * mean; } for (i = 0; i < e; i++) { w_final[i] = (mat3[i] - mat4[i]); } R_Free (mat1); R_Free (mat2); R_Free (mat3); R_Free (mat4); } } static void Symm_exp_JM (double *w_init, int e, double *data, int f, int p, double alpha, double *w_final, double *Tol) { /* Function that carries out Symmetric ICA using a exponential approximation to the neg. entropy function */ double *mat1, *mat2, *mat3, *mat4, *mat5, *mat0, *mat6; int i, j; double mean; if (e != f) { Rf_error ("error in Symm_exp_JM, dims dont match"); } else { size_t ep = (size_t)e * (size_t)p; size_t ee = (size_t)e * (size_t)e; mat0 = R_Calloc (ep, double); mat1 = R_Calloc (ep, double); mat2 = R_Calloc (ep, double); mat3 = R_Calloc (ee, double); mat4 = R_Calloc (ee, double); mat5 = R_Calloc (ee, double); mat6 = R_Calloc (ee, double); mmult_JM (w_init, e, e, data, e, p, mat1); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat0[i * p + j] = (mat1[i * p + j]) * exp (-0.5 * (mat1[i * p + j]) * (mat1[i * p + j])); } } transpose_mat_JM (data, e, p, mat2); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat2[i * p + j] = (mat2[i * p + j]) / p; } } mmult_JM (mat0, e, p, mat2, p, e, mat3); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat1[i * p + j] = ((1 - (mat1[i * p + j]) * (mat1[i * p + j])) * exp (-0.5 * (mat1 [i * p + j]) * (mat1 [i * p + j]))); } } for (i = 0; i < e; i++) { mean = 0; for (j = 0; j < p; j++) { mean += ((mat1[i * p + j]) / p); } mat4[i * e + i] = mean; } mmult_JM (mat4, e, e, w_init, e, e, mat5); for (i = 0; i < e; i++) { for (j = 0; j < e; j++) { mat4[i * e + j] = (mat3[i * e + j] - mat5[i * e + j]); } } transpose_mat_JM (w_init, e, e, mat6); orthog_mat_JM (mat4, e, w_final); mmult_JM (w_final, e, e, mat6, e, e, mat5); mean = 0; for (i = 0; i < e; i++) { if (fabs (1 - fabs (mat5[i * e + i])) > mean) { mean = (fabs (1 - fabs (mat5[i * e + i]))); } } *Tol = mean; R_Free (mat1); R_Free (mat2); R_Free (mat3); R_Free (mat4); R_Free (mat5); R_Free (mat0); R_Free (mat6); } } static void Def_exp_JM (double *w_init, int e, double *data, int f, int p, double alpha, double *w_final) { /* Function that carries out Deflation ICA using an exponential approximation to the neg. entropy function */ double *mat1, *mat2, *mat3, *mat4; int i, j; double mean; if (e != f) { Rf_error ("error in Def_exp_JM, dims dont match"); } else { mat1 = R_Calloc (p, double); mat2 = R_Calloc ((size_t)e * (size_t)p, double); mat3 = R_Calloc (e, double); mat4 = R_Calloc (e, double); mmult_JM (w_init, 1, e, data, e, p, mat1); for (i = 0; i < p; i++) { mat1[i] = ((mat1[i]) * exp (-0.5 * (mat1[i]) * (mat1[i]))); } transpose_mat_JM (data, e, p, mat2); for (i = 0; i < e; i++) { for (j = 0; j < p; j++) { mat2[i * p + j] = (mat2[i * p + j]) / p; } } mmult_JM (mat1, 1, p, mat2, p, e, mat3); mmult_JM (w_init, 1, e, data, e, p, mat1); for (i = 0; i < p; i++) { mat1[i] = ((1 - (mat1[i]) * (mat1[i])) * exp (-.5 * (mat1[i]) * (mat1[i]))); } mean = 0; for (j = 0; j < p; j++) { mean += ((mat1[j]) / p); } for (i = 0; i < e; i++) { mat4[i] = (w_init[i]) * mean; } for (i = 0; i < e; i++) { w_final[i] = (mat3[i] - mat4[i]); } R_Free (mat1); R_Free (mat2); R_Free (mat3); R_Free (mat4); } } static void gramsch_JM (double *ww, int n, int m, int k) { int ip, jp; double tmp; /* do Gram-Schmidt on row k of (n*m) matrix ww */ k -= 1; if (k > n) { Rf_error ("Error in gramsch"); } else { for (ip = 0; ip < k; ip++) { tmp = 0; for (jp = 0; jp < m; jp++) { tmp += ((ww[m * ip + jp]) * (ww[m * k + jp])); } for (jp = 0; jp < m; jp++) { ww[m * k + jp] = (ww[m * k + jp] - ((ww[m * ip + jp]) * tmp)); } } } } static void rowstd_JM (double *ww, int n, int m, int k) { /* for ww (n*m), make ||ww[k, ]|| equal 1 */ double tmp = 0; int i; k -= 1; if (k > n) { Rf_error ("Error in rowstd"); } else { for (i = 0; i < m; i++) { tmp += ((ww[k * m + i]) * (ww[k * m + i])); } tmp = sqrt (tmp); for (i = 0; i < m; i++) { ww[k * m + i] = ((ww[k * m + i]) / tmp); } } } static void calc_K_JM(double *x, int n, int p, double *K) { int i, j; double *xxt, *xt, *u, *d, *v, *temp1, *temp2; size_t nn = n, pp = p; xxt = R_Calloc (nn * nn, double); xt = R_Calloc (nn * pp, double); /* transpose x matrix */ transpose_mat_JM (x, n, p, xt); /* calculate sample covariance matrix xxt */ mmult_JM (x, n, p, xt, p, n, xxt); for (i = 0; i < n; i++) { for (j = 0; j < n; j++) { xxt[n * i + j] = xxt[n * i + j] / p; } } R_Free (xt); /* calculate svd decomposition of xxt */ u = R_Calloc (nn * nn, double); d = R_Calloc (nn, double); v = R_Calloc (nn * nn, double); svd_JM (xxt, n, n, u, d, v); /* calculate K matrix*/ temp1 = R_Calloc (nn * nn, double); temp2 = R_Calloc (nn * nn, double); for (i = 0; i < n; i++) { temp1[n * i + i] = 1 / sqrt (d[i]); } transpose_mat_JM (u, n, n, temp2); mmult_JM (temp1, n, n, temp2, n, n, K); R_Free (temp1); R_Free (temp2); R_Free(xxt); R_Free(u); R_Free(d); R_Free(v); } static void calc_A_JM(double *w, double *k, double *data, int e, int n, int p, double *A, double *unmixed_data) { /* calculate un-mixing matrix A */ int i; double *um, *umt, *umumt, *uu, *dd, *vv, *temp1, *temp2, *temp3; size_t nn = n, ee = e; um = R_Calloc (ee * nn, double); umt = R_Calloc (nn * ee, double); mmult_JM (w, e, e, k, e, n, um); mmult_JM (um, e, n, data, n, p, unmixed_data); transpose_mat_JM (um, e, n, umt); umumt = R_Calloc (ee * ee, double); mmult_JM (um, e, n, umt, n, e, umumt); uu = R_Calloc (ee * ee, double); dd = R_Calloc (ee, double); vv = R_Calloc (ee * ee, double); svd_JM (umumt, e, e, uu, dd, vv); temp1 = R_Calloc (ee * ee, double); for (i = 0; i < e; i++) { temp1[e * i + i] = 1 / (dd[i]); } temp2 = R_Calloc (ee * ee, double); temp3 = R_Calloc (ee * ee, double); transpose_mat_JM (vv, e, e, temp3); mmult_JM (temp3, e, e, temp1, e, e, temp2); transpose_mat_JM (uu, e, e, vv); mmult_JM (temp2, e, e, vv, e, e, uu); mmult_JM (umt, n, e, uu, e, e, A); R_Free(um); R_Free(umt); R_Free(umumt); R_Free(uu); R_Free(dd); R_Free(vv); R_Free(temp1); R_Free(temp2); R_Free(temp3); } // This is the sole function called from R. static void icainc_JM (double *data_matrix, double *w_matrix, int *nn, int *pp, int *ee, double *alpha, int *rowflag, int *colflag, int *funflag, int *maxit, double *lim, int *defflag, int *verbose, double *data_pre, double *Kmat1, double *w_final, double *ansa, double *ansx2) { /* main ICA function */ int i, j, k; int n = *nn, p = *pp, e = *ee; double tol; double *temp_w1, *temp_w2; double *data1, *Kmat, *temp1, *w_init; /* make a copy of the data matrix */ data1 = R_Calloc (n * p, double); for (i = 0; i < n; i++) { for (j = 0; j < p; j++) { data_pre[i * p + j] = data_matrix[i * p + j]; } } /* row center data matrix if required */ if (*rowflag == 1) { rowcentre_JM (data_pre, n, p); if (*verbose == 1) Rprintf ("Centering\n"); } /* standardize columns of data matrix if required*/ if (*colflag == 1) { colstandard_JM (data_pre, n, p); Rprintf("colstandard\n"); } /* calculate pre-whitening matrix Kmat */ if (*verbose == 1) Rprintf ("Whitening\n"); Kmat = R_Calloc (n * n, double); calc_K_JM(data_pre, n, p, Kmat); /* pre-whiten data and reduce dimension from size n to size e */ for (i = 0; i < e; i++) { for (j = 0; j < n; j++) { Kmat1[i * n + j] = Kmat[i * n + j]; } } mmult_JM (Kmat1, e, n, data_pre, n, p, data1); /* calculate initial (orthogonal) unmixing matrix w */ temp1 = R_Calloc (e * e, double); w_init = R_Calloc (e * e, double); for (i = 0; i < e; i++) { for (j = 0; j < e; j++) { temp1[i * e + j] = w_matrix[i * e + j]; } } orthog_mat_JM (temp1, e, w_init); /* Main ICA code */ if (*defflag == 0) { if (*funflag == 1) { if (*verbose == 1) Rprintf("Symmetric FastICA using logcosh approx. to neg-entropy function\n"); i = 1; Symm_logcosh_JM (w_init, e, data1, e, p, *alpha, w_final, &tol); if (*verbose == 1) Rprintf ("Iteration %d tol=%f\n", i, tol); i = 2; while ((tol > (*lim)) && (i < (*maxit))) { Symm_logcosh_JM (w_final, e, data1, e, p, *alpha, w_final, &tol); if (*verbose == 1) Rprintf ("Iteration %d tol=%f\n", i, tol); i += 1; } } if (*funflag == 2) { if (*verbose == 1) Rprintf("Symmetric FastICA using exponential approx. to neg-entropy function\n"); i = 1; Symm_exp_JM (w_init, e, data1, e, p, *alpha, w_final, &tol); if (*verbose == 1) Rprintf ("Iteration %d tol=%f\n", i, tol); i = 2; while ((tol > (*lim)) && (i < (*maxit))) { Symm_exp_JM (w_final, e, data1, e, p, *alpha, w_final, &tol); if (*verbose == 1) Rprintf ("Iteration %d tol=%f\n", i, tol); i += 1; } } } if (*defflag == 1) { temp_w1 = R_Calloc (e, double); temp_w2 = R_Calloc (e, double); if (*funflag == 1) { if (*verbose == 1) Rprintf ("Deflation FastICA using logcosh approx. to neg-entropy function\n"); for (i = 0; i < e; i++) { k = 0; gramsch_JM (w_init, e, e, i + 1); rowstd_JM (w_init, e, e, i + 1); tol = 1; while ((tol > (*lim)) && (k < (*maxit))) { for (j = 0; j < e; j++) { temp_w1[j] = w_init[i * e + j]; } Def_logcosh_JM (temp_w1, e, data1, e, p, *alpha, temp_w2); for (j = 0; j < e; j++) { w_init[i * e + j] = temp_w2[j]; } gramsch_JM (w_init, e, e, i + 1); rowstd_JM (w_init, e, e, i + 1); tol = 0; for (j = 0; j < e; j++) { tol += ((temp_w1[j]) * (w_init[i * e + j])); } tol = (fabs (fabs (tol) - 1)); k += 1; } if (*verbose == 1) Rprintf ("Component %d needed %d iterations tol=%f\n", i + 1, k, tol); } } if (*funflag == 2) { if (*verbose == 1) Rprintf ("Deflation FastICA using exponential approx. to neg-entropy function\n"); for (i = 0; i < e; i++) { k = 0; gramsch_JM (w_init, e, e, i + 1); rowstd_JM (w_init, e, e, i + 1); tol = 1; while ((tol > (*lim)) && (k < (*maxit))) { for (j = 0; j < e; j++) { temp_w1[j] = w_init[i * e + j]; } Def_exp_JM (temp_w1, e, data1, e, p, *alpha, temp_w2); for (j = 0; j < e; j++) { w_init[i * e + j] = temp_w2[j]; } gramsch_JM (w_init, e, e, i + 1); rowstd_JM (w_init, e, e, i + 1); tol = 0; for (j = 0; j < e; j++) { tol += ((temp_w1[j]) * (w_init[i * e + j])); } tol = (fabs (fabs (tol) - 1)); k += 1; } if (*verbose == 1) Rprintf ("Component %d needed %d iterations tol=%f\n", i + 1, k, tol); } } for (i = 0; i < e; i++) { for (j = 0; j < e; j++) { w_final[i * e + j] = w_init[i * e + j]; } } R_Free (temp_w1); R_Free (temp_w2); } /* calculate mixing matrix ansa */ calc_A_JM(w_final, Kmat1, data_pre, e, n, p, ansa, ansx2); R_Free (data1); R_Free (Kmat); R_Free (temp1); R_Free (w_init); } #include static const R_CMethodDef CEntries[] = { {"icainc_JM", (DL_FUNC) &icainc_JM, 18}, {NULL, NULL, 0} }; void R_init_fastICA(DllInfo *dll) { R_registerRoutines(dll, CEntries, NULL, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } fastICA/NAMESPACE0000644000176000001440000000015613124700055012772 0ustar ripleyusersuseDynLib(fastICA, .registration = TRUE) importFrom("stats", "rnorm") export(fastICA, ica.R.def, ica.R.par) fastICA/inst/0000755000176000001440000000000013013621652012530 5ustar ripleyusersfastICA/inst/README0000644000176000001440000000213613013621651013411 0ustar ripleyusersR Package - fastICA INTRODUCTION This file explains how to install the R package fastICA. I assume that you have a basic knowledge of UNIX/LINUX and that you already have R installed on your machine. INSTALLATION 1. Download the g-zipped tar archive fastICA_*.*-*.tar.gz 2. Install the package using R CMD INSTALL -l /path/to/LIB fastICA_*.*-*.tar.gz /path/to/LIB should be the library directory where you wish to put the package. If you have root access to your machine then use R CMD INSTALL fastICA_*.*-*.tar.gz and the package will be put into the default R library tree. 5. You can then start an R session and load the package using library(fastICA,lib.loc="/path/to/LIB") or library(fastICA) (if you installed the package into the default R library tree) FUNCTIONS The package contains the following functions fastICA - FastICA algorithm ica.R.def - R code for FastICA using a deflation scheme ica.R.par - R code for FastICA using a parallel scheme PROBLEMS/SUGGESTIONS If you have any problems/suggestions using this package, please email ripley@stats.ox.ac.uk fastICA/inst/HISTORY0000644000176000001440000000261313013621651013615 0ustar ripleyusers2001-08-29 - Version 1.0-1 -------------------------- -Cleaned up the help files a bit 2002-01-15 - Version 1.1-1 -------------------------- ** Changes suggested by B D Ripley ** -changed to Lapack SGESDD routine and calculate optimal workspace to increase speed -better error checking within the C code -more transparent C code -FastICA function replaced by fastICA and takes standard (nxp) data matrix as argument 2002-09-23 - Version 1.1-2 -------------------------- -all svd calls now done using La.svd() using method "dgesdd" -configure.win script added to package 2003-03-12 - Version 1.1-3 -------------------------- -fixed memory leak in function calc_K_JM in file ica.c 2003-04-04 - Version 1.1-4 -------------------------- -made code R 1.7.0 compatible 2004-06-08 - Version 1.1-5 -------------------------- -changed detection of blas & lapack, to be MacOS X compatible (no lsame in BLAS). 2004-12-23 - Version 1.1-6 -------------------------- - implemented Brian's changes 2006-05-12 - Version 1.1-7 -------------------------- - implemented change to fastICA function scaling 2006-05-16 - Version 1.1-8 -------------------------- - Brian made changes to separate out slamc.f and ensure it is compiled without optimization 2007-10-10 - Version 1.1-9 -------------------------- - Changes need to avoid looping on 2.6.0 under Windows. - Add NAMESPACE - BDR takes over as maintainer. fastICA/man/0000755000176000001440000000000014123636630012333 5ustar ripleyusersfastICA/man/fastICA.Rd0000644000176000001440000001624114123636630014100 0ustar ripleyusers\name{fastICA} \alias{fastICA} \title{FastICA algorithm} \description{ This is an R and C code implementation of the FastICA algorithm of Aapo Hyvarinen et al. (\url{https://www.cs.helsinki.fi/u/ahyvarin/}) to perform Independent Component Analysis (ICA) and Projection Pursuit. } \usage{ fastICA(X, n.comp, alg.typ = c("parallel","deflation"), fun = c("logcosh","exp"), alpha = 1.0, method = c("R","C"), row.norm = FALSE, maxit = 200, tol = 1e-04, verbose = FALSE, w.init = NULL) } \arguments{ \item{X}{a data matrix with \code{n} rows representing observations and \code{p} columns representing variables.} \item{n.comp}{number of components to be extracted} \item{alg.typ}{if \code{alg.typ == "parallel"} the components are extracted simultaneously (the default). if \code{alg.typ == "deflation"} the components are extracted one at a time.} \item{fun}{the functional form of the \eqn{G} function used in the approximation to neg-entropy (see \sQuote{details}).} \item{alpha}{constant in range [1, 2] used in approximation to neg-entropy when \code{fun == "logcosh"}} \item{method}{if \code{method == "R"} then computations are done exclusively in \R (default). The code allows the interested \R user to see exactly what the algorithm does. if \code{method == "C"} then C code is used to perform most of the computations, which makes the algorithm run faster. During compilation the C code is linked to an optimized BLAS library if present, otherwise stand-alone BLAS routines are compiled.} \item{row.norm}{a logical value indicating whether rows of the data matrix \code{X} should be standardized beforehand.} \item{maxit}{maximum number of iterations to perform.} \item{tol}{a positive scalar giving the tolerance at which the un-mixing matrix is considered to have converged.} \item{verbose}{a logical value indicating the level of output as the algorithm runs.} \item{w.init}{Initial un-mixing matrix of dimension \code{c(n.comp, n.comp)}. If \code{NULL} (default) then a matrix of normal r.v.'s is used.} } \details{ \bold{Independent Component Analysis (ICA)} The data matrix X is considered to be a linear combination of non-Gaussian (independent) components i.e. X = SA where columns of S contain the independent components and A is a linear mixing matrix. In short ICA attempts to \sQuote{un-mix} the data by estimating an un-mixing matrix W where XW = S. Under this generative model the measured \sQuote{signals} in X will tend to be `more Gaussian' than the source components (in S) due to the Central Limit Theorem. Thus, in order to extract the independent components/sources we search for an un-mixing matrix W that maximizes the non-gaussianity of the sources. In FastICA, non-gaussianity is measured using approximations to neg-entropy (\eqn{J}) which are more robust than kurtosis-based measures and fast to compute. The approximation takes the form \deqn{J(y) = [E\{G(y)\}-E\{G(v)\}]^2}{J(y) = [E G(y) - E G(v)]^2} where \eqn{v} is a N(0,1) r.v. The following choices of G are included as options \eqn{G(u)=\frac{1}{\alpha} \log \cosh (\alpha u)}{G(u) = 1/alpha log cosh (alpha u)} and \eqn{G(u)=-\exp(u^2/2)}{G(u) = -exp(-u^2/2)}. \bold{Algorithm} First, the data are centered by subtracting the mean of each column of the data matrix X. The data matrix is then \sQuote{whitened} by projecting the data onto its principal component directions i.e. X -> XK where K is a pre-whitening matrix. The number of components can be specified by the user. The ICA algorithm then estimates a matrix W s.t XKW = S . W is chosen to maximize the neg-entropy approximation under the constraints that W is an orthonormal matrix. This constraint ensures that the estimated components are uncorrelated. The algorithm is based on a fixed-point iteration scheme for maximizing the neg-entropy. \bold{Projection Pursuit} In the absence of a generative model for the data the algorithm can be used to find the projection pursuit directions. Projection pursuit is a technique for finding `interesting' directions in multi-dimensional datasets. These projections and are useful for visualizing the dataset and in density estimation and regression. Interesting directions are those which show the least Gaussian distribution, which is what the FastICA algorithm does. } \value{A list containing the following components \item{X}{pre-processed data matrix} \item{K}{pre-whitening matrix that projects data onto the first \code{n.comp} principal components.} \item{W}{estimated un-mixing matrix (see definition in details)} \item{A}{estimated mixing matrix} \item{S}{estimated source matrix} } \references{ A. Hyvarinen and E. Oja (2000) Independent Component Analysis: Algorithms and Applications, \emph{Neural Networks}, \bold{13(4-5)}:411-430 } \author{ J L Marchini and C Heaton } \seealso{\code{\link{ica.R.def}}, \code{\link{ica.R.par}}} \examples{ #--------------------------------------------------- #Example 1: un-mixing two mixed independent uniforms #--------------------------------------------------- S <- matrix(runif(10000), 5000, 2) A <- matrix(c(1, 1, -1, 3), 2, 2, byrow = TRUE) X <- S \%*\% A a <- fastICA(X, 2, alg.typ = "parallel", fun = "logcosh", alpha = 1, method = "C", row.norm = FALSE, maxit = 200, tol = 0.0001, verbose = TRUE) par(mfrow = c(1, 3)) plot(a$X, main = "Pre-processed data") plot(a$X \%*\% a$K, main = "PCA components") plot(a$S, main = "ICA components") #-------------------------------------------- #Example 2: un-mixing two independent signals #-------------------------------------------- S <- cbind(sin((1:1000)/20), rep((((1:200)-100)/100), 5)) A <- matrix(c(0.291, 0.6557, -0.5439, 0.5572), 2, 2) X <- S \%*\% A a <- fastICA(X, 2, alg.typ = "parallel", fun = "logcosh", alpha = 1, method = "R", row.norm = FALSE, maxit = 200, tol = 0.0001, verbose = TRUE) par(mfcol = c(2, 3)) plot(1:1000, S[,1 ], type = "l", main = "Original Signals", xlab = "", ylab = "") plot(1:1000, S[,2 ], type = "l", xlab = "", ylab = "") plot(1:1000, X[,1 ], type = "l", main = "Mixed Signals", xlab = "", ylab = "") plot(1:1000, X[,2 ], type = "l", xlab = "", ylab = "") plot(1:1000, a$S[,1 ], type = "l", main = "ICA source estimates", xlab = "", ylab = "") plot(1:1000, a$S[, 2], type = "l", xlab = "", ylab = "") #----------------------------------------------------------- #Example 3: using FastICA to perform projection pursuit on a # mixture of bivariate normal distributions #----------------------------------------------------------- if(require(MASS)){ x <- mvrnorm(n = 1000, mu = c(0, 0), Sigma = matrix(c(10, 3, 3, 1), 2, 2)) x1 <- mvrnorm(n = 1000, mu = c(-1, 2), Sigma = matrix(c(10, 3, 3, 1), 2, 2)) X <- rbind(x, x1) a <- fastICA(X, 2, alg.typ = "deflation", fun = "logcosh", alpha = 1, method = "R", row.norm = FALSE, maxit = 200, tol = 0.0001, verbose = TRUE) par(mfrow = c(1, 3)) plot(a$X, main = "Pre-processed data") plot(a$X \%*\% a$K, main = "PCA components") plot(a$S, main = "ICA components") } } \keyword{multivariate} fastICA/man/ica.R.def.Rd0000644000176000001440000000222313013621651014304 0ustar ripleyusers\name{ica.R.def} \alias{ica.R.def} \title{R code for FastICA using a deflation scheme} \description{R code for FastICA using a deflation scheme in which the components are estimated one by one. This function is called by the fastICA function. } \usage{ ica.R.def(X, n.comp, tol, fun, alpha, maxit, verbose, w.init) } \arguments{ \item{X}{data matrix} \item{n.comp}{number of components to be extracted} \item{tol}{a positive scalar giving the tolerance at which the un-mixing matrix is considered to have converged.} \item{fun}{the functional form of the \eqn{G} function used in the approximation to negentropy.} \item{alpha}{constant in range [1,2] used in approximation to negentropy when \code{fun == "logcosh"}} \item{maxit}{maximum number of iterations to perform} \item{verbose}{a logical value indicating the level of output as the algorithm runs.} \item{w.init}{Initial value of un-mixing matrix.} } \value{The estimated un-mixing matrix W.} \details{ See the help on \code{\link{fastICA}} for details. } \author{ J L Marchini and C Heaton } \seealso{\code{\link{fastICA}}, \code{\link{ica.R.par}}} \keyword{utilities} fastICA/man/ica.R.par.Rd0000644000176000001440000000223213013621651014330 0ustar ripleyusers\name{ica.R.par} \alias{ica.R.par} \title{R code for FastICA using a parallel scheme} \description{R code for FastICA using a parallel scheme in which the components are estimated simultaneously. This function is called by the fastICA function. } \usage{ ica.R.par(X, n.comp, tol, fun, alpha, maxit, verbose, w.init) } \arguments{ \item{X}{data matrix.} \item{n.comp}{number of components to be extracted.} \item{tol}{a positive scalar giving the tolerance at which the un-mixing matrix is considered to have converged.} \item{fun}{the functional form of the \eqn{G} function used in the approximation to negentropy.} \item{alpha}{constant in range [1,2] used in approximation to negentropy when \code{fun == "logcosh"}.} \item{maxit}{maximum number of iterations to perform.} \item{verbose}{a logical value indicating the level of output as the algorithm runs.} \item{w.init}{Initial value of un-mixing matrix.} } \value{The estimated un-mixing matrix W.} \details{ See the help on \code{\link{fastICA}} for details. } \author{ J L Marchini and C Heaton } \seealso{\code{\link{fastICA}}, \code{\link{ica.R.def}}} \keyword{utilities} fastICA/DESCRIPTION0000644000176000001440000000157514726261631013302 0ustar ripleyusersPackage: fastICA Version: 1.2-7 Date: 2024-12-10 Title: FastICA Algorithms to Perform ICA and Projection Pursuit Authors@R: c(person(given = c("Jonathan", "L"), family = "Marchini", role = "aut"), person(given = "Chris", family = "Heaton", role = "aut"), person(given = "Brian", family = "Ripley", role = c("aut", "cre"), email = "Brian.Ripley@R-project.org")) Depends: R (>= 4.0.0) Suggests: MASS Description: Implementation of FastICA algorithm to perform Independent Component Analysis (ICA) and Projection Pursuit. License: GPL-2 | GPL-3 NeedsCompilation: yes Packaged: 2024-12-10 15:20:40 UTC; ripley Author: Jonathan L Marchini [aut], Chris Heaton [aut], Brian Ripley [aut, cre] Maintainer: Brian Ripley Repository: CRAN Date/Publication: 2024-12-11 10:04:09 UTC