maotai/0000755000175000017500000000000014177000262011655 5ustar nileshnileshmaotai/MD50000644000175000017500000001071514177000262012171 0ustar nileshnileshb3fa4a548bc14e3b8c2b8dc3c87dccab *DESCRIPTION f142f1787fbd58ec16564135c168b659 *LICENSE dd26f3c1842c67284c2a5c196f9c7aee *NAMESPACE 7139a5603b96630c61145063c7d87123 *NEWS.md 2bd0984aedcf09acb6abfd7ed59cbfa0 *R/RcppExports.R 51bfb78b30af308931573295bc3498c7 *R/aux_checkers.R 27d726ee03c804e21875eaad5387508f *R/aux_computation.R 5eee2ac697e448495d5e73c97af8336d *R/aux_ecdf.R 67a3002cea011542f3d77b6fe93bae4c *R/aux_hidden_compute.R bda6adfa97e60d9598c43eb34d7672bd *R/aux_hidden_dist.R 7107f2e8aac0948395e3cf7627851327 *R/aux_hidden_operation.R eb3423edb2f8e2c7c68a191caa4196d7 *R/bmds.R f4ccc6e3cd6e44edeaeb7dc044e9dbd1 *R/boot.mblock.R c37c6ebf43436e87258d9ec929a4361e *R/boot.stationary.R d2813838bd11ac4df10a6bd6b7cb47d4 *R/cayleymenger.R 004adc13642858eb7e4f0725dd7b8734 *R/checkdist.R 358f16e5d5d6fd2ee363f958003a479f *R/checkmetric.R f0ec222883b44112c09e5ecba47f28ef *R/cmds.R 656fe8643f2c7d4647bf214a81a0db4d *R/cov2corr.R ccebef2c3c8d41131cad0db572c4c9ca *R/cov2pcorr.R 2ffe229f65ceec29d60596c502c6d575 *R/dpmeans.R 779736fd573a442619505a4673d273d4 *R/ecdfdist.R 7a60bf389a581a6e493b875a444b1d59 *R/ecdfdist2.R 7b8f309e06853a3703546f2da84a6798 *R/epmeans.R 5ca02918b848d8d82654cc863923a505 *R/kmeanspp.R 85a577726c1096b73895a5a090e9a0a2 *R/later_findmed.R 65dabf4d39adb27399189e81e826ec17 *R/later_install_scipy.R addeab0b35666f835e6da843ba31281e *R/later_matrix1F1.R 78eeb5fb29b95d8cbe94996406e1cca6 *R/later_matrix2F1.R 9170ad32f09650ce40fa9beadfca785e *R/later_mbeta.R b99168f9714d638fe2600c581ab1299f *R/later_mgamma.R cde19e7f011412372fe3099979763056 *R/later_scalar0F1.R 4e9c65415524f68e5426991d1cb10bdb *R/later_scalar1F1.R 4a5fa8fee3aae16ba5e8c3fafcd5beb1 *R/later_scalar2F1.R 30a56bdbdc6cd8df6e8631fee34a579b *R/later_scalarpFq.R 3b5afe7d2b7beaeb923961699b7ba038 *R/lgpa.R 1b43b5efa6455506f7572fabba2b1ce8 *R/lyapunov.R 
cfd5dc66dadca55e2128d416b792b726 *R/matderiv.R f00182fcdb1cbe15777449982f046573 *R/mmd2test.R 236a449d527f2f4824087132344a76b2 *R/nef.R b47c9df2c3934a39455f77d302a322db *R/nem.R 218d035618ee31c5daf6ba5130689f66 *R/obsolete_distgmm.R fbf6e206ad52f6d3a3d0aeb119823b7c *R/package-maotai.R b82737ec632f146f42893073add7a750 *R/pdeterminant.R edbcb3ddd5271b8374fe41c7646db3cf *R/shortestpath.R 96101622e14735fd82afcb26a00e138e *R/sylvester.R e93a665227b80a5161bfe6271cbcb82b *R/trio.R 49d67fddf4878ec77bb043d07bc73b3b *R/tsne.R 65d2e578e49884d4882fc4a561dd37f8 *R/weiszfeld.R 103e2526dcecc61fcce560a12e39ed94 *R/zzz.R d955df71c5df387687851da0f5c8a9da *README.md 352f34b49caa3fe365121632d21df520 *build/partial.rdb ac5f91eaf31bd02c39c27c2571dadb1a *inst/REFERENCES.bib 8e509cf33257c8230ae3776cc1249a45 *inst/include/maoarma.hpp 17d403e1938655b6eb327af026e91db3 *inst/include/procedures/maoarma_basic.h 263effb529a3aa96fcea82323c3dfc1f *man/bmds.Rd 2c9d5c0e1a1cc0fae2a0a9c274849074 *man/boot.mblock.Rd 1aa10c2aa5b9c58dc1ba9abfb0c3e438 *man/boot.stationary.Rd 14bd1ae1842487a6ed976be18cffb7d5 *man/cayleymenger.Rd 685627ad39a9ccef8165b906c9db7667 *man/checkdist.Rd 83e44f9bfbea73ba0d84f714d8e5012e *man/checkmetric.Rd e247b900a16d3dadb6e2498d07f74bcc *man/cmds.Rd 2e8cdee0902d41df71a6c4923e8afe3a *man/cov2corr.Rd 9e53ef1a9b727248862c7cc7fdd88f25 *man/cov2pcorr.Rd 1f765107f8927d308fdae227f607e85a *man/dpmeans.Rd 2d49541ed6123690eb30f716826b4796 *man/ecdfdist.Rd fff165ab33a4711115041b60ec09a38f *man/ecdfdist2.Rd 1cdbc976bd38f4b0db2fb1e94a0b3950 *man/epmeans.Rd bff8d60ac1752ac074cb738cb8015aea *man/kmeanspp.Rd 9f1edeb021a361539bc43c4a0078b73f *man/lgpa.Rd 591c77266f9bc75e011408faee4d8b3c *man/lyapunov.Rd 1bdc558932a70f86d93952e5ec2b4f6e *man/matderiv.Rd de4bdeb25ad50f6a005871028dd23975 *man/mmd2test.Rd bb7b9dd89e8cbd62aa2619c0370b1273 *man/nef.Rd 2103286fdc3361b142ce0c88a3750208 *man/nem.Rd f8c60820540144bb5fc88010a0b84e27 *man/pdeterminant.Rd ea289390b722cea0e5cf6f79bfce4102 *man/shortestpath.Rd 
eac282286a43c132dc383b09cf500618 *man/sylvester.Rd 58d05dbbee3cdbd39ad4fdbcc9a7fa33 *man/trio.Rd b6cc5266dcdca322917e361fa2903861 *man/tsne.Rd 6c6d4ef21e830de4144f2cf078a25b07 *man/weiszfeld.Rd 61059660eb073d93e00e8ee054237071 *src/Makevars 61059660eb073d93e00e8ee054237071 *src/Makevars.win 2ce33b4c359ae85f4d536c11ed2ae883 *src/RcppExports.cpp 927e5b5be0b72a5ae1d9c549a6a3891c *src/cpp_bmds.cpp a6680789b1159d28a71f8beaadc4a2c6 *src/cpp_casket.cpp d306f032a33099427a615c852818bf2a *src/cpp_mmds.cpp 1c569eacbf94379171cc9bb3d6d9ea9f *src/cpp_smacof.cpp 0d286353a8df80d22906beae5d9ecf23 *src/evaluations.cpp 1054100501c3b4e5c1bf8d7294cf908c *src/evaluations.h af4a1f1a1f990bd17dd1d94f223724dd *src/src_computations.cpp maotai/NEWS.md0000644000175000017500000000243714176771675013007 0ustar nileshnilesh# maotai 0.2.4 * `cov2corr()` and `cov2pcorr()` are added. # maotai 0.2.3 * A minor typo fixed. # maotai 0.2.2 * Metric multidimensional scaling now uses the original version of SMACOF algorithm written in `C++`. # maotai 0.2.1 * $k$-nearest neighbor search with sparse output is now supported. # maotai 0.2.0 * Change of maintainer's contact and website. # maotai 0.1.9 * Replaced nearest neighbor search with `RANN` package. # maotai 0.1.8 * Fixed a potential bug. * Added SMACOF implementation. # maotai 0.1.7 * Fixed some minor bugs. # maotai 0.1.6 * NEWS reformatted and [package website](https://kisungyou.com/maotai/) is now available. * Added functions : `checkdist()` and `checkmetric()`. # maotai 0.1.5 * Added functions : `bmds()`, `cmds()`, `kmeanspp()`, `nem()`, `nef()`, `tsne()`. * `distgmm()` is removed for better composition of the package. # maotai 0.1.4 ## Bug fixes * Armadillo type-casting errors are fixed. * example visualization `par` settings are corrected. # maotai 0.1.3 * Added functions : `boot.mblock()`, `boot.stationary()`, `epmeans()`, and `weiszfeld()`. # maotai 0.1.2 * `mmd2test()` function is added. 
# maotai 0.1.1 * Added functions : `dpmeans()`, `distgmm()`, and `ecdfdist()`. # maotai 0.1.0 * initial release on CRAN. maotai/DESCRIPTION0000644000175000017500000000222714177000262013366 0ustar nileshnileshPackage: maotai Type: Package Title: Tools for Matrix Algebra, Optimization and Inference Version: 0.2.4 Authors@R: c(person("Kisung", "You", role = c("aut", "cre"),email = "kisungyou@outlook.com",comment=c(ORCID="0000-0002-8584-459X"))) Description: Matrix is an universal and sometimes primary object/unit in applied mathematics and statistics. We provide a number of algorithms for selected problems in optimization and statistical inference. For general exposition to the topic with focus on statistical context, see the book by Banerjee and Roy (2014, ISBN:9781420095388). Encoding: UTF-8 License: MIT + file LICENSE Suggests: igraph Imports: Matrix, Rcpp, Rdpack, RSpectra, Rtsne, RANN, cluster, labdsv, shapes, stats, utils, fastcluster, dbscan LinkingTo: Rcpp, RcppArmadillo, RcppDist RdMacros: Rdpack RoxygenNote: 7.1.2 URL: https://github.com/kisungyou/maotai BugReports: https://github.com/kisungyou/maotai/issues NeedsCompilation: yes Packaged: 2022-02-03 15:45:01 UTC; kisung Author: Kisung You [aut, cre] () Maintainer: Kisung You Repository: CRAN Date/Publication: 2022-02-03 16:20:02 UTC maotai/README.md0000644000175000017500000000175414176772261013161 0ustar nileshnilesh # Tools for Matrix Algebra, Optimization and Inference Problems [![Travis build status](https://travis-ci.org/kisungyou/maotai.svg?branch=master)](https://travis-ci.org/kisungyou/maotai) [![CRAN status](https://www.r-pkg.org/badges/version/maotai)](https://CRAN.R-project.org/package=maotai) **maotai** is an R package whose name is an acronym for **M**atrix **A**lgebra, **O**p**T**imization, **A**nd **I**nference problems - though I can’t deny motivation from one of [my father’s favorite](https://en.wikipedia.org/wiki/Maotai) for the namesake. More detailed introduction will be added later. 
## Installation Install the latest release from [CRAN](https://CRAN.R-project.org/package=maotai) with: ``` r install.packages("maotai") ``` or the up-to-date development version from github: ``` r # install.packages("devtools") devtools::install_github("kisungyou/maotai") ``` maotai/man/0000755000175000017500000000000014176771675012456 5ustar nileshnileshmaotai/man/boot.stationary.Rd0000644000175000017500000000326614176771675016113 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/boot.stationary.R \name{boot.stationary} \alias{boot.stationary} \title{Generate Index for Stationary Bootstrapping} \usage{ boot.stationary(N, p = 0.25) } \arguments{ \item{N}{the number of observations.} \item{p}{parameter for geometric distribution with the size of each block.} } \value{ a vector of length \code{N} for moving block bootstrap sampling. } \description{ Assuming data being dependent with cardinality \code{N}, \code{boot.stationary} returns a vector of index that is used for stationary bootstrapping. To describe, starting points are drawn from uniform distribution over \code{1:N} and the size of each block is determined from geometric distribution with parameter \eqn{p}. 
} \examples{ \donttest{ ## example : bootstrap confidence interval of mean and variances vec.x = seq(from=0,to=10,length.out=100) vec.y = sin(1.21*vec.x) + 2*cos(3.14*vec.x) + rnorm(100,sd=1.5) data.mu = mean(vec.y) data.var = var(vec.y) ## apply stationary bootstrapping nreps = 50 vec.mu = rep(0,nreps) vec.var = rep(0,nreps) for (i in 1:nreps){ sample.id = boot.stationary(100) sample.y = vec.y[sample.id] vec.mu[i] = mean(sample.y) vec.var[i] = var(sample.y) print(paste("iteration ",i,"/",nreps," complete.", sep="")) } ## visualize opar <- par(no.readonly=TRUE) par(mfrow=c(1,3), pty="s") plot(vec.x, vec.y, type="l", main="1d signal") # 1d signal hist(vec.mu, main="mean CI", xlab="mu") # mean abline(v=data.mu, col="red", lwd=4) hist(vec.var, main="variance CI", xlab="sigma") # variance abline(v=data.var, col="blue", lwd=4) par(opar) } } \references{ \insertRef{politis_stationary_1994}{maotai} } maotai/man/dpmeans.Rd0000644000175000017500000000466214176771675014404 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dpmeans.R \name{dpmeans} \alias{dpmeans} \title{DP-means Algorithm for Clustering Euclidean Data} \usage{ dpmeans( data, lambda = 1, maxiter = 1234, abstol = 1e-06, permute.order = FALSE ) } \arguments{ \item{data}{an \eqn{(n\times p)} data matrix for each row being an observation.} \item{lambda}{a threshold to define a new cluster.} \item{maxiter}{maximum number of iterations.} \item{abstol}{stopping criterion} \item{permute.order}{a logical; \code{TRUE} if random order for permutation is used, \code{FALSE} otherwise.} } \value{ a named list containing \describe{ \item{cluster}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.} \item{centers}{a list containing information for out-of-sample prediction.} } } \description{ DP-means is a nonparametric clustering method motivated by DP mixture model in that the number of clusters is determined by a parameter \eqn{\lambda}. 
The larger the \eqn{\lambda} value is, the smaller the number of clusters is attained. In addition to the original paper, we added an option to randomly permute an order of updating for each observation's membership as a common heuristic in the literature of cluster analysis. } \examples{ ## define data matrix of two clusters x1 = matrix(rnorm(50*3,mean= 2), ncol=3) x2 = matrix(rnorm(50*3,mean=-2), ncol=3) X = rbind(x1,x2) lab = c(rep(1,50),rep(2,50)) ## run dpmeans with several lambda values solA <- dpmeans(X, lambda= 5)$cluster solB <- dpmeans(X, lambda=10)$cluster solC <- dpmeans(X, lambda=20)$cluster ## visualize the results opar <- par(no.readonly=TRUE) par(mfrow=c(1,4), pty="s") plot(X,col=lab, pch=19, cex=.8, main="True", xlab="x", ylab="y") plot(X,col=solA, pch=19, cex=.8, main="dpmeans lbd=5", xlab="x", ylab="y") plot(X,col=solB, pch=19, cex=.8, main="dpmeans lbd=10", xlab="x", ylab="y") plot(X,col=solC, pch=19, cex=.8, main="dpmeans lbd=20", xlab="x", ylab="y") par(opar) \donttest{ ## let's find variations by permuting orders of update ## used setting : lambda=20, we will 8 runs sol8 <- list() for (i in 1:8){ sol8[[i]] = dpmeans(X, lambda=20, permute.order=TRUE)$cluster } ## let's visualize vpar <- par(no.readonly=TRUE) par(mfrow=c(2,4), pty="s") for (i in 1:8){ pm = paste("permute no.",i,sep="") plot(X,col=sol8[[i]], pch=19, cex=.8, main=pm, xlab="x", ylab="y") } par(vpar) } } \references{ \insertRef{kulis_revisiting_2012}{maotai} } maotai/man/cayleymenger.Rd0000644000175000017500000000133214176771675015430 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cayleymenger.R \name{cayleymenger} \alias{cayleymenger} \title{Cayley-Menger Determinant} \usage{ cayleymenger(data) } \arguments{ \item{data}{an \eqn{(n\times p)} matrix of row-stacked observations.} } \value{ a list containing\describe{ \item{det}{determinant value.} \item{vol}{volume attained from the determinant.} } } \description{ Cayley-Menger 
determinant is a formula of a \eqn{n}-dimensional simplex with respect to the squares of all pairwise distances of its vertices. } \examples{ ## USE 'IRIS' DATASET data(iris) X = as.matrix(iris[,1:4]) ## COMPUTE CAYLEY-MENGER DETERMINANT # since k=4 < n=149, it should be zero. cayleymenger(X) } maotai/man/boot.mblock.Rd0000644000175000017500000000274614176771675015167 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/boot.mblock.R \name{boot.mblock} \alias{boot.mblock} \title{Generate Index for Moving Block Bootstrapping} \usage{ boot.mblock(N, b = max(2, round(N/10))) } \arguments{ \item{N}{the number of observations.} \item{b}{the size of a block to be drawn.} } \value{ a vector of length \code{N} for moving block bootstrap sampling. } \description{ Assuming data being dependent with cardinality \code{N}, \code{boot.mblock} returns a vector of index that is used for moving block bootstrapping. } \examples{ \donttest{ ## example : bootstrap confidence interval of mean and variances vec.x = seq(from=0,to=10,length.out=100) vec.y = sin(1.21*vec.x) + 2*cos(3.14*vec.x) + rnorm(100,sd=1.5) data.mu = mean(vec.y) data.var = var(vec.y) ## apply moving block bootstrapping nreps = 50 vec.mu = rep(0,nreps) vec.var = rep(0,nreps) for (i in 1:nreps){ sample.id = boot.mblock(100, b=10) sample.y = vec.y[sample.id] vec.mu[i] = mean(sample.y) vec.var[i] = var(sample.y) print(paste("iteration ",i,"/",nreps," complete.", sep="")) } ## visualize opar <- par(no.readonly=TRUE) par(mfrow=c(1,3), pty="s") plot(vec.x, vec.y, type="l", main="1d signal") # 1d signal hist(vec.mu, main="mean CI", xlab="mu") # mean abline(v=data.mu, col="red", lwd=4) hist(vec.var, main="variance CI", xlab="sigma") # variance abline(v=data.var, col="blue", lwd=4) par(opar) } } \references{ \insertRef{kunsch_jackknife_1989}{maotai} } maotai/man/shortestpath.Rd0000644000175000017500000000222314176771675015474 0ustar nileshnilesh% Generated by roxygen2: do not edit by 
hand % Please edit documentation in R/shortestpath.R \name{shortestpath} \alias{shortestpath} \title{Find Shortest Path using Floyd-Warshall Algorithm} \usage{ shortestpath(dist) } \arguments{ \item{dist}{either an \eqn{(n\times n)} matrix or a \code{dist} class object.} } \value{ an \eqn{(n\times n)} matrix containing pairwise shortest path length. } \description{ This is a fast implementation of Floyd-Warshall algorithm to find the shortest path in a pairwise sense using \code{RcppArmadillo}. A logical input is also accepted. The given matrix should contain pairwise distance values \eqn{d_{i,j}} where \eqn{0} means there exists no path for node \eqn{i} and {j}. } \examples{ ## simple example : a ring graph # edges exist for pairs A = array(0,c(10,10)) for (i in 1:9){ A[i,i+1] = 1 A[i+1,i] = 1 } A[10,1] <- A[1,10] <- 1 # compute shortest-path and show the matrix sdA <- shortestpath(A) # visualize opar <- par(no.readonly=TRUE) par(pty="s") image(sdA, main="shortest path length for a ring graph") par(opar) } \references{ \insertRef{floyd_algorithm_1962}{maotai} \insertRef{warshall_theorem_1962}{maotai} } maotai/man/ecdfdist.Rd0000644000175000017500000000273414176771675014540 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ecdfdist.R \name{ecdfdist} \alias{ecdfdist} \title{Distance Measures between Multiple Empirical Cumulative Distribution Functions} \usage{ ecdfdist(elist, method = c("KS", "Lp", "Wasserstein"), p = 2, as.dist = FALSE) } \arguments{ \item{elist}{a length \eqn{N} list of \code{ecdf} objects.} \item{method}{name of the distance/dissimilarity measure. 
Case insensitive.} \item{p}{exponent for \code{Lp} or \code{Wasserstein} distance.} \item{as.dist}{a logical; \code{TRUE} to return \code{dist} object, \code{FALSE} to return an \eqn{(N\times N)} symmetric matrix of pairwise distances.} } \value{ either \code{dist} object of an \eqn{(N\times N)} symmetric matrix of pairwise distances by \code{as.dist} argument. } \description{ We measure distance between two empirical cumulative distribution functions (ECDF). For simplicity, we only take an input of \code{\link[stats]{ecdf}} objects from \pkg{stats} package. } \examples{ \donttest{ ## toy example : 10 of random and uniform distributions mylist = list() for (i in 1:10){ mylist[[i]] = stats::ecdf(stats::rnorm(50, sd=2)) } for (i in 11:20){ mylist[[i]] = stats::ecdf(stats::runif(50, min=-5)) } ## compute Kolmogorov-Smirnov distance dm = ecdfdist(mylist, method="KS") ## visualize mks =" KS distances of 2 Types" opar = par(no.readonly=TRUE) par(pty="s") image(dm[,nrow(dm):1], axes=FALSE, main=mks) par(opar) } } \seealso{ \code{\link[stats]{ecdf}} } maotai/man/ecdfdist2.Rd0000644000175000017500000000340314176771675014614 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/ecdfdist2.R \name{ecdfdist2} \alias{ecdfdist2} \title{Pairwise Measures for Two Sets of Empirical CDFs} \usage{ ecdfdist2(elist1, elist2, method = c("KS", "Lp", "Wasserstein"), p = 2) } \arguments{ \item{elist1}{a length \eqn{M} list of \code{ecdf} objects.} \item{elist2}{a length \eqn{N} list of \code{ecdf} objects.} \item{method}{name of the distance/dissimilarity measure. Case insensitive.} \item{p}{exponent for \code{Lp} or \code{Wasserstein} distance.} } \value{ an \eqn{(M\times N)} matrix of pairwise distances. } \description{ We measure distance between two sets of empirical cumulative distribution functions (ECDF). For simplicity, we only take an input of \code{\link[stats]{ecdf}} objects from \pkg{stats} package. 
} \examples{ \donttest{ ## toy example # first list : 10 of random and uniform distributions mylist1 = list() for (i in 1:10){ mylist1[[i]] = stats::ecdf(stats::rnorm(50, sd=2))} for (i in 11:20){mylist1[[i]] = stats::ecdf(stats::runif(50, min=-5))} # second list : 15 uniform and random distributions mylist2 = list() for (i in 1:15){ mylist2[[i]] = stats::ecdf(stats::runif(50, min=-5))} for (i in 16:30){mylist2[[i]] = stats::ecdf(stats::rnorm(50, sd=2))} ## compute Kolmogorov-Smirnov distance dm2ks = ecdfdist2(mylist1, mylist2, method="KS") dm2lp = ecdfdist2(mylist1, mylist2, method="lp") dm2wa = ecdfdist2(mylist1, mylist2, method="wasserstein") nrs = nrow(dm2ks) ## visualize opar = par(no.readonly=TRUE) par(mfrow=c(1,3), pty="s") image(dm2ks[,nrs:1], axes=FALSE, main="Kolmogorov-Smirnov") image(dm2lp[,nrs:1], axes=FALSE, main="L2") image(dm2wa[,nrs:1], axes=FALSE, main="Wasserstein") par(opar) } } \seealso{ \code{\link[stats]{ecdf}} \code{\link{ecdfdist}} } maotai/man/epmeans.Rd0000644000175000017500000000341314176771675014376 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/epmeans.R \name{epmeans} \alias{epmeans} \title{EP-means Algorithm for Clustering Empirical Distributions} \usage{ epmeans(elist, k = 2) } \arguments{ \item{elist}{a length \eqn{N} list of either vector or \code{ecdf} objects.} \item{k}{the number of clusters.} } \value{ a named list containing \describe{ \item{cluster}{an integer vector indicating the cluster to which each \code{ecdf} is allocated.} \item{centers}{a length \eqn{k} list of centroid \code{ecdf} objects.} } } \description{ EP-means is a variant of k-means algorithm adapted to cluster multiple empirical cumulative distribution functions under metric structure induced by Earth Mover's Distance. 
} \examples{ \donttest{ ## two sets of 1d samples, 10 each and add some noise # set 1 : mixture of two gaussians # set 2 : single gamma distribution # generate data elist = list() for (i in 1:10){ elist[[i]] = stats::ecdf(c(rnorm(100, mean=-2), rnorm(50, mean=2))) } for (j in 11:20){ elist[[j]] = stats::ecdf(rgamma(100,1) + rnorm(100, sd=sqrt(0.5))) } # run EP-means with k clusters # change the value below to see different settings myk = 2 epout = epmeans(elist, k=myk) # visualize opar = par(no.readonly=TRUE) par(mfrow=c(1,myk)) for (k in 1:myk){ idk = which(epout$cluster==k) for (i in 1:length(idk)){ if (i<2){ pm = paste("class ",k," (size=",length(idk),")",sep="") plot(elist[[idk[i]]], verticals=TRUE, lwd=0.25, do.points=FALSE, main=pm) } else { plot(elist[[idk[i]]], add=TRUE, verticals=TRUE, lwd=0.25, do.points=FALSE) } plot(epout$centers[[k]], add=TRUE, verticals=TRUE, lwd=2, col="red", do.points=FALSE) } } par(opar) } } \references{ \insertRef{henderson_epmeans_2015}{maotai} } maotai/man/lgpa.Rd0000644000175000017500000000442714176771675013677 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lgpa.R \name{lgpa} \alias{lgpa} \title{Large-scale Generalized Procrustes Analysis} \usage{ lgpa(x, sub.id = 1:(dim(x)[1]), scale = TRUE, reflect = FALSE) } \arguments{ \item{x}{a \eqn{(k\times m\times n)} 3d array, where \eqn{k} is the number of points, \eqn{m} the number of dimensions, and \eqn{n} the number of samples.} \item{sub.id}{a vector of indices for defining anchor points.} \item{scale}{a logical; \code{TRUE} if scaling is applied, \code{FALSE} otherwise.} \item{reflect}{a logical; \code{TRUE} if reflection is required, \code{FALSE} otherwise.} } \value{ a \eqn{(k\times m\times n)} 3d array of aligned samples. } \description{ We modify generalized Procrustes analysis for large-scale data by first setting a subset of anchor points and applying the attained transformation to the rest data. 
If \code{sub.id} is a vector \code{1:dim(x)[1]}, it uses all observations as anchor points, reducing to the conventional generalized Procrustes analysis. } \examples{ \dontrun{ ## This should be run if you have 'shapes' package installed. library(shapes) data(gorf.dat) ## apply anchor-based method and original procGPA out.proc = shapes::procGPA(gorf.dat, scale=TRUE)$rotated # procGPA from shapes package out.anc4 = lgpa(gorf.dat, sub.id=c(1,4,9,7), scale=TRUE) # use 4 points out.anc7 = lgpa(gorf.dat, sub.id=1:7, scale=TRUE) # use all but 1 point as anchors ## visualize opar = par(no.readonly=TRUE) par(mfrow=c(3,4), pty="s") plot(out.proc[,,1], main="procGPA No.1", pch=18) plot(out.proc[,,2], main="procGPA No.2", pch=18) plot(out.proc[,,3], main="procGPA No.3", pch=18) plot(out.proc[,,4], main="procGPA No.4", pch=18) plot(out.anc4[,,1], main="4 Anchors No.1", pch=18, col="blue") plot(out.anc4[,,2], main="4 Anchors No.2", pch=18, col="blue") plot(out.anc4[,,3], main="4 Anchors No.3", pch=18, col="blue") plot(out.anc4[,,4], main="4 Anchors No.4", pch=18, col="blue") plot(out.anc7[,,1], main="7 Anchors No.1", pch=18, col="red") plot(out.anc7[,,2], main="7 Anchors No.2", pch=18, col="red") plot(out.anc7[,,3], main="7 Anchors No.3", pch=18, col="red") plot(out.anc7[,,4], main="7 Anchors No.4", pch=18, col="red") par(opar) } } \references{ \insertRef{goodall_procrustes_1991}{maotai} } \author{ Kisung You } maotai/man/bmds.Rd0000644000175000017500000000434714176771675013702 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/bmds.R \name{bmds} \alias{bmds} \title{Bayesian Multidimensional Scaling} \usage{ bmds( data, ndim = 2, par.a = 5, par.alpha = 0.5, par.step = 1, mc.iter = 8128, verbose = TRUE ) } \arguments{ \item{data}{an \eqn{(n\times p)} matrix whose rows are observations.} \item{ndim}{an integer-valued target dimension.} \item{par.a}{hyperparameter for conjugate prior on variance term, i.e., \eqn{\sigma^2 \sim IG(a,b)}. 
Note that \eqn{b} is chosen appropriately as in paper.} \item{par.alpha}{hyperparameter for conjugate prior on diagonal term, i.e., \eqn{\lambda_j \sim IG(\alpha, \beta_j)}. Note that \eqn{\beta_j} is chosen appropriately as in paper.} \item{par.step}{stepsize for random-walk, which is standard deviation of Gaussian proposal.} \item{mc.iter}{the number of MCMC iterations.} \item{verbose}{a logical; \code{TRUE} to show iterations, \code{FALSE} otherwise.} } \value{ a named list containing \describe{ \item{embed}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.} \item{stress}{discrepancy between embedded and origianl data as a measure of error.} } } \description{ A Bayesian formulation of classical Multidimensional Scaling is presented. Even though this method is based on MCMC sampling, we only return maximum a posterior (MAP) estimate that maximizes the posterior distribution. Due to its nature without any special tuning, increasing \code{mc.iter} requires much computation. } \examples{ \donttest{ ## use simple example of iris dataset data(iris) idata = as.matrix(iris[,1:4]) ## run Bayesian MDS # let's run 10 iterations only. 
iris.cmds = cmds(idata, ndim=2) iris.bmds = bmds(idata, ndim=2, mc.iter=5, par.step=(2.38^2)) ## extract coordinates and class information cx = iris.cmds$embed # embedded coordinates of CMDS bx = iris.bmds$embed # BMDS icol = iris[,5] # class information ## visualize opar <- par(no.readonly=TRUE) par(mfrow=c(2,1)) mc = paste0("CMDS with STRESS=",round(iris.cmds$stress,4)) mb = paste0("BMDS with STRESS=",round(iris.bmds$stress,4)) plot(cx, col=icol,pch=19,main=mc) plot(bx, col=icol,pch=19,main=mb) par(opar) } } \references{ \insertRef{oh_bayesian_2001a}{maotai} } maotai/man/checkmetric.Rd0000644000175000017500000000163014176771675015226 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/checkmetric.R \name{checkmetric} \alias{checkmetric} \title{Check for Metric Matrix} \usage{ checkmetric(d) } \arguments{ \item{d}{\code{"dist"} object or \eqn{(N\times N)} matrix of pairwise distances.} } \value{ a logical; \code{TRUE} if it satisfies metric property, \code{FALSE} otherwise. } \description{ This function checks whether the distance matrix \eqn{D:=d_{ij} = d(x_i, x_j)} satisfies four axioms to make itself a semimetric, which are (1) \eqn{d_{ii} = 0}, (2) \eqn{d_{ij} > 0} for \eqn{i\neq j}, (3) \eqn{d_{ij} = d_{ji}}, and (4) \eqn{d_{ij} \leq d_{ik} + d_{kj}}. } \examples{ ## Let's use L2 distance matrix of iris dataset data(iris) dx = as.matrix(stats::dist(iris[,1:4])) # perturb d(i,j) dy = dx dy[1,2] <- dy[2,1] <- 10 # run the algorithm checkmetric(dx) checkmetric(dy) } \seealso{ \code{\link{checkdist}} } maotai/man/cov2corr.Rd0000644000175000017500000000167414176771675014514 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cov2corr.R \name{cov2corr} \alias{cov2corr} \title{Convert Covariance into Correlation Matrix} \usage{ cov2corr(mat) } \arguments{ \item{mat}{a \eqn{(p\times p)} covariance matrix.} } \value{ a \eqn{(p\times p)} correlation matrix. 
} \description{ Given a covariance matrix, return a correlation matrix that has unit diagonals. We strictly impose (and check) whether the given input is a symmetric matrix of full-rank. } \examples{ \donttest{ # generate an empirical covariance scaled prep_mat = stats::cov(matrix(rnorm(100*10),ncol=10)) prep_vec = diag(as.vector(stats::runif(10, max=5))) prep_cov = prep_vec\%*\%prep_mat\%*\%prep_vec # compute correlation matrix prep_cor = cov2corr(prep_cov) # visualize opar <- par(no.readonly=TRUE) par(mfrow=c(1,2), pty="s") image(prep_cov, axes=FALSE, main="covariance") image(prep_cor, axes=FALSE, main="correlation") par(opar) } } maotai/man/matderiv.Rd0000644000175000017500000000404114176771675014557 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/matderiv.R \name{matderiv} \alias{matderiv} \title{Numerical Approximation to Gradient of a Function with Matrix Argument} \usage{ matderiv(fn, x, h = 0.001) } \arguments{ \item{fn}{a function that takes a matrix of size \eqn{(n\times p)} and returns a scalar value.} \item{x}{an \eqn{(n\times p)} matrix where the gradient is to be computed.} \item{h}{step size for centered difference scheme.} } \value{ an approximate numerical gradient matrix of size \eqn{(n\times p)}. } \description{ For a given function \eqn{f:\mathbf{R}^{n\times p} \rightarrow \mathbf{R}}, we use finite difference scheme that approximates a gradient at a given point \eqn{x}. In Riemannian optimization, this can be used as a proxy for ambient gradient. Use with care since it may accumulate numerical error. 
} \examples{ ## function f(X) = for two vectors 'a' and 'b' # derivative w.r.t X is ab' # take an example of (5x5) symmetric positive definite matrix # problem settings a <- rnorm(5) b <- rnorm(5) ftn <- function(X){ return(sum(as.vector(X\%*\%b)*a)) } # function to be taken derivative myX <- matrix(rnorm(25),nrow=5) # point where derivative is evaluated myX <- myX\%*\%t(myX) # main computation sol.true <- base::outer(a,b) sol.num1 <- matderiv(ftn, myX, h=1e-1) # step size : 1e-1 sol.num2 <- matderiv(ftn, myX, h=1e-5) # 1e-3 sol.num3 <- matderiv(ftn, myX, h=1e-9) # 1e-5 ## visualize/print the results expar = par(no.readonly=TRUE) par(mfrow=c(2,2),pty="s") image(sol.true, main="true solution") image(sol.num1, main="h=1e-1") image(sol.num2, main="h=1e-5") image(sol.num3, main="h=1e-9") par(expar) \donttest{ ntrue = norm(sol.true,"f") cat('* Relative Errors in Frobenius Norm ') cat(paste("* h=1e-1 : ",norm(sol.true-sol.num1,"f")/ntrue,sep="")) cat(paste("* h=1e-5 : ",norm(sol.true-sol.num2,"f")/ntrue,sep="")) cat(paste("* h=1e-9 : ",norm(sol.true-sol.num3,"f")/ntrue,sep="")) } } \references{ \insertRef{kincaid_numerical_2009}{maotai} } maotai/man/trio.Rd0000644000175000017500000000517614176771675013733 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/trio.R \name{trio} \alias{trio} \title{Trace Ratio Optimation} \usage{ trio( A, B, C, dim = 2, method = c("2003Guo", "2007Wang", "2009Jia", "2012Ngo"), maxiter = 1000, eps = 1e-10 ) } \arguments{ \item{A}{a \eqn{(p\times p)} symmetric matrix in the numerator term.} \item{B}{a \eqn{(p\times p)} symmetric matrix in the denomiator term.} \item{C}{a \eqn{(p\times p)} symmetric constraint matrix. If not provided, it is set as identical matrix automatically.} \item{dim}{an integer for target dimension. It can be considered as the number of loadings.} \item{method}{the name of algorithm to be used. 
Default is \code{2003Guo}.} \item{maxiter}{maximum number of iterations to be performed.} \item{eps}{stopping criterion for iterative algorithms.} } \value{ a named list containing \describe{ \item{V}{a \eqn{(p\times dim)} projection matrix.} \item{tr.val}{an attained maximum scalar value.} } } \description{ This function provides several algorithms to solve the following problem \deqn{\textrm{max} \frac{tr(V^\top A V)}{tr(V^\top B V)} \textrm{ such that } V^\top C V = I} where \eqn{V} is a projection matrix, i.e., \eqn{V^\top V = I}. Trace ratio optimization is pertained to various linear dimension reduction methods. It should be noted that when \eqn{C = I}, the above problem is often reformulated as a generalized eigenvalue problem since it's an easier proxy with faster computation. } \examples{ ## simple test # problem setting p = 5 mydim = 2 A = matrix(rnorm(p^2),nrow=p); A=A\%*\%t(A) B = matrix(runif(p^2),nrow=p); B=B\%*\%t(B) C = diag(p) # approximate solution via determinant ratio problem formulation eigAB = eigen(solve(B,A)) V = eigAB$vectors[,1:mydim] eigval = sum(diag(t(V)\%*\%A\%*\%V))/sum(diag(t(V)\%*\%B\%*\%V)) # solve using 4 algorithms m12 = trio(A,B,dim=mydim, method="2012Ngo") m09 = trio(A,B,dim=mydim, method="2009Jia") m07 = trio(A,B,dim=mydim, method="2007Wang") m03 = trio(A,B,dim=mydim, method="2003Guo") # print the results line1 = '* Evaluation of the cost function' line2 = paste("* approx. 
via determinant : ",eigval,sep="") line3 = paste("* trio by 2012Ngo : ",m12$tr.val, sep="") line4 = paste("* trio by 2009Jia : ",m09$tr.val, sep="") line5 = paste("* trio by 2007Wang : ",m07$tr.val, sep="") line6 = paste("* trio by 2003Guo : ",m03$tr.val, sep="") cat(line1,"\n",line2,"\n",line3,"\n",line4,"\n",line5,"\n",line6) } \references{ \insertRef{guo_generalized_2003}{maotai} \insertRef{wang_trace_2007}{maotai} \insertRef{yangqingjia_trace_2009}{maotai} \insertRef{ngo_trace_2012}{maotai} } maotai/man/sylvester.Rd0000644000175000017500000000236714176771675015015 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sylvester.R \name{sylvester} \alias{sylvester} \title{Solve Sylvester Equation} \usage{ sylvester(A, B, C) } \arguments{ \item{A}{a \eqn{(p\times p)} matrix as above.} \item{B}{a \eqn{(p\times p)} matrix as above.} \item{C}{a \eqn{(p\times p)} matrix as above.} } \value{ a solution matrix \eqn{X} of size \eqn{(p\times p)}. } \description{ The Sylvester equation is of form \deqn{AX + XB = C} where \eqn{X} is the unknown and others are given. Though it's possible to have non-square \eqn{A} and \eqn{B} matrices, we currently support square matrices only. This is a wrapper of \code{armadillo}'s \code{sylvester} function. 
} \examples{ ## simulated example # generate square matrices A = matrix(rnorm(25),nrow=5) X = matrix(rnorm(25),nrow=5) B = matrix(rnorm(25),nrow=5) C = A\%*\%X + X\%*\%B # solve using 'sylvester' function solX = sylvester(A,B,C) pm1 = "* Experiment with Sylvester Solver" pm2 = paste("* Absolute Error : ",norm(solX-X,"f"),sep="") pm3 = paste("* Relative Error : ",norm(solX-X,"f")/norm(X,"f"),sep="") cat(paste(pm1,"\n",pm2,"\n",pm3,sep="")) } \references{ \insertRef{sanderson_armadillo_2016}{maotai} \insertRef{eddelbuettel_rcpparmadillo_2014}{maotai} } maotai/man/weiszfeld.Rd0000644000175000017500000000251514176771675014744 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/weiszfeld.R \name{weiszfeld} \alias{weiszfeld} \title{Weiszfeld Algorithm for Computing L1-median} \usage{ weiszfeld(X, weights = NULL, maxiter = 496, abstol = 1e-06) } \arguments{ \item{X}{an \eqn{(n\times p)} matrix for \eqn{p}-dimensional signal. If vector is given, it is assumed that \eqn{p=1}.} \item{weights}{\code{NULL} for equal weight \code{rep(1/n,n)}; otherwise, it has to be a vector of length \eqn{n}.} \item{maxiter}{maximum number of iterations.} \item{abstol}{stopping criterion} } \description{ Geometric median, also known as L1-median, is a solution to the following problem \deqn{\textrm{argmin} \sum_{i=1}^n \| x_i - y \|_2 } for a given data \eqn{x_1,x_2,\ldots,x_n \in R^p}. 
} \examples{ ## generate sin(x) data with noise for 100 replicates set.seed(496) t = seq(from=0,to=10,length.out=20) X = array(0,c(100,20)) for (i in 1:100){ X[i,] = sin(t) + stats::rnorm(20, sd=0.5) } ## compute L1-median and L2-mean vecL2 = base::colMeans(X) vecL1 = weiszfeld(X) ## visualize opar <- par(no.readonly=TRUE) par(mfrow=c(1,3), pty="s") matplot(t(X[1:5,]), type="l", main="5 generated data", ylim=c(-2,2)) plot(t, vecL2, type="l", col="blue", main="L2-mean", ylim=c(-2,2)) plot(t, vecL1, type="l", col="red", main="L1-median", ylim=c(-2,2)) par(opar) } maotai/man/cmds.Rd0000644000175000017500000000206414176771675013675 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cmds.R \name{cmds} \alias{cmds} \title{Classical Multidimensional Scaling} \usage{ cmds(data, ndim = 2) } \arguments{ \item{data}{an \eqn{(n\times p)} matrix whose rows are observations.} \item{ndim}{an integer-valued target dimension.} } \value{ a named list containing \describe{ \item{embed}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.} \item{stress}{discrepancy between embedded and origianl data as a measure of error.} } } \description{ Classical multidimensional scaling aims at finding low-dimensional structure by preserving pairwise distances of data. 
} \examples{ ## use simple example of iris dataset data(iris) idata = as.matrix(iris[,1:4]) icol = as.factor(iris[,5]) # class information ## run Classical MDS iris.cmds = cmds(idata, ndim=2) ## visualize opar <- par(no.readonly=TRUE) plot(iris.cmds$embed, col=icol, main=paste0("STRESS=",round(iris.cmds$stress,4))) par(opar) } \references{ \insertRef{torgerson_multidimensional_1952}{maotai} } maotai/man/nef.Rd0000644000175000017500000000145314176771675013520 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nef.R \name{nef} \alias{nef} \title{Negative Eigenfraction} \usage{ nef(data) } \arguments{ \item{data}{an \eqn{(n\times p)} matrix whose rows are observations.} } \value{ a nonnegative NEF value. } \description{ Negative Eigenfraction (NEF) is a measure of distortion for the data whether they are lying in Euclidean manner or not. When the value is exactly 0, it means the data is Euclidean. On the other hand, when NEF is far away from 0, it means not Euclidean. The concept of NEF is closely related to the definiteness of a Gram matrix. } \examples{ ## use simple example of iris dataset data(iris) mydat = as.matrix(iris[,1:4]) ## calculate NEF nef(mydat) } \references{ \insertRef{pekalska_noneuclidean_2006}{maotai} } maotai/man/cov2pcorr.Rd0000644000175000017500000000211114176771675014657 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cov2pcorr.R \name{cov2pcorr} \alias{cov2pcorr} \title{Convert Covariance into Partial Correlation Matrix} \usage{ cov2pcorr(mat) } \arguments{ \item{mat}{a \eqn{(p\times p)} covariance matrix.} } \value{ a \eqn{(p\times p)} partial correlation matrix. } \description{ Given a covariance matrix, return a partial correlation matrix that has unit diagonals. We strictly impose (and check) whether the given input is a symmetric matrix of full-rank. 
} \examples{ \donttest{ # generate an empirical covariance scaled prep_mat = stats::cov(matrix(rnorm(100*10),ncol=10)) prep_vec = diag(as.vector(stats::runif(10, max=5))) prep_cov = prep_vec\%*\%prep_mat\%*\%prep_vec # compute correlation and partial correlation matrices prep_cor = cov2corr(prep_cov) prep_par = cov2pcorr(prep_cov) # visualize opar <- par(no.readonly=TRUE) par(mfrow=c(1,3), pty="s") image(prep_cov, axes=FALSE, main="covariance") image(prep_cor, axes=FALSE, main="correlation") image(prep_par, axes=FALSE, main="partial correlation") par(opar) } } maotai/man/pdeterminant.Rd0000644000175000017500000000250514176771675015441 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/pdeterminant.R \name{pdeterminant} \alias{pdeterminant} \title{Calculate the Pseudo-Determinant of a Matrix} \usage{ pdeterminant(A) } \arguments{ \item{A}{a square matrix whose pseudo-determinant be computed.} } \value{ a scalar value for computed pseudo-determinant. } \description{ When a given square matrix \eqn{A} is rank deficient, determinant is zero. Still, we can compute the pseudo-determinant by multiplying all non-zero eigenvalues. Since thresholding to determine near-zero eigenvalues is subjective, we implemented the function as of original limit problem. When matrix is non-singular, it coincides with traditional determinant. 
} \examples{ ## show the convergence of pseudo-determinant # settings n = 10 A = cov(matrix(rnorm(5*n),ncol=n)) # (n x n) matrix k = as.double(Matrix::rankMatrix(A)) # rank of A # iterative computation ntry = 11 del.vec = exp(-(1:ntry)) det.vec = rep(0,ntry) for (i in 1:ntry){ del = del.vec[i] det.vec[i] = det(A+del*diag(n))/(del^(n-k)) } # visualize the results opar <- par(no.readonly=TRUE) plot(1:ntry, det.vec, main=paste("true rank is ",k," out of ",n,sep=""),"b", xlab="iterations") abline(h=pdeterminant(A),col="red",lwd=1.2) par(opar) } \references{ \insertRef{holbrook_differentiating_2018}{maotai} } maotai/man/mmd2test.Rd0000644000175000017500000000526214176771675014511 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/mmd2test.R \name{mmd2test} \alias{mmd2test} \title{Kernel Two-sample Test with Maximum Mean Discrepancy} \usage{ mmd2test(K, label, method = c("b", "u"), mc.iter = 999) } \arguments{ \item{K}{kernel matrix or an object of \code{kernelMatrix} class from \pkg{kernlab} package.} \item{label}{label vector of class indices.} \item{method}{type of estimator to be used. \code{"b"} for biased and \code{"u"} for unbiased estimator of MMD.} \item{mc.iter}{the number of Monte Carlo resampling iterations.} } \value{ a (list) object of \code{S3} class \code{htest} containing: \describe{ \item{statistic}{a test statistic.} \item{p.value}{\eqn{p}-value under \eqn{H_0}.} \item{alternative}{alternative hypothesis.} \item{method}{name of the test.} \item{data.name}{name(s) of provided kernel matrix.} } } \description{ Maximum Mean Discrepancy (MMD) as a measure of discrepancy between samples is employed as a test statistic for two-sample hypothesis test of equal distributions. Kernel matrix \eqn{K} is a symmetric square matrix that is positive semidefinite. 
} \examples{ ## small test for CRAN submission dat1 <- matrix(rnorm(60, mean= 1), ncol=2) # group 1 : 30 obs of mean 1 dat2 <- matrix(rnorm(50, mean=-1), ncol=2) # group 2 : 25 obs of mean -1 dmat <- as.matrix(dist(rbind(dat1, dat2))) # Euclidean distance matrix kmat <- exp(-(dmat^2)) # build a gaussian kernel matrix lab <- c(rep(1,30), rep(2,25)) # corresponding label mmd2test(kmat, lab) # run the code ! \dontrun{ ## WARNING: computationally heavy. # Let's compute empirical Type 1 error at alpha=0.05 niter = 496 pvals1 = rep(0,niter) pvals2 = rep(0,niter) for (i in 1:niter){ dat = matrix(rnorm(200),ncol=2) lab = c(rep(1,50), rep(2,50)) lbd = 0.1 kmat = exp(-lbd*(as.matrix(dist(dat))^2)) pvals1[i] = mmd2test(kmat, lab, method="b")$p.value pvals2[i] = mmd2test(kmat, lab, method="u")$p.value print(paste("iteration ",i," complete..",sep="")) } # Visualize the above at multiple significance levels alphas = seq(from=0.001, to=0.999, length.out=100) errors1 = rep(0,100) errors2 = rep(0,100) for (i in 1:100){ errors1[i] = sum(pvals1<=alphas[i])/niter errors2[i] = sum(pvals2<=alphas[i])/niter } opar <- par(no.readonly=TRUE) par(mfrow=c(1,2), pty="s") plot(alphas, errors1, "b", main="Biased Estimator Error", xlab="alpha", ylab="error", cex=0.5) abline(a=0,b=1, lwd=1.5, col="red") plot(alphas, errors2, "b", main="Unbiased Estimator Error", xlab="alpha", ylab="error", cex=0.5) abline(a=0,b=1, lwd=1.5, col="blue") par(opar) } } \references{ \insertRef{gretton_kernel_2012}{maotai} } maotai/man/tsne.Rd0000644000175000017500000000275114176771675013723 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tsne.R \name{tsne} \alias{tsne} \title{t-SNE Embedding} \usage{ tsne(data, ndim = 2, ...) 
} \arguments{ \item{data}{an \eqn{(n\times p)} matrix whose rows are observations.} \item{ndim}{an integer-valued target dimension.} \item{...}{extra parameters to be used in \code{\link[Rtsne]{Rtsne}} function.} } \value{ a named list containing \describe{ \item{embed}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.} \item{stress}{discrepancy between embedded and origianl data as a measure of error.} } } \description{ This function is a simple wrapper of \code{\link[Rtsne]{Rtsne}} function for t-Stochastic Neighbor Embedding for finding low-dimensional structure of the data embedded in the high-dimensional space. } \examples{ \donttest{ ## use simple example of iris dataset data(iris) mydat = as.matrix(iris[,1:4]) mylab = as.factor(iris[,5]) ## run t-SNE and MDS for comparison iris.cmds = cmds(mydat, ndim=2) iris.tsne = tsne(mydat, ndim=2) ## extract coordinates and class information cx = iris.cmds$embed # embedded coordinates of CMDS tx = iris.tsne$embed # t-SNE ## visualize # main title mc = paste("CMDS with STRESS=",round(iris.cmds$stress,4),sep="") mt = paste("tSNE with STRESS=",round(iris.tsne$stress,4),sep="") # draw a figure opar <- par(no.readonly=TRUE) par(mfrow=c(1,2)) plot(cx, col=mylab, pch=19, main=mc) plot(tx, col=mylab, pch=19, main=mt) par(opar) } } maotai/man/kmeanspp.Rd0000644000175000017500000000270414176771675014566 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/kmeanspp.R \name{kmeanspp} \alias{kmeanspp} \title{K-Means++ Clustering Algorithm} \usage{ kmeanspp(data, k = 2) } \arguments{ \item{data}{an \eqn{(n\times p)} matrix whose rows are observations.} \item{k}{the number of clusters.} } \value{ a length-\eqn{n} vector of class labels. } \description{ \eqn{k}-means++ algorithm is known to be a smart, careful initialization technique. 
It is originally intended to return a set of \eqn{k} points as initial centers though it can still be used as a rough clustering algorithm by assigning points to the nearest points. } \examples{ ## use simple example of iris dataset data(iris) mydata = as.matrix(iris[,1:4]) mycol = as.factor(iris[,5]) ## find the low-dimensional embedding for visualization my2d = cmds(mydata, ndim=2)$embed ## apply 'kmeanspp' with different numbers of k's. k2 = kmeanspp(mydata, k=2) k3 = kmeanspp(mydata, k=3) k4 = kmeanspp(mydata, k=4) k5 = kmeanspp(mydata, k=5) k6 = kmeanspp(mydata, k=6) ## visualize opar <- par(no.readonly=TRUE) par(mfrow=c(2,3)) plot(my2d, col=k2, main="k=2", pch=19, cex=0.5) plot(my2d, col=k3, main="k=3", pch=19, cex=0.5) plot(my2d, col=k4, main="k=4", pch=19, cex=0.5) plot(my2d, col=k5, main="k=5", pch=19, cex=0.5) plot(my2d, col=k6, main="k=6", pch=19, cex=0.5) plot(my2d, col=mycol, main="true cluster", pch=19, cex=0.5) par(opar) } \references{ \insertRef{arthur_kmeans_2007}{maotai} } maotai/man/checkdist.Rd0000644000175000017500000000155214176771675014711 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/checkdist.R \name{checkdist} \alias{checkdist} \title{Check for Distance Matrix} \usage{ checkdist(d) } \arguments{ \item{d}{\code{"dist"} object or \eqn{(N\times N)} matrix of pairwise distances.} } \value{ a logical; \code{TRUE} if it satisfies metric property, \code{FALSE} otherwise. } \description{ This function checks whether the distance matrix \eqn{D:=d_{ij} = d(x_i, x_j)} satisfies three axioms to make itself a semimetric, which are (1) \eqn{d_{ii} = 0}, (2) \eqn{d_{ij} > 0} for \eqn{i\neq j}, and (3) \eqn{d_{ij} = d_{ji}}. 
} \examples{ ## Let's use L2 distance matrix of iris dataset data(iris) dx = as.matrix(stats::dist(iris[,1:4])) # perturb d(i,j) dy = dx dy[1,2] <- dy[2,1] <- 10 # run the algorithm checkdist(dx) checkdist(dy) } \seealso{ \code{\link{checkmetric}} } maotai/man/nem.Rd0000644000175000017500000000147114176771675013527 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/nem.R \name{nem} \alias{nem} \title{Negative Eigenvalue Magnitude} \usage{ nem(data) } \arguments{ \item{data}{an \eqn{(n\times p)} matrix whose rows are observations.} } \value{ a nonnegative NEM value. } \description{ Negative Eigenvalue Magnitude (NEM) is a measure of distortion for the data whether they are lying in Euclidean manner or not. When the value is exactly 0, it means the data is Euclidean. On the other hand, when NEM is far away from 0, it means not Euclidean. The concept of NEM is closely related to the definiteness of a Gram matrix. } \examples{ ## use simple example of iris dataset data(iris) mydat = as.matrix(iris[,1:4]) ## calculate NEM nem(mydat) } \references{ \insertRef{pekalska_noneuclidean_2006}{maotai} } maotai/man/lyapunov.Rd0000644000175000017500000000217314176771675014625 0ustar nileshnilesh% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lyapunov.R \name{lyapunov} \alias{lyapunov} \title{Solve Lyapunov Equation} \usage{ lyapunov(A, Q) } \arguments{ \item{A}{a \eqn{(p\times p)} matrix as above.} \item{Q}{a \eqn{(p\times p)} matrix as above.} } \value{ a solution matrix \eqn{X} of size \eqn{(p\times p)}. } \description{ The Lyapunov equation is of form \deqn{AX + XA^\top = Q} where \eqn{A} and \eqn{Q} are square matrices of same size. Above form is also known as \emph{continuous} form. This is a wrapper of \code{armadillo}'s \code{sylvester} function. 
} \examples{ ## simulated example # generate square matrices A = matrix(rnorm(25),nrow=5) X = matrix(rnorm(25),nrow=5) Q = A\%*\%X + X\%*\%t(A) # solve using 'lyapunov' function solX = lyapunov(A,Q) \dontrun{ pm1 = "* Experiment with Lyapunov Solver" pm2 = paste("* Absolute Error : ",norm(solX-X,"f"),sep="") pm3 = paste("* Relative Error : ",norm(solX-X,"f")/norm(X,"f"),sep="") cat(paste(pm1,"\n",pm2,"\n",pm3,sep="")) } } \references{ \insertRef{sanderson_armadillo_2016}{maotai} \insertRef{eddelbuettel_rcpparmadillo_2014}{maotai} } maotai/src/0000755000175000017500000000000014176774175012470 5ustar nileshnileshmaotai/src/cpp_bmds.cpp0000644000175000017500000001672714176771675015002 0ustar nileshnilesh#include // [[Rcpp::depends(RcppArmadillo, RcppDist)]] using namespace Rcpp; using namespace arma; double my_invgamma(double alpha, double beta){ return(1.0/R::rgamma(alpha,1.0/beta)); } // 2. my_dinvgamma : inverse gamma evaluator double my_dinvgamma(double x, double alpha, double beta){ return(1.0/R::dgamma(x, alpha, 1.0/beta, 0)); } // Auxiliary Function : compute SSR // [[Rcpp::export]] double compute_SSR(arma::mat &D, arma::mat &Delta){ // parameters int N = D.n_rows; double NN = static_cast(N); // compute via iteration double outval = 0.0; double tobesq = 0.0; for (int i=0;i<(N-1);i++){ for (int j=(i+1);j(N); int p = Xnew.n_cols; double outval = 0.0; double tobesq = 0.0; arma::rowvec xvec1(p,fill::zeros); arma::rowvec xvec2(p,fill::zeros); double Delij = 0.0; for (int i=0;i(N)); arma::vec eigval; arma::mat eigvec; arma::eig_sym(eigval, eigvec, Xcov); arma::mat output = Xtmp*eigvec; return(output); } // Auxiliary Function : a single step for MH update arma::rowvec update_xvec(arma::mat D, arma::mat X, int id, double sigma2, double constant, arma::mat Lbdmat){ int N = X.n_rows; double NN = static_cast(N); int p = X.n_cols; arma::mat Xold = X; arma::mat Xtgt = X; double stepsize = static_cast(std::sqrt(static_cast(sigma2*constant/(NN-1.0)))); for (int 
i=0;i(std::sqrt(static_cast(R::pnorm5(Deltgt(i,j)/sigma,0.0,1.0,1,0)))); } } } double ftgt = -(Q1tgt+Q2tgt)/2.0 - t3tgt; // (2) compute for xold arma::mat Delold = compute_pdmat(Xold); double Q1old = 0.0; for (int i=0;i<(N-1);i++){ for (int j=(i+1);j(std::sqrt(static_cast(R::pnorm5(Delold(i,j)/sigma,0.0,1.0,1,0)))); } } } double fold = -(Q1old+Q2old)/2.0 - t3old; // (3) compute the ratio (?) double fratio = exp(ftgt-fold); if (fratio >= 1){ fratio = 1.0; } double rnumbr = R::runif(0.0, 1.0); if (rnumbr <= fratio){ // accept return(xtgt.t()); } else { return(xold.t()); } } // Auxiliary Function : 'stress' // https://ncss-wpengine.netdna-ssl.com/wp-content/themes/ncss/pdf/Procedures/NCSS/Multidimensional_Scaling.pdf // [[Rcpp::export]] double compute_stress(arma::mat &D, arma::mat &Dhat){ // D is original distance, Dhat is estimated ones int N = D.n_rows; double tobesq = 0.0; double term1 = 0.0; // numerator double term2 = 0.0; // denominator for (int i=0;i<(N-1);i++){ for (int j=(i+1);j(N); int p = X0.n_cols; double m = NN*(NN-1.0)/2.0; // 2) setup arma::mat Xold = crotX(X0); // X will not be recorded, just use arma::mat Xnew(N,p,fill::zeros); arma::mat Xsol = Xold; double SSRnew = 0.0; double SSRold = compute_SSR_xmat(D, Xold); double SSRsol = SSRold; arma::mat Sold(p,p,fill::zeros); double sigma2 = sigg0; double sigtmp = 0.0; arma::vec vecs(p,fill::zeros); arma::vec lambdas(p,fill::zeros); arma::mat Lbdmat; arma::rowvec tmprow(p,fill::zeros); double b = (a-1)*SSRold/m; // paper's setup double varalpha = 0.0; double varbeta = 0.0; double varvar = 0.0; double varratio = 0.0; // 3) iteration // int accept = 0; for (int i=0;i 0){ // let's compare varratio = my_dinvgamma(sigtmp,varalpha,varbeta)/my_dinvgamma(sigma2,varalpha,varbeta); if (varratio > 1){ varratio = 1.0; } if (R::runif(0,1) <= varratio){ sigma2 = sigtmp; } } // 3-4. 
update correspondingly if (SSRnew < SSRsol){ // running record of the best solution SSRsol = SSRnew; Xsol = Xnew; } SSRold = SSRnew; Xold = Xnew; // 3-5. report the update if (verbose==true){ Rcpp::Rcout << "** bmds : iteration " << i+1 << "/" << maxiter << " complete." << std::endl; } } // 4) return return Rcpp::List::create(Rcpp::Named("solX")=Xsol); } maotai/src/cpp_mmds.cpp0000644000175000017500000000440214176771675015000 0ustar nileshnilesh#include // [[Rcpp::depends(RcppArmadillo)]] using namespace Rcpp; using namespace arma; using namespace std; // [[Rcpp::export]] arma::mat cpp_mmds(arma::mat& D, int ndim, int maxiter, double abstol){ // initialization with CMDS int N = D.n_rows; arma::mat D2 = arma::pow(D, 2.0); arma::mat J = arma::eye(N,N) - (arma::ones(N,N)/(static_cast(N))); arma::mat B = -0.5*J*D2*J; arma::vec eigval; arma::mat eigvec; arma::eig_sym(eigval, eigvec, B); arma::mat old_X = eigvec.tail_cols(ndim)*arma::diagmat(arma::sqrt(eigval.tail(ndim))); arma::mat old_D(N,N,fill::zeros); for (int i=0; i<(N-1); i++){ for (int j=(i+1); j epsthr){ BZ(i,j) = -D(i,j)/dijZ; BZ(j,i) = BZ(i,j); } } } // diagonal part for (int i=0; i(N)); new_D.fill(0.0); for (int i=0; i<(N-1); i++){ for (int j=(i+1); j using namespace Rcpp; using namespace arma; double eval_gaussian(arma::vec x, arma::vec mu, arma::mat cov); double eval_gmm(arma::vec x, arma::mat mus, arma::cube covs, arma::vec weight); arma::vec eval_gaussian_data(arma::mat X, arma::vec mu, arma::mat cov); arma::vec eval_gmm_data(arma::mat X, arma::mat mus, arma::cube covs, arma::vec weight); #endif maotai/src/cpp_smacof.cpp0000644000175000017500000000741314176771675015315 0ustar nileshnilesh#include // [[Rcpp::depends(RcppArmadillo)]] using namespace Rcpp; using namespace arma; using namespace std; /* Auxiliary Functions * init_by_cmds : quick lazy implementation of CMDS * construct_Vinv_weighted : Vinv * compute_raw_stress : compute the raw stress * operation_B : B(Z) operator */ arma::mat 
operation_B(arma::mat &DZ, arma::mat &D, arma::mat &W){ int N = DZ.n_rows; arma::mat BZ(N,N,fill::zeros); // off-diagonals first for (int i=0; i<(N-1); i++){ for (int j=(i+1); j(N,N) - (arma::ones(N,N)/(static_cast(N))); arma::mat B = -0.5*J*D2*J; arma::vec eigval; arma::mat eigvec; arma::eig_sym(eigval, eigvec, B); arma::mat hey_mat = arma::fliplr(eigvec.tail_cols(ndim)); arma::vec hey_vec = arma::sqrt(arma::reverse(eigval.tail(ndim))); arma::mat output = hey_mat*arma::diagmat(hey_vec); return(output); } arma::mat construct_Vinv_weighted(arma::mat &W){ int N = W.n_rows; // compute V first arma::rowvec rowW(N,fill::zeros); arma::mat V(N,N,fill::zeros); for (int i=0; i<(N-1); i++){ for (int j=(i+1); j(N,N)) - ((arma::eye(N,N))/static_cast(N*N)); return(Vinv); } // [[Rcpp::export]] Rcpp::List src_smacof(arma::mat &D, arma::mat &W, int ndim, int maxiter, double abstol, bool use_gutman){ // initialize via CMDS int N = D.n_rows; arma::mat old_X = init_by_cmds(D, ndim); arma::mat new_X(N, ndim, fill::zeros); arma::mat old_Xdist(N,N,fill::zeros); arma::mat new_Xdist(N,N,fill::zeros); for (int i=0; i<(N-1); i++){ for (int j=(i+1); j(N,N)/(static_cast(N)); } else { Vinv = construct_Vinv_weighted(W); } // iterate for (int it=0; it(N)); } // compute the pairwise distance for (int i=0; i<(N-1); i++){ for (int j=(i+1); j #include "evaluations.h" // [[Rcpp::depends(RcppArmadillo)]] using namespace Rcpp; using namespace arma; /* * 1. aux_shortestpath : 'shortestpath' * 2. cppsub_2007Wang : 'trio' * 3. gradF : 'matderiv' * 4. dat2centers : 'dpmeans' * 5. cpp_sylvester & * cpp_lyapunov * 6. cpp_weiszfeld : 'weiszfeld' * 7. cpp_kmeans : 'kmeans' for testing only * 8. emds_gamma0 : 'emds' * 9. cpp_pairwise_L2 : L2 distance between GMM's. * 10. integrate_1d : 1d integration, generic but used in distance computation * 11. cpp_pdist : compute pairwise distance * 12. cpp_geigen : do 'geigen' pairwise eigendecomposition * 13. 
cpp_triangle : check triangle inequality */ /////////////////////////////////////////////////////////////////// // 1. aux_shortestpath //////////////////// Sub Routine LogicalMatrix isweird(NumericMatrix x){ const int n = x.nrow(); LogicalMatrix out(n,n); for (int i=0;i(dist(i,k)+dist(k,j))){ dist(i,j)=dist(i,k)+dist(k,j); } } } } // 3-4. return output return(dist); } /////////////////////////////////////////////////////////////////// // 2. cppsub_2007Wang // [[Rcpp::export]] arma::mat cppsub_2007Wang(arma::mat V0, int mm, int d, arma::mat Spu, arma::mat Stu, int maxiter, double eps){ // 1. preliminary setup double abstol = std::sqrt((static_cast(mm*d))*eps); arma::mat Vold = V0; arma::mat Vnew(mm,d,fill::zeros); double lbdn = 0.0; double incV = 0.0; arma::vec Vval(mm,fill::zeros); arma::mat Vvec(mm,mm,fill::zeros); arma::mat Vtmp(mm,d,fill::zeros); arma::mat Stv(mm,mm,fill::zeros); // 2. do the iteration for (int i=0;i(func(Xp)))- sum(as(func(Xm))))/(2.0*h); Xp(i,j) = Xp(i,j) - h; Xm(i,j) = Xm(i,j) + h; } } return(dX); } /////////////////////////////////////////////////////////////////// // 4. dat2centers // [[Rcpp::export]] arma::vec dat2centers(arma::rowvec data, arma::mat ¢ers){ // parameters int K = centers.n_rows; int p = data.n_cols; // compute arma::vec dic(K,fill::zeros); arma::rowvec diffvec(p,fill::zeros); for (int k=0;k gamma0){ gamma0 = theval; } } } } // report return(gamma0); } /////////////////////////////////////////////////////////////////// // 9. 
cpp_pairwise_L2 // [[Rcpp::export]] Rcpp::List cpp_pairwise_L2(arma::mat muA, arma::mat muB, arma::cube covA, arma::cube covB){ // parameters int N = muA.n_rows; int M = muB.n_rows; int p = muA.n_cols; // output arma::mat matA(N,N,fill::zeros); arma::mat matB(M,M,fill::zeros); arma::mat matC(N,M,fill::zeros); // preparation arma::vec parvec1(p,fill::zeros); arma::vec parvec2(p,fill::zeros); arma::mat parcovv(p,p,fill::zeros); // matA : N normals from mixture 1 for (int n=0;n(x.n_elem); arma::vec xdiff = x-mu; arma::vec xsolv = arma::solve(cov, xdiff); double term1 = arma::dot(xdiff,xsolv)*(-0.5); double term2 = -(d/2.0)*std::log(pi2); double term3 = -0.5*std::log(static_cast(arma::det(cov))); return(std::exp(term1+term2+term3)); } // [[Rcpp::export]] arma::vec eval_gaussian_data(arma::mat X, arma::vec mu, arma::mat cov){ // (n x p) convention // some constants double pi2 = 6.28318530717958623199592693708837032318115234375; int n = X.n_rows; int d = mu.n_elem; // output arma::vec output(n,fill::zeros); // common objects double term1 = 0.0; double term2 = -(static_cast(d)/2.0)*std::log(pi2); double term3 = -0.5*std::log(static_cast(arma::det(cov))); arma::rowvec xtgtrow(d,fill::zeros); arma::colvec xtgtcol(d,fill::zeros); arma::vec xdiff(d,fill::zeros); arma::vec xsolv(d,fill::zeros); for (int i=0;i do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include #include using namespace Rcpp; #ifdef RCPP_USE_GLOBAL_ROSTREAM Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // compute_SSR double compute_SSR(arma::mat& D, arma::mat& Delta); RcppExport SEXP _maotai_compute_SSR(SEXP DSEXP, SEXP DeltaSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat& >::type D(DSEXP); Rcpp::traits::input_parameter< arma::mat& >::type Delta(DeltaSEXP); rcpp_result_gen = Rcpp::wrap(compute_SSR(D, Delta)); return rcpp_result_gen; 
END_RCPP } // compute_stress double compute_stress(arma::mat& D, arma::mat& Dhat); RcppExport SEXP _maotai_compute_stress(SEXP DSEXP, SEXP DhatSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat& >::type D(DSEXP); Rcpp::traits::input_parameter< arma::mat& >::type Dhat(DhatSEXP); rcpp_result_gen = Rcpp::wrap(compute_stress(D, Dhat)); return rcpp_result_gen; END_RCPP } // main_bmds Rcpp::List main_bmds(arma::mat D, arma::mat X0, double sigg0, double a, double alpha, int maxiter, double constant, bool verbose, arma::vec betas); RcppExport SEXP _maotai_main_bmds(SEXP DSEXP, SEXP X0SEXP, SEXP sigg0SEXP, SEXP aSEXP, SEXP alphaSEXP, SEXP maxiterSEXP, SEXP constantSEXP, SEXP verboseSEXP, SEXP betasSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat >::type D(DSEXP); Rcpp::traits::input_parameter< arma::mat >::type X0(X0SEXP); Rcpp::traits::input_parameter< double >::type sigg0(sigg0SEXP); Rcpp::traits::input_parameter< double >::type a(aSEXP); Rcpp::traits::input_parameter< double >::type alpha(alphaSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); Rcpp::traits::input_parameter< double >::type constant(constantSEXP); Rcpp::traits::input_parameter< bool >::type verbose(verboseSEXP); Rcpp::traits::input_parameter< arma::vec >::type betas(betasSEXP); rcpp_result_gen = Rcpp::wrap(main_bmds(D, X0, sigg0, a, alpha, maxiter, constant, verbose, betas)); return rcpp_result_gen; END_RCPP } // aux_shortestpath Rcpp::NumericMatrix aux_shortestpath(NumericMatrix& wmat); RcppExport SEXP _maotai_aux_shortestpath(SEXP wmatSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< NumericMatrix& >::type wmat(wmatSEXP); rcpp_result_gen = Rcpp::wrap(aux_shortestpath(wmat)); return rcpp_result_gen; END_RCPP } // cppsub_2007Wang arma::mat cppsub_2007Wang(arma::mat V0, int 
mm, int d, arma::mat Spu, arma::mat Stu, int maxiter, double eps); RcppExport SEXP _maotai_cppsub_2007Wang(SEXP V0SEXP, SEXP mmSEXP, SEXP dSEXP, SEXP SpuSEXP, SEXP StuSEXP, SEXP maxiterSEXP, SEXP epsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat >::type V0(V0SEXP); Rcpp::traits::input_parameter< int >::type mm(mmSEXP); Rcpp::traits::input_parameter< int >::type d(dSEXP); Rcpp::traits::input_parameter< arma::mat >::type Spu(SpuSEXP); Rcpp::traits::input_parameter< arma::mat >::type Stu(StuSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); Rcpp::traits::input_parameter< double >::type eps(epsSEXP); rcpp_result_gen = Rcpp::wrap(cppsub_2007Wang(V0, mm, d, Spu, Stu, maxiter, eps)); return rcpp_result_gen; END_RCPP } // gradF arma::mat gradF(Function func, arma::mat xnow, double h); RcppExport SEXP _maotai_gradF(SEXP funcSEXP, SEXP xnowSEXP, SEXP hSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< Function >::type func(funcSEXP); Rcpp::traits::input_parameter< arma::mat >::type xnow(xnowSEXP); Rcpp::traits::input_parameter< double >::type h(hSEXP); rcpp_result_gen = Rcpp::wrap(gradF(func, xnow, h)); return rcpp_result_gen; END_RCPP } // dat2centers arma::vec dat2centers(arma::rowvec data, arma::mat& centers); RcppExport SEXP _maotai_dat2centers(SEXP dataSEXP, SEXP centersSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::rowvec >::type data(dataSEXP); Rcpp::traits::input_parameter< arma::mat& >::type centers(centersSEXP); rcpp_result_gen = Rcpp::wrap(dat2centers(data, centers)); return rcpp_result_gen; END_RCPP } // cpp_sylvester arma::mat cpp_sylvester(arma::mat A, arma::mat B, arma::mat C); RcppExport SEXP _maotai_cpp_sylvester(SEXP ASEXP, SEXP BSEXP, SEXP CSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; 
Rcpp::traits::input_parameter< arma::mat >::type A(ASEXP); Rcpp::traits::input_parameter< arma::mat >::type B(BSEXP); Rcpp::traits::input_parameter< arma::mat >::type C(CSEXP); rcpp_result_gen = Rcpp::wrap(cpp_sylvester(A, B, C)); return rcpp_result_gen; END_RCPP } // solve_lyapunov arma::mat solve_lyapunov(arma::mat A, arma::mat B, arma::mat C); RcppExport SEXP _maotai_solve_lyapunov(SEXP ASEXP, SEXP BSEXP, SEXP CSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat >::type A(ASEXP); Rcpp::traits::input_parameter< arma::mat >::type B(BSEXP); Rcpp::traits::input_parameter< arma::mat >::type C(CSEXP); rcpp_result_gen = Rcpp::wrap(solve_lyapunov(A, B, C)); return rcpp_result_gen; END_RCPP } // cpp_weiszfeld arma::rowvec cpp_weiszfeld(arma::mat X, double abstol, int maxiter, arma::rowvec xinit, arma::vec weights, double epsnum); RcppExport SEXP _maotai_cpp_weiszfeld(SEXP XSEXP, SEXP abstolSEXP, SEXP maxiterSEXP, SEXP xinitSEXP, SEXP weightsSEXP, SEXP epsnumSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat >::type X(XSEXP); Rcpp::traits::input_parameter< double >::type abstol(abstolSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); Rcpp::traits::input_parameter< arma::rowvec >::type xinit(xinitSEXP); Rcpp::traits::input_parameter< arma::vec >::type weights(weightsSEXP); Rcpp::traits::input_parameter< double >::type epsnum(epsnumSEXP); rcpp_result_gen = Rcpp::wrap(cpp_weiszfeld(X, abstol, maxiter, xinit, weights, epsnum)); return rcpp_result_gen; END_RCPP } // cpp_kmeans Rcpp::List cpp_kmeans(arma::mat data, int k); RcppExport SEXP _maotai_cpp_kmeans(SEXP dataSEXP, SEXP kSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat >::type data(dataSEXP); Rcpp::traits::input_parameter< int >::type k(kSEXP); rcpp_result_gen = 
Rcpp::wrap(cpp_kmeans(data, k)); return rcpp_result_gen; END_RCPP } // emds_gamma0 double emds_gamma0(arma::mat dmat); RcppExport SEXP _maotai_emds_gamma0(SEXP dmatSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat >::type dmat(dmatSEXP); rcpp_result_gen = Rcpp::wrap(emds_gamma0(dmat)); return rcpp_result_gen; END_RCPP } // cpp_pairwise_L2 Rcpp::List cpp_pairwise_L2(arma::mat muA, arma::mat muB, arma::cube covA, arma::cube covB); RcppExport SEXP _maotai_cpp_pairwise_L2(SEXP muASEXP, SEXP muBSEXP, SEXP covASEXP, SEXP covBSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat >::type muA(muASEXP); Rcpp::traits::input_parameter< arma::mat >::type muB(muBSEXP); Rcpp::traits::input_parameter< arma::cube >::type covA(covASEXP); Rcpp::traits::input_parameter< arma::cube >::type covB(covBSEXP); rcpp_result_gen = Rcpp::wrap(cpp_pairwise_L2(muA, muB, covA, covB)); return rcpp_result_gen; END_RCPP } // integrate_1d double integrate_1d(arma::vec& tseq, arma::vec& fval); RcppExport SEXP _maotai_integrate_1d(SEXP tseqSEXP, SEXP fvalSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::vec& >::type tseq(tseqSEXP); Rcpp::traits::input_parameter< arma::vec& >::type fval(fvalSEXP); rcpp_result_gen = Rcpp::wrap(integrate_1d(tseq, fval)); return rcpp_result_gen; END_RCPP } // cpp_pdist arma::mat cpp_pdist(arma::mat X); RcppExport SEXP _maotai_cpp_pdist(SEXP XSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat >::type X(XSEXP); rcpp_result_gen = Rcpp::wrap(cpp_pdist(X)); return rcpp_result_gen; END_RCPP } // cpp_geigen Rcpp::List cpp_geigen(arma::mat& A, arma::mat& B); RcppExport SEXP _maotai_cpp_geigen(SEXP ASEXP, SEXP BSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; 
Rcpp::traits::input_parameter< arma::mat& >::type A(ASEXP); Rcpp::traits::input_parameter< arma::mat& >::type B(BSEXP); rcpp_result_gen = Rcpp::wrap(cpp_geigen(A, B)); return rcpp_result_gen; END_RCPP } // cpp_triangle bool cpp_triangle(arma::mat& D); RcppExport SEXP _maotai_cpp_triangle(SEXP DSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat& >::type D(DSEXP); rcpp_result_gen = Rcpp::wrap(cpp_triangle(D)); return rcpp_result_gen; END_RCPP } // cpp_mmds arma::mat cpp_mmds(arma::mat& D, int ndim, int maxiter, double abstol); RcppExport SEXP _maotai_cpp_mmds(SEXP DSEXP, SEXP ndimSEXP, SEXP maxiterSEXP, SEXP abstolSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat& >::type D(DSEXP); Rcpp::traits::input_parameter< int >::type ndim(ndimSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); Rcpp::traits::input_parameter< double >::type abstol(abstolSEXP); rcpp_result_gen = Rcpp::wrap(cpp_mmds(D, ndim, maxiter, abstol)); return rcpp_result_gen; END_RCPP } // src_smacof Rcpp::List src_smacof(arma::mat& D, arma::mat& W, int ndim, int maxiter, double abstol, bool use_gutman); RcppExport SEXP _maotai_src_smacof(SEXP DSEXP, SEXP WSEXP, SEXP ndimSEXP, SEXP maxiterSEXP, SEXP abstolSEXP, SEXP use_gutmanSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat& >::type D(DSEXP); Rcpp::traits::input_parameter< arma::mat& >::type W(WSEXP); Rcpp::traits::input_parameter< int >::type ndim(ndimSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); Rcpp::traits::input_parameter< double >::type abstol(abstolSEXP); Rcpp::traits::input_parameter< bool >::type use_gutman(use_gutmanSEXP); rcpp_result_gen = Rcpp::wrap(src_smacof(D, W, ndim, maxiter, abstol, use_gutman)); return rcpp_result_gen; END_RCPP } // eval_gaussian double 
eval_gaussian(arma::vec x, arma::vec mu, arma::mat cov); RcppExport SEXP _maotai_eval_gaussian(SEXP xSEXP, SEXP muSEXP, SEXP covSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::vec >::type x(xSEXP); Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); Rcpp::traits::input_parameter< arma::mat >::type cov(covSEXP); rcpp_result_gen = Rcpp::wrap(eval_gaussian(x, mu, cov)); return rcpp_result_gen; END_RCPP } // eval_gaussian_data arma::vec eval_gaussian_data(arma::mat X, arma::vec mu, arma::mat cov); RcppExport SEXP _maotai_eval_gaussian_data(SEXP XSEXP, SEXP muSEXP, SEXP covSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat >::type X(XSEXP); Rcpp::traits::input_parameter< arma::vec >::type mu(muSEXP); Rcpp::traits::input_parameter< arma::mat >::type cov(covSEXP); rcpp_result_gen = Rcpp::wrap(eval_gaussian_data(X, mu, cov)); return rcpp_result_gen; END_RCPP } // eval_gmm_data arma::vec eval_gmm_data(arma::mat X, arma::mat mus, arma::cube covs, arma::vec weight); RcppExport SEXP _maotai_eval_gmm_data(SEXP XSEXP, SEXP musSEXP, SEXP covsSEXP, SEXP weightSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat >::type X(XSEXP); Rcpp::traits::input_parameter< arma::mat >::type mus(musSEXP); Rcpp::traits::input_parameter< arma::cube >::type covs(covsSEXP); Rcpp::traits::input_parameter< arma::vec >::type weight(weightSEXP); rcpp_result_gen = Rcpp::wrap(eval_gmm_data(X, mus, covs, weight)); return rcpp_result_gen; END_RCPP } // eval_gmm double eval_gmm(arma::vec x, arma::mat mus, arma::cube covs, arma::vec weight); RcppExport SEXP _maotai_eval_gmm(SEXP xSEXP, SEXP musSEXP, SEXP covsSEXP, SEXP weightSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::vec >::type x(xSEXP); 
Rcpp::traits::input_parameter< arma::mat >::type mus(musSEXP); Rcpp::traits::input_parameter< arma::cube >::type covs(covsSEXP); Rcpp::traits::input_parameter< arma::vec >::type weight(weightSEXP); rcpp_result_gen = Rcpp::wrap(eval_gmm(x, mus, covs, weight)); return rcpp_result_gen; END_RCPP } // src_construct_by_knn arma::sp_umat src_construct_by_knn(arma::umat& nn_idx, bool intersection); RcppExport SEXP _maotai_src_construct_by_knn(SEXP nn_idxSEXP, SEXP intersectionSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::umat& >::type nn_idx(nn_idxSEXP); Rcpp::traits::input_parameter< bool >::type intersection(intersectionSEXP); rcpp_result_gen = Rcpp::wrap(src_construct_by_knn(nn_idx, intersection)); return rcpp_result_gen; END_RCPP } // src_gaussbary_2002R Rcpp::List src_gaussbary_2002R(arma::cube& array3d, arma::vec& weight, int maxiter, double abstol); RcppExport SEXP _maotai_src_gaussbary_2002R(SEXP array3dSEXP, SEXP weightSEXP, SEXP maxiterSEXP, SEXP abstolSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::cube& >::type array3d(array3dSEXP); Rcpp::traits::input_parameter< arma::vec& >::type weight(weightSEXP); Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); Rcpp::traits::input_parameter< double >::type abstol(abstolSEXP); rcpp_result_gen = Rcpp::wrap(src_gaussbary_2002R(array3d, weight, maxiter, abstol)); return rcpp_result_gen; END_RCPP } // src_gaussbary_2016A Rcpp::List src_gaussbary_2016A(arma::cube& array3d, arma::vec& weight, int maxiter, double abstol); RcppExport SEXP _maotai_src_gaussbary_2016A(SEXP array3dSEXP, SEXP weightSEXP, SEXP maxiterSEXP, SEXP abstolSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::cube& >::type array3d(array3dSEXP); Rcpp::traits::input_parameter< arma::vec& >::type weight(weightSEXP); 
Rcpp::traits::input_parameter< int >::type maxiter(maxiterSEXP); Rcpp::traits::input_parameter< double >::type abstol(abstolSEXP); rcpp_result_gen = Rcpp::wrap(src_gaussbary_2016A(array3d, weight, maxiter, abstol)); return rcpp_result_gen; END_RCPP } // src_cov2corr arma::mat src_cov2corr(arma::mat& covmat); RcppExport SEXP _maotai_src_cov2corr(SEXP covmatSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< arma::mat& >::type covmat(covmatSEXP); rcpp_result_gen = Rcpp::wrap(src_cov2corr(covmat)); return rcpp_result_gen; END_RCPP } static const R_CallMethodDef CallEntries[] = { {"_maotai_compute_SSR", (DL_FUNC) &_maotai_compute_SSR, 2}, {"_maotai_compute_stress", (DL_FUNC) &_maotai_compute_stress, 2}, {"_maotai_main_bmds", (DL_FUNC) &_maotai_main_bmds, 9}, {"_maotai_aux_shortestpath", (DL_FUNC) &_maotai_aux_shortestpath, 1}, {"_maotai_cppsub_2007Wang", (DL_FUNC) &_maotai_cppsub_2007Wang, 7}, {"_maotai_gradF", (DL_FUNC) &_maotai_gradF, 3}, {"_maotai_dat2centers", (DL_FUNC) &_maotai_dat2centers, 2}, {"_maotai_cpp_sylvester", (DL_FUNC) &_maotai_cpp_sylvester, 3}, {"_maotai_solve_lyapunov", (DL_FUNC) &_maotai_solve_lyapunov, 3}, {"_maotai_cpp_weiszfeld", (DL_FUNC) &_maotai_cpp_weiszfeld, 6}, {"_maotai_cpp_kmeans", (DL_FUNC) &_maotai_cpp_kmeans, 2}, {"_maotai_emds_gamma0", (DL_FUNC) &_maotai_emds_gamma0, 1}, {"_maotai_cpp_pairwise_L2", (DL_FUNC) &_maotai_cpp_pairwise_L2, 4}, {"_maotai_integrate_1d", (DL_FUNC) &_maotai_integrate_1d, 2}, {"_maotai_cpp_pdist", (DL_FUNC) &_maotai_cpp_pdist, 1}, {"_maotai_cpp_geigen", (DL_FUNC) &_maotai_cpp_geigen, 2}, {"_maotai_cpp_triangle", (DL_FUNC) &_maotai_cpp_triangle, 1}, {"_maotai_cpp_mmds", (DL_FUNC) &_maotai_cpp_mmds, 4}, {"_maotai_src_smacof", (DL_FUNC) &_maotai_src_smacof, 6}, {"_maotai_eval_gaussian", (DL_FUNC) &_maotai_eval_gaussian, 3}, {"_maotai_eval_gaussian_data", (DL_FUNC) &_maotai_eval_gaussian_data, 3}, {"_maotai_eval_gmm_data", (DL_FUNC) 
&_maotai_eval_gmm_data, 4}, {"_maotai_eval_gmm", (DL_FUNC) &_maotai_eval_gmm, 4}, {"_maotai_src_construct_by_knn", (DL_FUNC) &_maotai_src_construct_by_knn, 2}, {"_maotai_src_gaussbary_2002R", (DL_FUNC) &_maotai_src_gaussbary_2002R, 4}, {"_maotai_src_gaussbary_2016A", (DL_FUNC) &_maotai_src_gaussbary_2016A, 4}, {"_maotai_src_cov2corr", (DL_FUNC) &_maotai_src_cov2corr, 1}, {NULL, NULL, 0} }; RcppExport void R_init_maotai(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } maotai/src/src_computations.cpp0000644000175000017500000001126114176771675016573 0ustar nileshnilesh#include "RcppArmadillo.h" // [[Rcpp::depends(RcppArmadillo)]] using namespace Rcpp; using namespace arma; /* Functions for Generic Computations * * (01) src_construct_by_knn [hidden_knn_binary] * (02) src_gaussbary_2002R : Barycenter of Gaussian Covariances * (03) src_gaussbary_2016A : Barycenter of Gaussian Covariances * (04) src_cov2corr : use Wikipedia's formula */ // (01) src_construct_by_knn [hidden_knn_binary] =============================== // [[Rcpp::export]] arma::sp_umat src_construct_by_knn(arma::umat& nn_idx, bool intersection){ // prepare int n = nn_idx.n_rows; int k = nn_idx.n_cols; arma::sp_umat output(n,n); // fill in for (int i=0; i 0)&&(output(j,i) > 0)){ output(i,j) = 1; output(j,i) = 1; } else { output(i,j) = 0; output(j,i) = 0; } } } } else { // case : union for (int i=0; i<(n-1); i++){ for (int j=(i+1); j 0)||(output(j,i) > 0)){ output(i,j) = 1; output(j,i) = 1; } else { output(i,j) = 0; output(j,i) = 0; } } } } return(output); } // (02) src_gaussbary_2002R ==================================================== // [[Rcpp::export]] Rcpp::List src_gaussbary_2002R(arma::cube &array3d, arma::vec &weight, int maxiter, double abstol){ // PREPARE int p = array3d.n_rows; int N = array3d.n_slices; double S_inc = 10000.0; arma::mat S_old = arma::mean(array3d, 2); int S_old_rank = arma::rank(S_old); if (S_old_rank < p){ S_old.fill(0.0); 
for (int n=0; n(N); } S_old = arma::expmat_sym(S_old); } arma::mat S_new(p,p,fill::zeros); arma::mat S_oldhalf(p,p,fill::zeros); arma::vec S_weight = weight/arma::accu(weight); // MAIN int itcount = 0; for (int it=0; it(N); } S_old = arma::expmat_sym(S_old); } arma::mat S_tmp(p,p,fill::zeros); arma::mat S_new(p,p,fill::zeros); arma::mat S0_half(p,p,fill::zeros); arma::mat S0_hinv(p,p,fill::zeros); arma::vec S_weight = weight/arma::accu(weight); // MAIN int itcount = 0; for (int it=0; it'P+ )@CGA.׶uLԬFv(CFV*d{-2P}ks-r_g$>:/ˋ/< sMH|$T,7hévyHBAG:uI8'1i5"(PJU3DHC4Ï9X~b56W!/&SظDvGz+b IIX+ymvtc(Neƨq #P?%+= d:u^Acx >Iw^'1ہLf>s P BQLhH '2"$M98g?;'/~IY:lFz%AH׫+lm@g[[lXeG70eP|~,?L1atD3ܳȀopi0=՚z5g:r4Z dBO0>d<|Ћ9A'yuI.Hw6Mqiu5&ss]suҰ $mhfE'zy=#F5_+y?^uJ<*ʼ}0@|z9>R{zE.ȓ"&Pl_YhknٔߗA]h $ӯT mݛBW (Ly#YirU{VDz{u~eNEh'H( Rnh%#c_'b ?Aͮ S WsM _o!$ @ jΧ0=M71{}U>ֆQw\fWIU4!Jk:ނ.Tj:GϣAA?1a|%x|0(q B+.:12 ?/-.-)6rumY| d|nO2a\dLތː._TCYFE!NZx"dQZP_9N,![+ߠMmEߐ\F}9F_a$>rlU׽a{']R T@iF#X9!)Y l;R\%5qa: 0y]F;drV& 1AƙUNhc)?JKXR "['Rn4A=h9h"[T T7h=@ E GWiҾ܍Br)g"O3R|A#,@ G)p$JQ=_aRJ6!F#X*quEװr:v5ں>6􋱖3fՆVy4m u}ous3;~_F=+fr +ehU6{]rWDzxSARPԂ 奅!e dǽ/hקV׏%զs/6R,kktйM,ds{rsU\E.m͌ VYEwPJe~{})3Zܰ ߒQLžAͲ/2xMr c`{n|b5mhie5nz{Ǫ'FbKszvٛ{eVM陙)jZvu_HûڶjQJk%SORǎxuFQͻ >=e£h{H3 vtDKh:yC!8iDO]ݏ$Dg[/_k!Xսkw>httTve:!xxhoN][Mwg:9f7mytwIڭ7(EQ#;^0h#/a[/jbH; {:~ruSxu|ivm5s'w:3ӫ˻8Ug۴t`g˲j[^.Jio\FphA+x꽼ݲv`kmm5֘nV5歱Z^~fm]Ǣlq깩m5iA;*H+zn?vpE VݍY晨Z}xqm㕈jU0Mn5ZU--s|ungv$6DÉjշel&W]һK{cĴ &歓;`!/]D4fUʙ6>uCG{/iǰ+"-ѥlG.8K-%fj 68TRB6`K "AMCƝSU'NӤ#wo9=ѨL nblRlkt8^k#ji_{?Hѵ顖o)]WaNYúwK-?7#X tUM+iV6 S!Z=sqqso ^ Pw('VZ#o7ES~l sY͎ ȟPII5^G`!)ޥ e"tœ#5 vͷWϑ*ҏ3uxT4%_/_[[:47k9,ay]ˏbaUU˻f[]YݮWș#sh)Q+ǎ3#*}8鳳`78z~~:'ޗ3œB5 y{1-U:ӵ19J7UR&n{]7Ksjr^5\b;/XZzaÄu/w摇M3s559.YivO'O`w=隣X*ޖ!>0rg;j!̣]z$#8SHQ"HSjA*4}(1i@1#(GQ*T[{B_vO3EvzFج Mȼ" Z QVh9H> WҖo2A˥ɒLFb25;Y, Ps5̲2=au2]sķyK#=dxXGܙ~ :H  d.Z8Ȧ^q-溻A8@ Ȁ pg$>*n]<$w姨*mPU6 UZ71pP.0挺ddj5I3RfKPr K&H46xYTͣҭ6=9k=HQ(_ (o~Kv!m*3GZ= %'?I0s(T0'jocoÛ.1ڣhA{Sf.9)N ӌL(.`p8#.434޿ed*fT@.-<~ަf`u i<;5۪r5_,;o5]D"slӺ 
/65l1_*^20Nndgi,şs 6!t_t6> (_oŏ<": x訫Rt_Qp"OS۰j #q'5Wh @v Bp8LE2T~ಅ.!KJkίR""Jѝ8*k✏nh [dJ̛F.a&eڛcL6iGڹ[^̘^% ӯ-??z8a2 [}$Q^DLnbZ  mzRd3H&g22M5 j*r#@AQ%IMu}ܽ7LtS| {9){M175# w{>d2$»ޝ&^@q|F+wIXÁuSeᖨ­ Pocw5L$ 3&jaydlJ׶Gx ŹwZM7[=-x\jАDçvŋA $n)"!H'R*{ Iӌ'qERa|M'ci,@[B:b$>ۦT6~47۳A&oRxB8F>E^&Y=:"9\(>jC'EԽwIjĭ# uw٦EAoEsO߂bM^0;h?`kqo{St{YoL:Nnnڳ\4xlĿ&^#9".'0%V,oMLKŽՂ$pMGQv'8ELf Kh ) >FcpP$MH|iXV3 QhJ6YF67BX YU%ԅi,$m!--ft(Փ@X.tm5hyRȗ6D̶Je.#HdBd2dw= =8"a @:#2Hʙw!2H .D k"I'^hh6 8]СmTHՠQnz &Kyk KuA%i 9Ea-y 2E0 IL"b!-y-R]$iKQ !m]*A2$X )h֮'iK@&XO UHb8#=H[M٤&)n %b$>|Ұjk8oڃ7 -iGLWF-ǻC—~ȃ9l5"O^4? "&'*BQ BjrhX{*=8(b`+L{N>/zB] [lDZ*^M#Ƌt{]3G5E<Nn9Taj㕕PY*N].1kY5gnnn87UB mF|ipkT-N1Y baj/Eݵ倫sePDw(9U$9 gnrɮ^E}(!jGVMl['_~*pjMC-t\UTu):4g-6bvtwu5uh!לXIun2M/"Jޚ`}͔+~ej>`}q}Xɗ @J3r 7ގ49HJUqgwN=/LSonzJ,r+_F rxGFLgR1<"">(rɟ`d9G]`BS'!dI #V% Yi---5kQ5h@Z#+S9zqzI؅PU'2PF4޹uݞHuןI'gz!yG䋌GQx *!%),s W3g!+ v.gdv^/QSX<4UשT&$'G^í : A9X)ҖUn""JL?G kB˃\F}9&_a$>!ǵuє_A2apk ͬln(`lc̚O[QNl۲y 2rusQt0PA2E?FtoLC;$RRhir X2GTgݳَ?M4N&,5k7+.4"$jP>GtI6ᴗH952hW  a_^ ܌yh1A>zqB ň|iR|0~{ul x`cxdspچ㹏RlWQ* :՚UP$sŹ.~QdɨX5\Ӏ A4BTu$ Ĺ^.VB_ec]բnY}"E&I5ò5fB#PLySd`bOl͝V1˾崜7,9Dy,clI<c*d3+gr4L!pnz\ed D/|a'[_MF({~䲲I˅5Qz}<7V(߾hێe)Δs!7pW ǯWc৙LLd"r|.2#OjqvOA{<(8F+t~oЫ ݊? o{Iw7*vЃ%͡veÿ -ƵxMɹSMeEރq#s%sdsdP.0 `g])LMMLKS9R}0Q]XF|P5yQT}{xS4]h(' 2to4K׋NbQ {N%F]%04Cm#'{TiE=_l,ca@Eftw,B{p[1t$;01HZĦea35~ejZqcn-VbҌG\8iYZqz߹X3I.<ƪ6࢘wci(.ȐYضFXx 3nIe!Pqa,è'oH~ $^,éy?E&鏗eu|/(I (dnنQ'peb鸺 ;6+5 ׀=T`YZn~KaianSIx,#4ޑ~ሲQYn<,3QԏJ9B9n8QˑN]0!5^¾"?(\ * JS]3y`Zq' 7ꀝ%-ۣg+l!x2[٠PY/R]av ,t@v"q̄. ";QK)'iOByc"ռH(/v"I֬'dG|uGIYN:|޹Ux2P88/} %/ooO0Na RE NlЃl}1]шaΆU"U}0uqRGA. 
HGPP/Lm<)_ٵ=&>A4' BA]Pfzpօn4 $3AyF4 |g?vy"j4 ̨RuA3 9@F4 ">sB<{a_ qYpv%i;hs@;j\Cyu%:Uag.ؒub[3V3;0] W`r-9ߢ2DLO)}RC]6qbGT2f/UG|{ ɛGQ{S`ϥd"ӌGrs>9RҌ**SFn:s., 9."U;smUQL^@>* @fz A1jUJ+)@r hyP| .~Iev]S%Ŕ_Aչu7,ŖZMwZp|ֵWȜE|ԟcѯ@>Ei>?c$>rߓ4 lhht F CT9Lc`O0ˁ U(wHʷ_l =R\c31%<:`KT42oQko阐xT`鋜-iV9zTW{/ESj嗨{0͠\@}Aژy򊚳7X| iL8\A3d PPLnXa 5})i^hmSj5:эB@WGQ.$MU׹cƞXMTKC.{ UZFFgim%aCဍ Wz7'W=?IqCo4 }ц_Z+ЭUڶc8|),aYvPxT !g[y_8s))M@MVԋ-@qee.+3R&wfg קrC­9by6 荔Hߕz̜SeìfsȖIdjZؠs3h%1HfV)6&$H5mu#dp7o>]@nwձj xCdf 6C`Ka+`-`I9p% >`3UXα^jfsƧ\&LJlkkBl k'Ï!#gW%_KFec8#|vΧ\ϼz&Gw3喝iaz|);Vc2-i."#AP+WghsR`YgbAQ0 .4+Kd*Kw0wfI.ɜKǴ)'4/R.jFyhVkwl_ h&C5?=,A!H:Uiz8!7 ej6=dS ZoC Y걀H@>"$ve 5*e/Nv(4~"\W?^b%3'(wUyiRMQn2+2$@@ ۓqJe{9 !JfejL9zc>#POHfBC#.QW2:[oXTaťBq)&{t63;nD=Ol;h@؄ ,@ G)z\5ɽLQKP/)n %ÿI[ `n}IyGh8c@MPTp2z2y oy@)ϑ'4Ld!knA3Y.z|Udw{:(8F礮oL,xV<^ ?׷|rK9H_2'm1hS![< -adڪr ٗUu`Tffa54n  ΪޤV#Ү)NB ր]HgdngKt$mzDunȑD(OťgCmĩkRSʼn o?yr9u{81=S(nLVXj_Pt GQvCUL!f$>bni?씪ޛ;d%r^##njkdخ)8TDH.vLR&HDlr $>l2pkd Cp\(7]`/Zd I[H|'p(>b95<yK!78r3Q+]o 5.IWnɮ1tύsÁN0z ~ ?8 'c֝ަéC e~=n/agPߞsv3F⣨ZEqE@jJuާԲֈn(+.iq(o~KXz/P..gELmSV:Jq"ʧ?UX \fưS?Iu#I{#\ثCWI| 3|Nw`˨_Ns #QDG1\!t@r[Q˜?pA; !*,,ib1H|7|DNp,U(Q- ]}VfըO6dIE6ʆ%yԖ %/Q/}ԿCZfU`wQAM/o(BlŧR~)_zO{ Ϣ.ހGz#CňQ/H|bBjP^BF9eTEDPQ/(QAN.$WByyQ"SX@>B]f+F(=!9?XO3Rxj4swqbRg=0T:%H|$KnQZӪZ-BIn"U{ߓRYs @o- VC:p4/ f=/@2$ӝdX&`X{ZQ??[@0 0m[Wz4윟ELi?# L,E>.P%IBF1- b9R/2-VjLm.Cma39F G)zН%c5{X/S~H|K;=Jcopd |3A.N@ چ7;= 9Vip~<1U+uu uq×j'5J1.=\xY0õ5be'{ZcNaV6jy";fL=&V Sm5&#Ü*18`M KdeްYSl ±N-45CEͳL-_YXq\Q^$f<p"GnNML ovS-NgoN .CJײj2T-# CHb!_,NLnܼ9yfAtp'݁U <S W˾0Rs$Igsdu$}5ook,#=dGVl|pQTK1;k{]AsNU_ M[~s ;HOܭX{ľ+{k톱p)nhиyRh4 XoGOpQvJUqgMnZiܽ04I[M{g6aGP*Tr?XiH?H|SN7ح-ZwN,_T_Ej1AS#.X,gzqK _{R _;㍫ F1ȎtΏ,tJyƽa@ A]EYwzY®y*cӤ vL}TԫNw1iKW<@/"837S{[k4hy-XsO)6ki_!|?}[n~;/z̿[T8)9]N#dxU" G8ÏUf8›r\5LТ!diw !iL=ϻXaqiOyr)RLn̻&^kXy!r}G4x/p" brֽ0m6RgLBSv0ܥ o4#hH3I*9-w]^mx V[oh\;m8]3XA*Wvtqb;xX"™>Nl3GY$]`qԅ &4NklJ #f$ rfNEd_0I!Vv a{tzI9")AӌGniCGTqێs٪QϾ8yUEPM7Sg~OS Nʩv9T'dM<;yYi!G.9'&p0lt]J<<Xfg\(C;ƪ k$`QOƺe֮hj-ͬX(OLFⓀ~EQbaw@# Hb& 'i[( 
a<'6nLs40F6p7ͬlY7#(Q߻Y/P"y1a0c`pEU)=Fj:\w mhb أ:jt.̠QX*V5z1ԅ-%pk414N{ǸCS@uԅ+CGG.ʯPJPljE{pt|raߐ\A}%VH|1%x8 9ruwI9ߖl(.4תK N}.ƟA>-:?7 [ w\@}a]5g?KXQ.4.y5|?$;`$>rݩl/U|VNS6u:E vV-xĸhH}Tp) 3RY^'ujmnjvfuM 2ehQ:#k@zsz:^ nu5kkCsYc N \F]h0ؑˣ"ϓܔu,ͥ(=ۓ3Qy]d)`YXMvڦabvA{$7&Sǜi3=3Qjac$[ϑ홉sÏWVBajf4}so pȵE؅u֯#aH1- )7MGj8Ms`5yca")&숒 A΃|>Ha; ~aS |pS>3$E\M8 V"w42(Y8ȮFXH~$((T}pZ ">GPI&@3`# NhD]4ɟb$>ɊU34 ysSk$| m%/%o#H0A=,gr9n_V.I}r]}9,/φ3.p:?wܾoh16x|#rjNOQTY3?SH]sm㝷lmr} ('P #>,oa IF4],U,pbY,BT=ڲS(kYh/,[Ft- $?H%4ho ӌpG&Ոc՚LVN\L`Uު{ٖ'l<9i#poQfPeQP/%b1SOI[)ͽOA}FY霆ј'Sm`4#ؓpNaR wftmnZڈ&Ʋg zᏫ $2v($nA=?HΧI֛t=1vH}9 #gZ R; sP0r&ފ{5M>Huq=6 /&{St/ʓz Vs,ִL7 X<רh繳eٴo(l#Br?4P7mzijU+ PϨ,c!#۲Fÿ(7G5UZ͊2` ` ԯ(9H Q\Rqgpe:8۵Mqu[.(o~+mi'kM#d>Y/*+ݸN6mhɧ)7H焨8#Dٱ1_כ5oƶ >; nԶd:Ilifqu4ێeiM)7|0-MةTJ5#MۅS"$ 'X\L0MoI}Y tZ@i[w35,ua~OL>x Q`MxM7NŚټ',r sEܰ5"".!5.N6'鶈U+߿8^rSw]Ʈ4|8rh3ĩW29$r p?(L3ySE<2-Ss,@;DK]ߦ@ mJE.ğXߘxmf{V,4 ]d+FJ<"zYw͟RAx9j k-aKc)f'&S{AI`f`(.|rEԪЈP'2ؤۏXoGf$8/Lw0o*fr5X3ź9Ei 23Lj0TA E UD#Mv M&iaΰɟb$>9m[lFFog\|ekUvYkD#p?g(_yb)ƖP_Rh~(ƞ/Z}P[Zw嫊g@1 &ZIhL=N5}R*ȋ0 ӌ׹?I."b96ײ<^*eMZCi׼kkR<7Q) !>.yQ `܅G=YhP0<μ'vDUǾJIC^|GGiZ.:qtX]D8 PUI8*C B]B(Zbɲ$,PRX\`mmy~v\Vj Ӭg5'7Y*_ע`"f+_zW0M?5 4܀Y#A_շ_ѣ '~`;0m;mumܹa<_ɻpmmJ~z2Q"obځT\4FmmT 9 alXk7u`Ys9vV˕ .t4/bH.P_ˑ>Q1G3 #hs)@Wf l;Arꕁِ pg=. _aCp36 ըN6lhaUOsd3q$ʋf{vvִU-=jۇBdbNI\} zJ+|ƉӋ8@2#4IWg6u s[@ J1` MJfJcv,u@YBdQNu@,i+DyWM# ƭL;r7I㹉H5sS^1O.Q=Qn@J<ԡ׺ P8ȶʺi46rO6i:trz{)d ,[ìhFMkEMP r)J%HExCS?c@H6y=Бf 7 -#2 :V;V*_QCzNt:{`854;ci~G! 
V.)BGЀiC4Dr;٧9Rz-G꺻aU_f9B&r"H~j&STs)C~l̉`[UXn7c4cp 9M hLA}&hf}MoJIk.EcBuu~:1H<_|Ki\4@Eޞ}B݊gMdHgF4,@3NpaC Uc0W<2],jv͂10olo;9<2n>V%_&X#QNI;TH|Y8iI5si$[3w\| 9g*mWx{m3Q~ knD ɝFI0rcʌrpǢYO\GG=]D}R.CB{0i/}+@)2M ( *a#(#Ni keLȰN$KH|TGA9Toj_)Xh M9~6l#g?6l2P װAGqv\u0S  ll7,:v Q4 Y~=~:~r; !#vF'2 'wDU0K 7wNwH2#QW5W˷7 ,Tyeqp(J::E9Td0v&H~(Qd-Gas9/.sX &lAN$98@.??jN FʌV@< %J؊ia1|`WD!+!X@A 0R i'=|iTFL*\M8AQ s!G!b C6rxڼQt׳]Q(8pNQ ̏O;ˆ|Ox{iwMq,:p az[I&g2^Z]=|ΦT.1iLq:'S.E)Z+[:8f5`B1M5}SE^X!`k$)/ shqpB fggi=uw#o5"x3ヰ+Ll$ 2a9h%)h=F`qE_"J㚠o4[;(||ҋx!/ L̸GR<R " r 7hU=w'qd$1~^SznwD|@_!NfYsARȿa딥wE}`cr \f`q>e"#Ίv^FefEgr06_ȕiQ׶ IǏ[p6h>5JmpW9 lf$sRs;!<̂$cвYPìBT/xBS=]7b7ÖQ~O3q2py&˘dLC%~ -䶑oR@#X*m#ۺZMP,Yjd $WFWOH ZYȑU<]ʑeͩl'9R[FխF:b:hpV$-I/iV+<Ոԕ푉䞲́W=8?pU7W4-7lwLl펧䙮f,kk9(]"9V9Nȭĭ[SoG<'řު, \f ~e` }kG+^>%ڇw)7].43PA^E4\k|1$"&|~TPD >!I?Ȯ;ɛc Kh]ð'mH#ǐǙР}ޚUڇ^s M<Q KGDiC1Zua[&wT$\exCpFʵ(9QpNdat~dگDGdpRI8g>Fy u'<Ո=u"]Gշ6,֖CUvt{S^Fʧ]-WI7<#F(BG\T@iFʵ}ͽn(9H5]9i󇏈w{v5-ys ;(h`e$rҽ,t[0#,G!?R9D:iw\^k6t8qӉD|PSԑ?lmk,j5&s^Ѫa{'@Ҟwjۤ#8;y ?ʛl(O^9ôdv4[PkM-mۿLg273BOҹ|;mVGhh@b5ojֵ|CSwH[zG/T!Fv,oGV9o7t՛Rp3ȩKf y sco0;ܑ9f[5 /ϑQA z7ͷ:6noLN2l˱O<̉";ڊ+<Ո #5)ν[2ȑ%CtO%^!tX_'5ˮ\[2x3cuL\~;$?Ms<ȑ[[U<*V re w< Yǹ&dݱ߉nVܦlGWm0Y{!oʉ\a.Z\,k4v FxKN[ʼn۷ o?yr9u{bjbf01u4]vڳl?2N`$&/ ;R:IbW.;S)U7;7n*Mo%=ɾ&{A7s]$ɾ@&̱&ͱ&{IsoOlDž7% FG* WZ!JIY H|-^<^X^{ikמ?kIYB{K,j{C:I5<쵗̽^`q͂{Ke^xKwԂ{rat@9K{Keg[rtޛ-r4-ܹ~ill$'垳}˽` h˽ 8 BJv˽jr/{-lN<[.[%m-IػllrY*or_-{嘸NmLlbr/j|{eK%-To̼x`-_[eʘܿW=-Iv^7WPVp˽Q|[%^o h˽d?L|=_r/^ou}-;)[h~ O ؍Ϭ 'mH4#va:X߶`\D H|&Z+M/Y&ouX]9PRh~vO> F#hT]r'Q?K'%;m} ؍?P83q~n6 1daXѤ )cE[* /~{ɻ9$%#Q{I.;w JVVl wR6P/(-7'C7wL n,OY76l9L vxٝE iˌGGw>CA=#mg̷AaXnlſC gxF#[+̠qF?a67k7k| ISNf2cΆUAؿH'>zDNg[rR lW E-m~S 5Mۃo:SԌᒆmx[ P[jǡm;Q*5n֪%͐LLVfQUֺ{ ر:O3R^V}ӿE~:[ X,1B+W9* cgk&?z^1BuiUpv1t$XLdH@T,vMˮgg9bqBɚ1DZoUI?s87g(1?dn uz1F8 ckK81G'*7Z# PY܀."!UZ~SPLFv"[3ӥTadA|ghl-yրF,_@[F 7 (Ed.bمgi7+7+84b-3-mf7 Ռ?< *Bh54Q5kUˬmϿZ!S1C< SfJa74ճn~WOa\XMoQ13sޘdsr&Gմ|>t2b@ ?Ig3ktݘZA0j&W۪%~d"^\uR]/_o`ځHn&>J48J$haRi@?sc-(8Uw F(R*]NUqVOE]NU 
MBA)fиw:!!GS*#f7'OZ5Ibf0"jhe{0\pts:1G?R {KCT7No3yMT0&5K?gu` g|`hNQSVP>B]ȳ:zknN!Hfhlhh,ujjoX&QANa$7.w 8' f{䊆hFeyQB9*po^?+0f4mX[1x3Co Tyz#_9S Q>R{5ꯥm|nus?@ Ki,M4xߘGLo׼:obu#uQUv3Vb$> mSv;(B QF]h6>g8sO5REyu^sۗЍawQΣ>,7%5ө8=#]n ,p9?/ZcC>Q,!zu]wk!?D4>9i|gfvd\ p"=r$ѻYswL98IX#,hE8ȓ=T4#P( 1TauձgӨ5`g~ ?Pt!zMu3QauMnELOLx+F-&Ȑ}?q `ʎ%sE@2me:=:%(!INT0S9$A=9mf$1um]RǓ t/`C"YgK @L*ih}sMZnɧ)7ΐ!< oZo7KAO\G =B[^G20.von$@uO 6 ۀS%k<ݖzi'[a K{L\1!xofY#k<89#S3{H+t o>x wme9fڅI,3gE4ˡwY.a dxvuxP8uEёvk-kOi;|:v'Vzl:^DX?cN~2y@ϠŜD]tBS.9ӔU QF*gW,j؆<7 \BB# CP' H|r(ՓFX>jUBϑE0Ҏ" (GexbFvm"w +i"-D0B)*U`ND4'|)YNgD5hekS睩8PN.\FDq}|- $?H|pDpjYs=A,A9ڣ- ;2R0.Leae'9g˲ &3r- 8rO,Zat`{e8P&ز iAe')ܲIHJ9*H>H!Y8F@Ոc՚\6VN\DKU-8gOm)礍tn8p碀mΣ̠K=ˢ,^JJc(P?;?re;jUA6|r IY8HrגHJړ:Nck]̠C^xMn<#*5G<B4S5V ?NvDRn1h q'rL=~h}39H9nV "!ԇ j2lloMkh,`9njoýF{IqrM mS'eN8a30QQ#W zXc$> xeHB:r_.uFⓀ]6Pv9W*<$ʿiD_n;cm4 v V,> h-[k4hd@\O_u:4QJWPGPBr (_RڨqSoP.i?,.> i'kM4 TQ GF 4SԀ_| zgd)`|6kcPveHbluKpCd4I П&fK39Ӷg74nP J .xq]Tqtb\޸.!3/6uMZ_ooO_kEP/#< 5$Գ'T vw BBm(F^2w45`!l3Q킵3܈Y72@FD74-Kq/UHXYN 1RyP:Ym2ߴ(ޔ śA>PXX ŕ kd1G Hrϋ`r3!vouY tZHfjJS'<6uR7ngFma M*T U|: ]NjnHiWt*-";1?wG KqY##kɖstso! 
vh?IE,s|tŁjU7v ,USso{o8%~'b$ⱘkև5kr i YY+D3N>#~DM 3KO0(md@.xu M?r-2$?H|+[duYj _Ph~/~1yGD#J-(X,f~4,丱̂cϏ(T5E~,,/:, u2OJXn2uB꺻aU)J6S*jZCbu;99!v ?`XrQi`Ä @7a2ܩKJoA2!l7ӓ(fBNb~9ꤷF!3|Ǚ{UfdXc(CcDմ.6Pd:g<2HLNe]݈mT$:&ۚkلuޙd@y(wF1]@!ox G'i]/ lpsjÌt8ܓ ;2R;R]3TuӪ-!BU9&TuH+F4 (wșTg/8=S(kq 9I]#T,qm0]l|4ְMWsp] WQj-զk"FA.B}+6 : wKqO0XbڨU Fy­T7n{'e\^>^.ЗʺWMz>O`%XA>sۣˤ$ @ C$'S&('F7Fw{òHs$;t ;>,*FG6'\l}n> H&4+qf`HnX5$>#(Su%XPڤ38Ir&׬ZڂarM9e2|ؼ[UW_h{+Y]/tTHw2'Y٠\+7wd< Ph@ WQՏhH#.ǂŇH|$a]z뢃8')ג+pW (vdƎ06G>~QP) "PΣ.4m _.?Y_b$/Gj{(~K>iS 鐚ONZ!&^mz |flXZM {>ߢt&,^@HQ>{1$'m"=Aj07[%xD \uayn7EN|LM;,y9nw>ӚW2BgRm} I'킗tu>5С$NEZpV ‰\kHOܦ;.7BEiU;Gw3בȊ$Lʆ ڨp)!RJ6e=V܀H8$;i4y .4cڡ0jQ2MX|>?9 ԫmC0@&pD?-L;jZ©|}/; S3]%DnaVrO|)c>7[ B"!",![ B~< lae;?s^79Yu9K(iֳ^qߍ: wݟ?ϜYnNsºYM$#7Df_BՋpŐǺ5Q/|1ctn_ L\0+dp=?{R­!D A|1O٣6u_Ga 0 Ҝ"rYaAv5(J+|Dk;qK:tq3"6wcJ:cy-WT[n3m&F xF!]sA MEw3٤ !4#3<) g)/Ry.89޴0o) (~st0̖b7o5yf'9$r&yW6 3X}w/ w2Pl;Y5}{PS*B]Aߦ ,rhE׹5&]e}ӁI_ IG7l zd8MցB됅FfTkG^M[S%Fx O?}~&k! yhk+@1 )]c戸$s's8}kb8s{o5" f`$]%=_?E7.~|lK$s:&P7cCt _MP^y-2u̵a!@1=AA "DGM'lrg僶 iS9 ؈KG6 Bq7l&nVx3cSɰ̽Zm]B, s81=X5fJIƯ-&٦ sφ֞TTXɜfmR{d>f<1ysCϑ 4@f k7,byZPƾm&]:(\݆hqx[x3cS[k=7O IzH"e]UbF<_U`Ra#bU)s gsVivbjK h?II+cV++䣿WX 3g`SW9r_մg#+V%?@W,G{1&Kc"Y"nɎk +SWF oCo{}1g%o4o)Dv?.=f.s gaٽd$۱k?2p|Xݱz$CC* XoGwφ=w `I?/ݹte7_Y\"[kzyVT' sb0{U SAw\WUlʖE#֧ާVoW:~XQ5D[F+ƼQMd>knUp?:dk7`o2; O8۵MRN5/Qigmaotai/R/0000755000175000017500000000000014176774070012074 5ustar nileshnileshmaotai/R/later_matrix2F1.R0000644000175000017500000000365614176771675015205 0ustar nileshnilesh#' Gauss Confluent Hypergeometric Function of Matrix Argument #' #' @references #' \insertRef{butler_laplace_2002}{maotai} #' #' @keywords internal #' @noRd matrix2F1 <- function(a, b, c, Z, method=c("laplace")){ # PREPARE if ((!isSymmetric(Z))||(!is.matrix(Z))){ stop("* matrix2F1 : input 'Z' should be a symmetric matrix.") } mymethod = ifelse(missing(method),"laplace", match.arg(tolower(method), c("laplace"))) # RUN output = switch(mymethod, laplace = matrix2F1.laplace(a,b,c,Z)) 
  return(output)
}

# ---------------------------------------------------------------------------
# Laplace approximation to the Gauss hypergeometric function 2F1(a,b;c;X)
# with a symmetric matrix argument X, following Butler & Wood's calibrated
# Laplace approximation. Only the eigenvalues of X enter the computation.
#' @keywords internal
#' @noRd
matrix2F1.laplace <- function(a, b, c, X){
  # dimension and eigenvalues of the (symmetric) matrix argument
  p = base::nrow(X)
  vec.xx = base::eigen(X)$values
  # per-eigenvalue saddlepoints y_i : positive root of the Laplace quadratic
  vec.yy = rep(0,p)
  for (i in 1:p){
    tau = (vec.xx[i]*(b-a)) - c
    vec.yy[i] = (2*a)/(sqrt((tau^2) - (4*a*vec.xx[i]*(c-b))) - tau)
  }
  # NOTE(review): 'vec.Li' is computed but never used below — presumably a
  # leftover from an earlier formulation; confirm before removing.
  vec.Li = rep(0,p)
  for (i in 1:p){
    vec.Li[i] = (vec.xx[i]*vec.yy[i]*(1-vec.yy[i]))/(1-(vec.xx[i]*vec.yy[i]))
  }
  # Hessian-derived matrix R_{ij} entering the calibration (curvature) factor;
  # each term is assembled on the log scale and exponentiated for stability
  matR21 = array(0,c(p,p))
  for (i in 1:p){
    xi = vec.xx[i]
    yi = vec.yy[i]
    for (j in 1:p){
      xj = vec.xx[j]
      yj = vec.yy[j]
      term1 = exp(log(yi)+log(yj)-log(a))
      term2 = exp(log(1-yi)+log(1-yj)-log(c-a))
      term3top = log(b)+log(xi)+log(xj)+log(yi)+log(yj)+log(1-yi)+log(1-yj)
      term3bot = log(1-(xi*yi))+log(1-(xj*yj))+log(a)+log(c-a)
      term3 = exp(term3top-term3bot)
      matR21[i,j] = term1+term2-term3
    }
  }
  # assemble the approximation on the log scale:
  #   log1 : normalizing power of c ; log2 : -0.5*log det of the curvature
  log1 = ((c*p) - (p*(p+1)/4))*log(c)
  log2 = -0.5*sum(log(matR21))
  # NOTE(review): 'log3' is a length-p vector, so the returned value is also
  # length-p rather than a scalar; an outer sum() may be intended — verify
  # against the Butler-Wood reference formula before relying on this branch.
  log3 = (a*(log(vec.yy)-log(a)))+((c-a)*(log(1-vec.yy)-log(c-a)))-(b*log(1-(vec.xx*vec.yy)))
  return(base::exp(log1+log2+log3))
}

# manual sanity check against the scalar identity 2F1(1/2,1/2;3/2;z^2) = asin(z)/z
# # # special case
# myz = runif(1, min=-1, max=1)
# # asin(myz)/myz
# scalar2F1(1/2,1/2,3/2,(myz^2), method = "series")
# scalar2F1(1/2,1/2,3/2,(myz^2), method = "integral")
# scalar2F1(1/2,1/2,3/2,(myz^2), method = "laplace")
# # matrix2F1(1/2,1/2,3/2,matrix((myz^2)), method = "laplace")
maotai/R/kmeanspp.R0000644000175000017500000000352214176771675014037 0ustar nileshnilesh#' K-Means++ Clustering Algorithm
#'
#' \eqn{k}-means++ algorithm is known to be a smart, careful initialization
#' technique. It is originally intended to return a set of \eqn{k} points
#' as initial centers though it can still be used as a rough clustering algorithm
#' by assigning points to the nearest points.
#'
#' @param data an \eqn{(n\times p)} matrix whose rows are observations.
#' @param k the number of clusters.
#'
#' @return a length-\eqn{n} vector of class labels.
#'
#' @examples
#' ## use simple example of iris dataset
#' data(iris)
#' mydata = as.matrix(iris[,1:4])
#' mycol = as.factor(iris[,5])
#'
#' ## find the low-dimensional embedding for visualization
#' my2d = cmds(mydata, ndim=2)$embed
#'
#' ## apply 'kmeanspp' with different numbers of k's.
#' k2 = kmeanspp(mydata, k=2)
#' k3 = kmeanspp(mydata, k=3)
#' k4 = kmeanspp(mydata, k=4)
#' k5 = kmeanspp(mydata, k=5)
#' k6 = kmeanspp(mydata, k=6)
#'
#' ## visualize
#' opar <- par(no.readonly=TRUE)
#' par(mfrow=c(2,3))
#' plot(my2d, col=k2, main="k=2", pch=19, cex=0.5)
#' plot(my2d, col=k3, main="k=3", pch=19, cex=0.5)
#' plot(my2d, col=k4, main="k=4", pch=19, cex=0.5)
#' plot(my2d, col=k5, main="k=5", pch=19, cex=0.5)
#' plot(my2d, col=k6, main="k=6", pch=19, cex=0.5)
#' plot(my2d, col=mycol, main="true cluster", pch=19, cex=0.5)
#' par(opar)
#'
#' @references
#' \insertRef{arthur_kmeans_2007}{maotai}
#'
#' @export
kmeanspp <- function(data, k=2){
  ## sanity check : 'data' must be a finite numeric matrix --------------
  if (!check_datamat(data)){
    stop("* kmeanspp : an input 'data' should be a matrix without any missing/infinite values.")
  }

  ## compute pairwise Euclidean distances once, then delegate the -------
  ## k-means++ seeding and nearest-center assignment to the shared
  ## hidden routine; its 'cluster' component is the label vector.
  pairwise.dist = stats::as.dist(cpp_pdist(data))
  num.cluster   = round(k)
  hidden_kmeanspp(pairwise.dist, k=num.cluster)$cluster
}
maotai/R/lyapunov.R0000644000175000017500000000313414176771675014105 0ustar nileshnilesh#' Solve Lyapunov Equation
#'
#' The Lyapunov equation is of form
#' \deqn{AX + XA^\top = Q}
#' where \eqn{A} and \eqn{Q} are square matrices of same size. Above form is also known as \emph{continuous} form.
#' This is a wrapper of \code{armadillo}'s \code{sylvester} function.
#'
#' @param A a \eqn{(p\times p)} matrix as above.
#' @param Q a \eqn{(p\times p)} matrix as above.
#'
#' @return a solution matrix \eqn{X} of size \eqn{(p\times p)}.
#'
#' @examples
#' ## simulated example
#' #  generate square matrices
#' A = matrix(rnorm(25),nrow=5)
#' X = matrix(rnorm(25),nrow=5)
#' Q = A%*%X + X%*%t(A)
#'
#' #  solve using 'lyapunov' function
#' solX = lyapunov(A,Q)
#' \dontrun{
#' pm1 = "* Experiment with Lyapunov Solver"
#' pm2 = paste("* Absolute Error : ",norm(solX-X,"f"),sep="")
#' pm3 = paste("* Relative Error : ",norm(solX-X,"f")/norm(X,"f"),sep="")
#' cat(paste(pm1,"\n",pm2,"\n",pm3,sep=""))
#' }
#'
#' @references
#' \insertRef{sanderson_armadillo_2016}{maotai}
#'
#' \insertRef{eddelbuettel_rcpparmadillo_2014}{maotai}
#'
#' @export
lyapunov <- function(A, Q){
  ## both inputs must be square matrices --------------------------------
  if (!check_sqmat(A)){
    stop("* lyapunov : an input 'A' should be a square matrix.")
  }
  if (!check_sqmat(Q)){
    stop("* lyapunov : an input 'Q' should be a square matrix.")
  }

  ## rewrite AX + XA' = Q as the Sylvester system AX + XB + C = 0 -------
  ## with B = t(A) and C = -Q, then let the RcppArmadillo backend solve.
  solve_lyapunov(A, t(A), -Q)
}
maotai/R/tsne.R0000644000175000017500000000376514176771675013175 0ustar nileshnilesh#' t-SNE Embedding
#'
#' This function is a simple wrapper of \code{\link[Rtsne]{Rtsne}} function for
#' t-Stochastic Neighbor Embedding for finding low-dimensional structure of
#' the data embedded in the high-dimensional space.
#'
#' @param data an \eqn{(n\times p)} matrix whose rows are observations.
#' @param ndim an integer-valued target dimension.
#' @param ... extra parameters to be used in \code{\link[Rtsne]{Rtsne}} function.
#'
#' @return a named list containing
#' \describe{
#' \item{embed}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.}
#' \item{stress}{discrepancy between embedded and original data as a measure of error.}
#' }
#'
#' @examples
#' \donttest{
#' ## use simple example of iris dataset
#' data(iris)
#' mydat = as.matrix(iris[,1:4])
#' mylab = as.factor(iris[,5])
#'
#' ## run t-SNE and MDS for comparison
#' iris.cmds = cmds(mydat, ndim=2)
#' iris.tsne = tsne(mydat, ndim=2)
#'
#' ## extract coordinates and class information
#' cx = iris.cmds$embed # embedded coordinates of CMDS
#' tx = iris.tsne$embed # t-SNE
#'
#' ## visualize
#' #  main title
#' mc = paste("CMDS with STRESS=",round(iris.cmds$stress,4),sep="")
#' mt = paste("tSNE with STRESS=",round(iris.tsne$stress,4),sep="")
#'
#' #  draw a figure
#' opar <- par(no.readonly=TRUE)
#' par(mfrow=c(1,2))
#' plot(cx, col=mylab, pch=19, main=mc)
#' plot(tx, col=mylab, pch=19, main=mt)
#' par(opar)
#' }
#'
#' @export
tsne <- function(data, ndim=2, ...){
  ## validate the data matrix -------------------------------------------
  if (!check_datamat(data)){
    stop("* tsne : an input 'data' should be a matrix without any missing/infinite values.")
  }

  ## validate the target dimension : integer in [1, ncol(data)) ---------
  target.ndim = round(ndim)
  if ((length(target.ndim)>1)||(target.ndim<1)||(target.ndim>=ncol(data))){
    stop("* tsne : 'ndim' should be an integer in [1,col(data)). ")
  }

  ## run t-SNE on the pairwise-distance representation ------------------
  hidden_tsne(stats::as.dist(cpp_pdist(data)), ndim=target.ndim, ...)
}
maotai/R/boot.stationary.R0000644000175000017500000000453214176771675015362 0ustar nileshnilesh#' Generate Index for Stationary Bootstrapping
#'
#' Assuming data being dependent with cardinality \code{N}, \code{boot.stationary} returns
#' a vector of index that is used for stationary bootstrapping.
To describe, starting points #' are drawn from uniform distribution over \code{1:N} and the size of each block is #' determined from geometric distribution with parameter \eqn{p}. #' #' #' @param N the number of observations. #' @param p parameter for geometric distribution with the size of each block. #' #' @return a vector of length \code{N} for moving block bootstrap sampling. #' #' @examples #' \donttest{ #' ## example : bootstrap confidence interval of mean and variances #' vec.x = seq(from=0,to=10,length.out=100) #' vec.y = sin(1.21*vec.x) + 2*cos(3.14*vec.x) + rnorm(100,sd=1.5) #' data.mu = mean(vec.y) #' data.var = var(vec.y) #' #' ## apply stationary bootstrapping #' nreps = 50 #' vec.mu = rep(0,nreps) #' vec.var = rep(0,nreps) #' for (i in 1:nreps){ #' sample.id = boot.stationary(100) #' sample.y = vec.y[sample.id] #' vec.mu[i] = mean(sample.y) #' vec.var[i] = var(sample.y) #' print(paste("iteration ",i,"/",nreps," complete.", sep="")) #' } #' #' ## visualize #' opar <- par(no.readonly=TRUE) #' par(mfrow=c(1,3), pty="s") #' plot(vec.x, vec.y, type="l", main="1d signal") # 1d signal #' hist(vec.mu, main="mean CI", xlab="mu") # mean #' abline(v=data.mu, col="red", lwd=4) #' hist(vec.var, main="variance CI", xlab="sigma") # variance #' abline(v=data.var, col="blue", lwd=4) #' par(opar) #' } #' #' @references #' \insertRef{politis_stationary_1994}{maotai} #' #' @export boot.stationary <- function(N, p=0.25){ ################################################################### # Preprocessing myn = round(N) myp = as.double(p) vec1n = 1:myn ################################################################### # Computation output = c() while (length(output) k)||(!is.vector(sub.id))){ stop("* lgpa : an input 'sub.id' should be a vector containing indices in [1,nrow(x)].") } par.scale = scale par.reflect = reflect ################################################################### # computation # 1. 
select out the subarray and compute means nsubid = length(sub.id) xsub = x[sub.id,,] meanvecs = list() for (i in 1:n){ meanvecs[[i]] = colMeans(xsub[,,i]) } for (i in 1:n){ xsub[,,i] = xsub[,,i] - matrix(rep(meanvecs[[i]],nsubid), ncol=m, byrow = TRUE) } # 2. compute PGA xout = shapes::procGPA(xsub, scale=par.scale, reflect = par.reflect)$rotated # if (scale){ # xout = shapes::procGPA(xsub, scale=TRUE)$rotated # } else { # xout = shapes::procGPA(xsub, scale=FALSE)$rotated # } # 3. compute rotation matrix rotmats = list() for (i in 1:n){ tgt1 = xsub[,,i] tgt2 = xout[,,i] rotmats[[i]] = aux_pinv((t(tgt1)%*%tgt1))%*%(t(tgt1)%*%tgt2) } # 4. final computation output = array(0,dim(x)) for (i in 1:n){ tgtx = x[,,i] output[,,i] = (tgtx - matrix(rep(meanvecs[[i]],k), ncol=m, byrow = TRUE))%*%(rotmats[[i]]) } ################################################################### # Report return(output) } maotai/R/zzz.R0000644000175000017500000000217214176771675013066 0ustar nileshnilesh## RETICULATE : global reference # .pkgenv <- new.env(parent = emptyenv()) .onAttach <- function(...){ ## Retrieve Year Information date <- date() x <- regexpr("[0-9]{4}", date) this.year <- substr(date, x[1], x[1] + attr(x, "match.length") - 1) # Retrieve Current Version this.version = packageVersion("maotai") ## Print on Screen packageStartupMessage("**-----------------------------------------------------------------**") packageStartupMessage("** maotai") packageStartupMessage("** - Tools for Matrix Algebra, Optimization and Inference Problems") packageStartupMessage("**") packageStartupMessage("** Version : ",this.version," (",this.year,")",sep="") packageStartupMessage("** Maintainer : Kisung You (kisungyou@outlook.com)") packageStartupMessage("** Website : https://kisungyou.com/maotai") packageStartupMessage("**") packageStartupMessage("** Please share any bugs or suggestions to the maintainer.") packageStartupMessage("**-----------------------------------------------------------------**") } 
.onUnload <- function(libpath) { library.dynam.unload("maotai", libpath) } maotai/R/aux_hidden_dist.R0000644000175000017500000004306214176771675015367 0ustar nileshnilesh# Hidden Functions for Future Use # these functions can be loaded using 'utils::getFromNamespace' # by the command 'getFromNamespace("function_name","maotai"); # # Here, all the functions require 'diss' object from 'stats' package. # # 01. hidden_kmedoids : PAM algorithm # hidden_kmedoids_best : PAM algorithm + use Silhouette (maximum) # 02. hidden_bmds : Bayesian Multidimensional Scaling # 03. hidden_cmds : Classical Multidimensional Scaling # 04. hidden_kmeanspp : k-means++ algorithm. # 05. hidden_tsne : t-SNE visualization. # 06. hidden_nem : Negative Eigenvalue Magnitude # 07. hidden_nef : Negative Eigenfraction # 08. hidden_emds : Euclified Multidimensional Scaling # 09. hidden_hclust : FASTCLUSTER - hclust function # 10. hidden_dbscan : DBSCAN - dbscan function # 11. hidden_silhouette : mimics that of cluster's silhouette # 12. hidden_mmds : metric multidimensional scaling by SMACOF # 13. hidden_PHATE : return row-stochastic matrix & time stamp # 14. hidden_smacof : a generalized version of SMACOF with weights # 15. hidden_hydra : Hyperbolic Distance Recovery and Approximation # 00. hidden_checker ------------------------------------------------------ #' @keywords internal #' @noRd hidden_checker <- function(xobj){ if (inherits(xobj, "dist")){ return(as.matrix(xobj)) } else if (inherits(xobj, "matrix")){ check1 = (nrow(xobj)==ncol(xobj)) check2 = isSymmetric(xobj) check3 = all(abs(diag(xobj))<.Machine$double.eps*10) if (check1&&check2&&check3){ return(as.matrix(xobj)) } else { stop("* hidden : matrix is not valid.") } } else { stop("* hidden : input is not valid.") } } # 01. 
hidden_kmedoids & hidden_kmedoids_best ------------------------------ #' @keywords internal #' @noRd hidden_kmedoids <- function(distobj, nclust=2){ distobj = stats::as.dist(hidden_checker(distobj)) myk = round(nclust) return(cluster::pam(distobj, k = myk)) } #' @keywords internal #' @noRd hidden_kmedoids_best <- function(distobj, mink=2, maxk=10){ # prepare kvec = seq(from=round(mink),to=round(maxk), by = 1) knum = length(kvec) svec = rep(0,knum) nobj = nrow(as.matrix(distobj)) clut = array(0,c(nobj,knum)) for (k in 1:knum){ know = kvec[k] if (know < 2){ svec[k] = 0 clut[,k] = rep(1,nobj) } else { pamx = hidden_kmedoids(distobj, nclust=kvec[k]) svec[k] = pamx$silinfo$avg.width clut[,k] = pamx$clustering } } # return the output output = list() output$opt.k = kvec[which.max(svec)] output$score = svec # knum-vector of silhouette scores output$label = clut # (n,knum) cluster labels return(output) } # 02. hidden_bmds --------------------------------------------------------- #' @keywords internal #' @noRd hidden_bmds <- function(x, ndim=2, par.a=5, par.alpha=0.5, par.step=1, mc.iter=8128, verbose=FALSE){ ###################################################### # Initialization ndim = round(ndim) embedy = hidden_cmds(x, ndim)$embed x = as.matrix(x) ndim = round(ndim) if ((length(ndim)>1)||(ndim<1)||(ndim>=nrow(x))){ stop("* bmds - 'ndim' should be an integer in [1,ncol(data)). ") } n = nrow(x) m = n*(n-1)/2 ###################################################### # Preliminary Computation # 1. apply CMDS for initialization y = as.matrix(base::scale(embedy, # (N x ndim) centered center=TRUE, scale=FALSE)) Delta = as.matrix(stats::dist(y)) # (N x N) pairwise distances # 2. initialization eigy = base::eigen(stats::cov(y)) X0 = y%*%eigy$vectors # (N x ndim) rotated gamma0 = diag(X0) # variances ? sigg0 = compute_SSR(x, Delta)/m; beta0 = apply(X0,2,var)/2 # 3. 
run the main part runcpp <- main_bmds(x, X0, sigg0, par.a, par.alpha, mc.iter, par.step, verbose, beta0) Xsol <- runcpp$solX Xdist <- as.matrix(stats::dist(Xsol)) output = list() output$embed = Xsol output$stress = compute_stress(x, Xdist) return(output) # return Rcpp::List::create(Rcpp::Named("solX")=Xsol,Rcpp::Named("solSSR")=SSRsol); } # 03. hidden_cmds --------------------------------------------------------- #' @keywords internal #' @noRd hidden_cmds <- function(x, ndim=2){ ##################################################3 # Check Input and Transform x = hidden_checker(x) ndim = round(ndim) k = as.integer(ndim) D2 = (x^2) # now squared matrix n = nrow(D2) if ((length(ndim)>1)||(ndim<1)||(ndim>=nrow(x))){ stop("* cmds : 'ndim' should be an integer in [1,ncol(data)). ") } ##################################################3 # Computation J = diag(n) - (1/n)*outer(rep(1,n),rep(1,n)) B = -0.5*J%*%D2%*%J eigB = eigen(B) LL = eigB$values[1:k] EE = eigB$vectors[,1:k] # Y = as.matrix(base::scale((EE%*%diag(sqrt(LL))), center=TRUE, scale=FALSE)) Y = EE%*%diag(sqrt(LL)) DY = as.matrix(stats::dist(Y)) output = list() output$embed = Y output$stress = compute_stress(x, DY) return(output) } # 04. 
hidden_kmeanspp ----------------------------------------------------- #' @keywords internal #' @noRd hidden_kmeanspp <- function(x, k=2){ ##################################################3 # Check Input and Transform x = hidden_checker(x) n = nrow(x) K = round(k) if (K >= n){ stop("* kmeanspp : 'k' should be smaller than the number of observations.") } if (K < 2){ stop("* kmeanspp : 'k' should be larger than 1.") } id.now = 1:n ##################################################3 # Computation # initialize id.center = base::sample(id.now, 1) id.now = base::setdiff(id.now, id.center) # iterate for (i in 1:(K-1)){ # compute distance to the nearest tmpdmat = x[id.now, id.center] if (i==1){ d2vec = as.vector(tmpdmat)^2 d2vec = d2vec/base::sum(d2vec) } else { d2vec = as.vector(base::apply(tmpdmat, 1, base::min))^2 d2vec = d2vec/base::sum(d2vec) } # sample one id.tmp = base::sample(id.now, 1, prob=d2vec) # update id.center = c(id.center, id.tmp) id.now = base::setdiff(id.now, id.tmp) } # let's compute label dmat = x[,id.center] cluster = base::apply(dmat, 1, base::which.min) ################################################## # Return output = list() output$center = id.center output$cluster = cluster return(output) } # 05. hidden_tsne --------------------------------------------------------- #' @keywords internal #' @noRd hidden_tsne <- function(dx, ndim=2, ...){ ################################################## # Pass to 'Rtsne' dx = hidden_checker(dx) k = round(ndim) tmpout = Rtsne::Rtsne(dx, dims=k, ..., is_distance=TRUE) Y = tmpout$Y DY = as.matrix(stats::dist(Y)) ################################################## # Return output = list() output$embed = Y output$stress = compute_stress(dx, DY) return(output) } # 06. 
hidden_nem ---------------------------------------------------------- #' @keywords internal #' @noRd hidden_nem <- function(xdiss){ ##################################################3 # Check Input and Transform xx = hidden_checker(xdiss) D2 = (xx^2) n = nrow(D2) ##################################################3 # Computation H = diag(n) - (1/n)*base::outer(rep(1,n),rep(1,n)) S = -0.5*(H%*%D2%*%H) eigS = base::eigen(S, only.values = TRUE) evals = eigS$values ##################################################3 # Finalize output = abs(min(evals))/max(evals) return(output) } # 07. hidden_nef ---------------------------------------------------------- #' @keywords internal #' @noRd hidden_nef <- function(xdiss){ ##################################################3 # Check Input and Transform xx = hidden_checker(xdiss) D2 = (xx^2) n = nrow(D2) ##################################################3 # Computation H = diag(n) - (1/n)*base::outer(rep(1,n),rep(1,n)) S = -0.5*(H%*%D2%*%H) eigS = base::eigen(S) evals = eigS$values ##################################################3 # Finalize output = sum(abs(evals[which(evals<0)]))/sum(abs(evals)) return(output) } # 08. hidden_emds --------------------------------------------------------- #' Euclified Multidimensional Scaling #' #' strategy 1 : transitive closure of the triangle inequality (labdsv) #' strategy 2 : Non-Euclidean or Non-metric Measures Can Be Informative; adding positive numbers to all off-diagonal entries #' #' @keywords internal #' @noRd hidden_emds <- function(xdiss, ndim=2, method=c("closure","gram")){ ##################################################3 # Check Input and Transform x = hidden_checker(xdiss) ndim = round(ndim) k = as.integer(ndim) n = nrow(x) if ((length(ndim)>1)||(ndim<1)||(ndim>=nrow(x))){ stop("* emds : 'ndim' should be an integer in [1,nrow(x)). 
") } method = match.arg(method) mydim = round(ndim) ##################################################3 # Branching if (hidden_nef(x) < 100*.Machine$double.eps){ # if Euclidean, okay output = hidden_cmds(x, ndim=mydim) } else { # if not Euclidean if (method=="closure"){ # strategy 1 : transitive closure of the triangle inequality xnew = as.matrix(labdsv::euclidify(stats::as.dist(x))) # well it seems to work well.. output = hidden_cmds(xnew, ndim = mydim) } else { # strategy 2 : add positive numbers to all off-diagonal entries gamma0 = emds_gamma0(x) ggrid = seq(from=min(0.001, gamma0/1000), to=(gamma0*0.999), length.out=20) # just try 20 cases vgrid = rep(0,20) for (i in 1:20){ xtmp = x + ggrid[i] diag(xtmp) = rep(0,nrow(xtmp)) vgrid[i] = hidden_nef(xtmp) } idopts = which.min(vgrid) if (length(idopts)>1){ # if multiple, use the first one. idopts = idopts[1] } optgamma = ggrid[idopts] xnew = x + optgamma diag(xnew) = rep(0,nrow(xnew)) output = hidden_cmds(xnew, ndim = mydim) } } ##################################################3 # Report return(output) } # 09. hidden_hclust ------------------------------------------------------- #' @keywords internal #' @noRd hidden_hclust <- function(xdiss, mymethod, mymembers){ return(fastcluster::hclust(xdiss, method=mymethod, members=mymembers)) } # 10. hidden_dbscan ------------------------------------------------------- #' @keywords internal #' @noRd hidden_dbscan <- function(Xdiss, myeps, myminPts=5, ...){ output = dbscan::dbscan(Xdiss, eps = myeps, minPts=myminPts, ...) return(output) } # 11. hidden_silhouette -------------------------------------------------------- #' @keywords internal #' @noRd hidden_silhouette <- function(xdiss, label){ x = as.integer(as.factor(label)) hsil = cluster::silhouette(x, xdiss) output = list() output$local = as.vector(hsil[,3]) output$global = base::mean(as.vector(hsil[,3])) return(output) } # 12. 
hidden_mmds : metric multidimensional scaling by SMACOF -------- # note that from version 0.2.2, I"m using a version from the modern MDS book. #' @keywords internal #' @noRd hidden_mmds <- function(x, ndim=2, maxiter=200, abstol=1e-5){ # Check Input and Transform x = hidden_checker(x) n = base::nrow(x) ndim = round(ndim) myiter = max(50, round(maxiter)) mytol = max(100*.Machine$double.eps, as.double(abstol)) WW = array(1,c(n,n)) return(as.matrix(src_smacof(x, WW, ndim, myiter, mytol, TRUE)$embed)) # # Run with Rcpp # return(cpp_mmds(x, ndim, myiter, mytol)) } # 13. hidden_PHATE -------------------------------------------------------- #' @keywords internal #' @noRd hidden_PHATE <- function(x, nbdk=5, alpha=2){ # Check Input and Transform x = hidden_checker(x) # now it's a matrix n = base::nrow(x) nbdk = max(1, round(nbdk)) alpha = max(sqrt(.Machine$double.eps), as.double(alpha)) # k-th nearest distance nndist = rep(0,n) for (i in 1:n){ tgt = as.vector(x[i,]) nndist[i] = tgt[order(tgt)[nbdk+1]] } # Build Kernel Matrix matK = array(1,c(n,n)) for (i in 1:(n-1)){ for (j in (i+1):n){ term1 = exp(-((x[i,j]/nndist[i])^(alpha))) term2 = exp(-((x[i,j]/nndist[j])^(alpha))) matK[i,j] <- matK[j,i] <- 0.5*(term1+term2) } } vecD = base::rowSums(matK) matP = base::diag(1/vecD)%*%matK matA = base::diag(1/base::sqrt(vecD))%*%matK%*%base::diag(1/base::sqrt(vecD)) # Eigenvalues and Von-Neumann Entropy eigA = eigen(matA)$values eigA = eigA[(eigA>0)] vec.t = 1:1000 vec.H = rep(0,1000) for (i in 1:1000){ eig.t = (eigA^i) + (1e-7) # modified for zero-padding eig.t = eig.t/base::sum(eig.t) term.t = -base::sum(eig.t*base::log(eig.t)) if (is.na(term.t)){ vec.t = vec.t[1:(i-1)] vec.H = vec.H[1:(i-1)] break } else { vec.H[i] = term.t } } # Optimal Stopping Criterion Pout = matP opt.t = round(hidden_knee_clamped(vec.t, vec.H)) for (i in 1:(opt.t-1)){ Pout = Pout%*%matP } # return the output output = list() output$P = Pout output$t = opt.t return(output) } # X = as.matrix(iris[,1:4]) # lab = 
as.factor(iris[,5]) # # D = stats::dist(X) # cmd2 = cmdscale(D, k=2) # mmdA = hidden_mmds(D, ndim=2, abstol=1e-2) # mmdB = hidden_mmds(D, ndim=2, abstol=1e-10) # # par(mfrow=c(1,3), pty="s") # plot(cmd2, col=lab, main = "cmds") # plot(mmdA, col=lab, main="mmds-2") # plot(mmdB, col=lab, main="mmds-8") # # example ----------------------------------------------------------------- # library(labdsv) # data(bryceveg) # returns a vegetation data.frame # dis.bc <- as.matrix(dsvdis(bryceveg,'bray/curtis')) # calculate a Bray/Curtis # # emds = getFromNamespace("hidden_emds","maotai") # out.cmds <- cmds(dis.bc, ndim=2)$embed # out.emds1 <- emds(dis.bc, ndim=2, method="closure")$embed # out.emds2 <- emds(dis.bc, ndim=2, method="gram")$embed # # par(mfrow=c(3,1)) # plot(out.cmds, main="cmds") # plot(out.emds1, main="emds::closure") # plot(out.emds2, main="emds::gram") # 14. hidden_smacof : a generalized version of SMACOF with weights ====== # returns both {embed} and {stress} #' @keywords internal #' @noRd hidden_smacof <- function(D, W=NULL, ndim=2, maxiter=100, abstol=(1e-7)){ myiter = round(maxiter) mytol = as.double(abstol) myndim = round(ndim) DD = hidden_checker(D) # now it's a matrix nn = base::nrow(DD) if (is.null(W)&&(length(W)==0)){ use.gutman = TRUE WW = array(1,c(nn,nn)) } else { use.gutmat = FALSE WW = as.matrix(W) } output = src_smacof(DD, WW, myndim, myiter, mytol, use.gutman) return(output) } # fun_cmds <- utils::getFromNamespace("hidden_cmds", "maotai") # fun_mmds <- utils::getFromNamespace("hidden_mmds", "maotai") # fun_smacof <- utils::getFromNamespace("hidden_smacof", "maotai") # # D = stats::dist(as.matrix(iris[,1:4])) # lab = factor(iris[,5]) # # Yc = fun_cmds(D)$embed # Ym = fun_mmds(D, maxiter=500) # Ys = fun_smacof(D, maxiter=500)$embed # # par(mfrow=c(1,3), pty="s") # plot(Yc, col=lab, pch=19, main="CMDS") # plot(Ym, col=lab, pch=19, main="MMDS") # plot(Ys, col=lab, pch=19, main="smacof") # 15. 
hidden_hydra -------------------------------------------------------- # note that 'iso.adjust=TRUE' is a direct application of the algorithm # as stated in the paper. For 2-dimensional embedding, 'FALSE' is default # in the 'hydra' package's implementation. #' @keywords internal #' @noRd hidden_hydra <- function(distobj, ndim=2, kappa=1, iso.adjust=TRUE){ # preprocess D = as.matrix(distobj) n = base::nrow(D) dim = round(ndim) kappa = base::max(base::sqrt(.Machine$double.eps), as.double(kappa)) A = base::cosh(base::sqrt(kappa)*D) # eigen-decomposition : 100 dimensions if (n < 100){ # regular 'base::eigen' eigA = base::eigen(A, TRUE) # top vector x0 = sqrt(eigA$values[1])*as.vector(eigA$vectors[,1]) if (x0[1] < 0){ x0 = -x0 } # others bot_vals = eigA$values[(n-dim+1):n] bot_vecs = eigA$vectors[,(n-dim+1):n] } else { # or use 'RSpectra::eigs_sym' eig_top = RSpectra::eigs_sym(A, 1, which="LA") eig_bot = RSpectra::eigs_sym(A, ndim, which="SA") # top vector x0 = sqrt(eig_top$values)*as.vector(eig_top$vectors) if (x0[1] < 0){ x0 = -x0 } # others bot_vecs = eig_bot$vectors bot_vals = eig_bot$values } # component : radial x_min = base::min(x0) radial = sqrt((x0-x_min)/(x0+x_min)) # component : directional if (iso.adjust){ X_last = bot_vecs%*%diag(sqrt(pmax(-bot_vals,0))) sqnorms = base::apply(X_last, 1, function(x) 1/sqrt(sum(x^2))) directional = base::diag(sqnorms)%*%X_last } else { sqnorms = base::apply(bot_vecs, 1, function(x) 1/sqrt(sum(x^2))) directional = base::diag(sqnorms)%*%bot_vecs } # return the output return(list(radial=radial, directional=directional)) } # library(hydra) # data(karate) # D = as.dist(karate$distance) # # X = as.matrix(iris[,1:4]) # D = stats::dist(X) # Y = factor(iris[,5]) # # run_hydra = hydra(as.matrix(D), equi.adj =-1, alpha = 1, curvature = 5) # my_hydra = hidden_hydra(D, kappa=5, iso.adjust = FALSE) # # X1 = cbind(run_hydra$r*cos(run_hydra$theta), run_hydra$r*sin(run_hydra$theta)) # X2 = diag(my_hydra$radial)%*%my_hydra$directional # # 
par(mfrow=c(1,3), pty="s") # plot(run_hydra) # plot(X1, pch=19, xlim=c(-1,1), ylim=c(-1,1), col=Y, main="HYDRA package") # plot(X2, pch=19, xlim=c(-1,1), ylim=c(-1,1), col=Y, main="my implementation") maotai/R/cmds.R0000644000175000017500000000265414176771675013164 0ustar nileshnilesh#' Classical Multidimensional Scaling #' #' Classical multidimensional scaling aims at finding low-dimensional structure #' by preserving pairwise distances of data. #' #' @param data an \eqn{(n\times p)} matrix whose rows are observations. #' @param ndim an integer-valued target dimension. #' #' @return a named list containing #' \describe{ #' \item{embed}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.} #' \item{stress}{discrepancy between embedded and origianl data as a measure of error.} #' } #' #' @examples #' ## use simple example of iris dataset #' data(iris) #' idata = as.matrix(iris[,1:4]) #' icol = as.factor(iris[,5]) # class information #' #' ## run Classical MDS #' iris.cmds = cmds(idata, ndim=2) #' #' ## visualize #' opar <- par(no.readonly=TRUE) #' plot(iris.cmds$embed, col=icol, #' main=paste0("STRESS=",round(iris.cmds$stress,4))) #' par(opar) #' #' @references #' \insertRef{torgerson_multidimensional_1952}{maotai} #' #' @export cmds <- function(data, ndim=2){ ############################################################ # Preprocessing if (!check_datamat(data)){ stop("* cmds : an input 'data' should be a matrix without any missing/infinite values.") } xdiss = stats::as.dist(cpp_pdist(data)) ############################################################ # Run and Return mydim = round(ndim) output = hidden_cmds(xdiss, ndim=mydim) return(output) }maotai/R/epmeans.R0000644000175000017500000000635114176771675013664 0ustar nileshnilesh#' EP-means Algorithm for Clustering Empirical Distributions #' #' EP-means is a variant of k-means algorithm adapted to cluster #' multiple empirical cumulative distribution functions under metric structure #' induced by Earth 
Mover's Distance. #' #' @param elist a length \eqn{N} list of either vector or \code{ecdf} objects. #' @param k the number of clusters. #' #' @return a named list containing \describe{ #' \item{cluster}{an integer vector indicating the cluster to which each \code{ecdf} is allocated.} #' \item{centers}{a length \eqn{k} list of centroid \code{ecdf} objects.} #' } #' #' @examples #' \donttest{ #' ## two sets of 1d samples, 10 each and add some noise #' # set 1 : mixture of two gaussians #' # set 2 : single gamma distribution #' #' # generate data #' elist = list() #' for (i in 1:10){ #' elist[[i]] = stats::ecdf(c(rnorm(100, mean=-2), rnorm(50, mean=2))) #' } #' for (j in 11:20){ #' elist[[j]] = stats::ecdf(rgamma(100,1) + rnorm(100, sd=sqrt(0.5))) #' } #' #' # run EP-means with k clusters #' # change the value below to see different settings #' myk = 2 #' epout = epmeans(elist, k=myk) #' #' # visualize #' opar = par(no.readonly=TRUE) #' par(mfrow=c(1,myk)) #' for (k in 1:myk){ #' idk = which(epout$cluster==k) #' for (i in 1:length(idk)){ #' if (i<2){ #' pm = paste("class ",k," (size=",length(idk),")",sep="") #' plot(elist[[idk[i]]], verticals=TRUE, lwd=0.25, do.points=FALSE, main=pm) #' } else { #' plot(elist[[idk[i]]], add=TRUE, verticals=TRUE, lwd=0.25, do.points=FALSE) #' } #' plot(epout$centers[[k]], add=TRUE, verticals=TRUE, lwd=2, col="red", do.points=FALSE) #' } #' } #' par(opar) #' } #' #' @references #' \insertRef{henderson_epmeans_2015}{maotai} #' #' @export epmeans <- function(elist, k=2){ ############################################### # Preprocessing clist = elist_epmeans(elist) # will use quantized ones only / flist = elist_fform(qlist) myk = round(k) myn = length(clist) # Quantization mylength = 1000 qseq = seq(from=1e-6, to=1-(1e-6), length.out=mylength) qmat = array(0,c(myn,mylength)) for (n in 1:myn){ qmat[n,] = as.vector(stats::quantile(clist[[n]], qseq)) } ############################################### # Rcpp k-means tmpcpp = cpp_kmeans(qmat, 
myk)$means ############################################### # Pairwise Distance Computation # wrap mylist1 = list() mylist2 = list() for (n in 1:myn){ mylist1[[n]] = stats::ecdf(as.vector(qmat[n,])) } for (k in 1:myk){ mylist2[[k]] = stats::ecdf(as.vector(tmpcpp[k,])) } # compute pairwise distance using Earth Mover's Distance pdistmat = dist2_wasserstein(mylist1, mylist2, 1) # index label = base::apply(pdistmat, 1, which.min) ############################################### # Return : we want to add 'Silhouette' output = list() output$cluster = as.integer(label) output$centers = mylist2 return(output) } # ## personal examples # cdf0 = stats::ecdf(rnorm(100, sd=3)) # original ECDF # qseq = seq(from=0,to=1,length.out=1000) # quantile sequence # quant = stats::quantile(cdf0, qseq) # cdf1 = stats::ecdf(quant) # # par(mfrow=c(1,2)) # plot(cdf0, main="Original") # plot(cdf1, main="Recovered") maotai/R/trio.R0000644000175000017500000002241214176771675013205 0ustar nileshnilesh#' Trace Ratio Optimation #' #' This function provides several algorithms to solve the following problem #' \deqn{\textrm{max} \frac{tr(V^\top A V)}{tr(V^\top B V)} \textrm{ such that } V^\top C V = I} #' where \eqn{V} is a projection matrix, i.e., \eqn{V^\top V = I}. Trace ratio optimization #' is pertained to various linear dimension reduction methods. It should be noted that #' when \eqn{C = I}, the above problem is often reformulated as a generalized eigenvalue problem #' since it's an easier proxy with faster computation. #' #' @param A a \eqn{(p\times p)} symmetric matrix in the numerator term. #' @param B a \eqn{(p\times p)} symmetric matrix in the denomiator term. #' @param C a \eqn{(p\times p)} symmetric constraint matrix. If not provided, it is set as identical matrix automatically. #' @param dim an integer for target dimension. It can be considered as the number of loadings. #' @param method the name of algorithm to be used. Default is \code{2003Guo}. 
#' @param maxiter maximum number of iterations to be performed. #' @param eps stopping criterion for iterative algorithms. #' #' @return a named list containing #' \describe{ #' \item{V}{a \eqn{(p\times dim)} projection matrix.} #' \item{tr.val}{an attained maximum scalar value.} #' } #' #' @examples #' ## simple test #' # problem setting #' p = 5 #' mydim = 2 #' A = matrix(rnorm(p^2),nrow=p); A=A%*%t(A) #' B = matrix(runif(p^2),nrow=p); B=B%*%t(B) #' C = diag(p) #' #' # approximate solution via determinant ratio problem formulation #' eigAB = eigen(solve(B,A)) #' V = eigAB$vectors[,1:mydim] #' eigval = sum(diag(t(V)%*%A%*%V))/sum(diag(t(V)%*%B%*%V)) #' #' # solve using 4 algorithms #' m12 = trio(A,B,dim=mydim, method="2012Ngo") #' m09 = trio(A,B,dim=mydim, method="2009Jia") #' m07 = trio(A,B,dim=mydim, method="2007Wang") #' m03 = trio(A,B,dim=mydim, method="2003Guo") #' #' # print the results #' line1 = '* Evaluation of the cost function' #' line2 = paste("* approx. via determinant : ",eigval,sep="") #' line3 = paste("* trio by 2012Ngo : ",m12$tr.val, sep="") #' line4 = paste("* trio by 2009Jia : ",m09$tr.val, sep="") #' line5 = paste("* trio by 2007Wang : ",m07$tr.val, sep="") #' line6 = paste("* trio by 2003Guo : ",m03$tr.val, sep="") #' cat(line1,"\n",line2,"\n",line3,"\n",line4,"\n",line5,"\n",line6) #' #' @references #' \insertRef{guo_generalized_2003}{maotai} #' #' \insertRef{wang_trace_2007}{maotai} #' #' \insertRef{yangqingjia_trace_2009}{maotai} #' #' \insertRef{ngo_trace_2012}{maotai} #' #' @export trio <- function(A, B, C, dim=2, method=c("2003Guo","2007Wang","2009Jia","2012Ngo"), maxiter=1000, eps=1e-10){ ################################################################### # not completed yet. 
if (missing(C)){ C = diag(nrow(A)) myflag = TRUE } else { myflag = FALSE } if (!check_symm(A)){ stop("* trio : an input matrix 'A' should be a square, symmetric matrix.") } if (!check_symm(B)){ stop("* trio : an input matrix 'B' should be a square, symmetric matrix.") } if (!check_symm(C)){ stop("* trio : an input matrix 'C' should be a square, symmetric matrix.") } sizes = rep(0,3) sizes[1] = nrow(A) sizes[2] = nrow(B) sizes[3] = nrow(C) if (length(unique(sizes))!=1){ stop("* trio : all input matrices should be of same size.") } if (!myflag){ eigC = eigen(C) Cinv2 = eigC$vectors%*%diag(1/sqrt(eigC$values))%*%t(eigC$vectors) A = Cinv2%*%A%*%Cinv2 B = Cinv2%*%B%*%Cinv2 } # 2009 Jia's note : B should have rank >= (m-d) if (as.integer(Matrix::rankMatrix(B))<(nrow(B)-dim)){ warning("* trio : null space of 'B' is excessive. trace ratio value may diverge.") } ################################################################### # switch case V = switch(method, "2007Wang" = trio2007Wang(A, B, dim, eps, maxiter), "2003Guo" = trio2003Guo(A, B, dim, eps, maxiter), "2009Jia" = trio2009Jia(A, B, dim, eps, maxiter), "2012Ngo" = trio2012Ngo(A, B, dim, eps, maxiter)) output = list() output$V = V output$tr.val = sum(diag(t(V)%*%A%*%V))/sum(diag(t(V)%*%B%*%V)) return(output) } # subroutines ------------------------------------------------------------- #' 2003 Guo et al. #' Title : A generalized Foley-Sammon transform based on generalized fisher discriminant ... #' #' @keywords internal #' @noRd trio2003Guo <- function(A, B, dim, eps, maxiter){ ## translate into the language d = dim Sp = A Sl = B ## bisection # 1. 
initialization lbd1 = 0; f1 = evalGuoDiff(lbd1, Sp, Sl, d) lbd2 = 1; f2 = evalGuoDiff(lbd2, Sp, Sl, d) if (f2 >= 0){ while (f2 > 0){ lbd1 = lbd2; f1 = f2; lbd2 = lbd2*2; f2 = evalGuoDiff(lbd2, Sp, Sl, d) } } for (i in 1:maxiter){ lbdm = (lbd1+lbd2)/2 fm = evalGuoDiff(lbdm, Sp, Sl, d) if (fm > 0){ lbd1 = lbdm f1 = fm } else { lbd2 = lbdm f2 = fm } if (abs(lbd1-lbd2) < eps){ break } } lbdm = (lbd1+lbd2)/2 # W = eigen(Sp-lbdm*Sl)$vectors[,1:d] ## use RSpectra for only top 'd' ones W = RSpectra::eigs(Sp-lbdm*Sl,d,which="LR")$vectors ## let's try to return ! return(W) } #' @keywords internal #' @noRd evalGuoDiff <- function(lbd, A, B, dim){ W = RSpectra::eigs(A-lbd*B,dim,which="LR")$vectors # W = eigen(A-lbd*B)$vectors[,1:dim] ## use RSpectra for only top 'd' ones return(sum(diag(t(W)%*%(A-lbd*B)%*%W))) } #' 2007 Wang et al. #' Title : Trace Ratio vs. Ratio Trace for Dimensionality Reduction #' #' @keywords internal #' @noRd trio2007Wang <- function(A, B, dim, eps, maxiter){ ## translate into this language Sp = A St = A+B m = nrow(A) d = dim eigSt = base::eigen(St, symmetric = TRUE) mm = sum(eigSt$values > 0) if (mm < 1){ stop("* (A+B) has at least one nonnegative eigenvalues.") } U = eigSt$vectors[,1:mm] ## transform into the problem of V now. Spu = t(U)%*%Sp%*%U Stu = t(U)%*%St%*%U Vold = qr.Q(qr(matrix(rnorm(mm*d),ncol=d))) ## main computation V = cppsub_2007Wang(Vold, mm, d, Spu, Stu, maxiter, eps) ## adjust back to the original problem W = U%*%V ## let's try to return ! return(W) } #' 2009 Jia et al #' Title : Trace Ratio Problem Revisited (DNM) #' #' @keywords internal #' @noRd trio2009Jia <- function(A, B, dim, eps, maxiter){ ## translate into the language d = dim Sp = A Sl = B ## Decomposed Newton Method lbdold = 0 for (i in 1:maxiter){ ## 1. compute eigendecomposition eigS = RSpectra::eigs(Sp-lbdold*Sl,d,which="LR") top.val = eigS$values # top 'd' eigenvalues top.vec = eigS$vectors # top 'd' eigenvectors ## 2. 
lbdnew lbdnew = solve2009Jia(lbdold, top.val, top.vec, Sl) inc = abs(lbdnew-lbdold) ## 3. updating information lbdold = lbdnew if (inc 0} for \eqn{i\neq j}, and #' (3) \eqn{d_{ij} = d_{ji}}. #' #' @param d \code{"dist"} object or \eqn{(N\times N)} matrix of pairwise distances. #' #' @return a logical; \code{TRUE} if it satisfies metric property, \code{FALSE} otherwise. #' #' @examples #' ## Let's use L2 distance matrix of iris dataset #' data(iris) #' dx = as.matrix(stats::dist(iris[,1:4])) #' #' # perturb d(i,j) #' dy = dx #' dy[1,2] <- dy[2,1] <- 10 #' #' # run the algorithm #' checkdist(dx) #' checkdist(dy) #' #' @seealso \code{\link{checkmetric}} #' @export checkdist <- function(d){ if (inherits(d, "dist")){ d = as.matrix(d) } else { if (!is.matrix(d)){ stop("* checkdist : input 'd' should be a matrix.") } } # 1. square matrix if (nrow(d)!=ncol(d)){ message(" checkdist : input 'd' is not a square matrix.") return(FALSE) } # 2. zero diagonals if (any(diag(d)!=0)){ message(" checkdist : input 'd' has non-zero diagonals.") return(FALSE) } # 3. all positive elements if (any(d < 0)){ message(" checkdist : input 'd' has negative values.") return(FALSE) } # 4. 
symmetric if (!base::isSymmetric(d)){ message(" checkdist : input 'd' is not symmetric.") return(FALSE) } return(TRUE) }maotai/R/aux_computation.R0000644000175000017500000000424414176771675015452 0ustar nileshnilesh# auxiliary functions for computation ------------------------------------- # (1) aux_pinv : pseudo-inverse # (2) aux_pseudomean : compute distance from 1st observation to pseudo mean by rest points # (1) aux_pinv ------------------------------------------------------------ #' @keywords internal aux_pinv <- function(A){ svdA = base::svd(A) tolerance = (.Machine$double.eps)*max(c(nrow(A),ncol(A)))*as.double(max(svdA$d)) idxcut = which(svdA$d <= tolerance) invDvec = (1/svdA$d) invDvec[idxcut] = 0 output = (svdA$v%*%diag(invDvec)%*%t(svdA$u)) return(output) } # (2) aux_pseudomean ------------------------------------------------------ #' @keywords internal aux_pseudomean <- function(dmat){ # we need embedding .. umm .. automatic dimension selection if (nrow(dmat)==1){ stop("* aux_pseudomean : error..") } else if (nrow(dmat)==2){ return(dmat[1,2]) } else { embedded = aux_pseudomean_auto(dmat) n = nrow(embedded) p = ncol(embedded) # centering based on other points emcenter = as.vector(base::colMeans(embedded[2:n,])) embednew = embedded - matrix(rep(emcenter,n), ncol=p, byrow=TRUE) # compute scalar d1mat = dmat[2:n,2:n] # d(x,y) d2mat = as.matrix(stats::dist(embednew[2:n,])) # ||x-y|| d12mat = (d1mat*d2mat) d22mat = (d2mat^2) dlower = base::lower.tri(d12mat) cstar =sum(d12mat[dlower])/sum(d22mat[dlower]) # update embednew and compute erow1 = cstar*as.vector(embednew[1,]) return(sqrt(sum(erow1^2))) } } #' @keywords internal #' @noRd aux_pseudomean_auto <- function(dmat){ # only positive eigenvalues' part n = nrow(dmat) J = diag(rep(1,n))-(1/n)*outer(rep(1,n),rep(1,n)) B = -(J%*%(dmat^2)%*%J)/2.0 eigB = base::eigen(B, symmetric = TRUE) # decreasing order m = max(length(which(eigB$values > 0)),2) X = (eigB$vectors[,1:m])%*%(base::diag(sqrt(eigB$values[1:m]))) 
return(X) } # # personal test : seems like it's working well enough # x = rnorm(5, mean=3) # y = matrix(rnorm(10*5),ncol=5) # # dmat = as.matrix(dist(rbind(x,y))) # val.alg = aux_pseudomean(dmat) # val.true = sqrt(sum((x-as.vector(colMeans(y)))^2)) maotai/R/aux_hidden_compute.R0000644000175000017500000000722114176771675016075 0ustar nileshnilesh# Hidden Functions : Computation # # (01) hidden_gaussbary_2002R # - Ruschendorf, Uckelmann (2002) : On the n-coupling problem algorithm # # (02) hidden_gaussbary_2016A # - Alvarez-Esteban et al (2016) : A fixed-point approach to barycenters in Wasserstein space # - See Theorem 4.2 for the updating rules. # # (03) hidden_projsimplex_sorting : method by Wang and Carreira-Perpinan # Held, P. Wolfe, and H. Crowder, “Validation of subgradient optimization,” Mathematical Programming, vol. 6, pp. 62–88, 1974 # (above is known by https://www.optimization-online.org/DB_FILE/2014/08/4498.pdf) # url : https://eng.ucmerced.edu/people/wwang5/papers/SimplexProj.pdf # (01) hidden_gaussbary_2002R --------------------------------------------- # array3d : (p x p x N) array of SPD matrices. # weight : N vector of weights. C++ handles L1 normalization. # # RETURN a list containing # $mean : (p x p) barycenter matrix # $iter : the number of iterations #' @keywords internal #' @noRd hidden_gaussbary_2002R <- function(array3d, weight, maxiter=50, abstol=(1e-8)){ # minimal check weight = as.vector(weight) if (length(weight)!=dim(array3d)[3]){ stop("* hidden_gaussbary_2002R : non-matching weight.") } # compute return(src_gaussbary_2002R(array3d, weight, maxiter, abstol)) } # (02) hidden_gaussbary_2016A --------------------------------------------- # array3d : (p x p x N) array of SPD matrices. # weight : N vector of weights. C++ handles L1 normalization. 
# # RETURN a list containing # $mean : (p x p) barycenter matrix # $iter : the number of iterations #' @keywords internal #' @noRd hidden_gaussbary_2016A <- function(array3d, weight, maxiter=50, abstol=(1e-8)){ # minimal check weight = as.vector(weight) if (length(weight)!=dim(array3d)[3]){ stop("* hidden_gaussbary_2016A : non-matching weight.") } # compute return(src_gaussbary_2016A(array3d, weight, maxiter, abstol)) } # # test scenario 1 : random near-identity covariances # # test scenario 2 : generate randomly some data + add noise # p = 10 # n = 50 # noise = 0.01 # # somedat = matrix(runif(50*p), ncol=p) # mycovs = array(0, c(p,p,n)) # for (i in 1:n){ # mycovs[,,i] = cov(somedat + matrix(rnorm(50*p), ncol=p)*noise) # # print(paste0("The ",i,"-th matrix has rank=",round(Matrix::rankMatrix(mycovs[,,i])))) # } # myweight = rep(1/n, n) # # # res_2002 = hidden_gaussbary_2002R(mycovs, myweight) # res_2016 = hidden_gaussbary_2016A(mycovs, myweight) # par(mfrow=c(1,3), pty="s") # image(cov(somedat), main="true covariance") # image(res_2002$mean, main=paste0("02R:iter=",round(res_2002$iter),"/error=",round(norm(res_2002$mean-cov(somedat),"F"),3))) # image(res_2016$mean, main=paste0("16A:iter=",round(res_2016$iter),"/error=",round(norm(res_2016$mean-cov(somedat),"F"),3))) # # microbenchmark(res_2002 = hidden_gaussbary_2002R(mycovs, myweight), # res_2016 = hidden_gaussbary_2016A(mycovs, myweight)) # (03) hidden_projsimplex_sorting --------------------------------------------- #' @keywords internal #' @noRd hidden_projsimplex_sorting <- function(input){ # preprocessing y = as.vector(input) D = length(y) u = base::sort(y, decreasing = TRUE) # main part uvec = rep(0,D) for (j in 1:D){ uvec[j] = u[j] + (1/j)*(1-base::sum(u[1:j])) } rho = max(which(uvec > 0)) lbd = (1/rho)*(1 - base::sum(u[1:rho])) # finalize x = pmax(y+lbd, 0) return(x) } # xx = matrix(rnorm(1000*2), ncol=2) # yy = array(0,c(1000,2)) # for (i in 1:1000){ # yy[i,] = hidden_projsimplex_13W(xx[i,]) # } # 
plot(xx, main="scatter") # points(yy, pch=19, col="red") maotai/R/sylvester.R0000644000175000017500000000326414176771675014274 0ustar nileshnilesh#' Solve Sylvester Equation #' #' The Sylvester equation is of form #' \deqn{AX + XB = C} #' where \eqn{X} is the unknown and others are given. Though it's possible to have non-square \eqn{A} and \eqn{B} matrices, #' we currently support square matrices only. This is a wrapper of \code{armadillo}'s \code{sylvester} function. #' #' @param A a \eqn{(p\times p)} matrix as above. #' @param B a \eqn{(p\times p)} matrix as above. #' @param C a \eqn{(p\times p)} matrix as above. #' #' @return a solution matrix \eqn{X} of size \eqn{(p\times p)}. #' #' @examples #' ## simulated example #' # generate square matrices #' A = matrix(rnorm(25),nrow=5) #' X = matrix(rnorm(25),nrow=5) #' B = matrix(rnorm(25),nrow=5) #' C = A%*%X + X%*%B #' #' # solve using 'sylvester' function #' solX = sylvester(A,B,C) #' pm1 = "* Experiment with Sylvester Solver" #' pm2 = paste("* Absolute Error : ",norm(solX-X,"f"),sep="") #' pm3 = paste("* Relative Error : ",norm(solX-X,"f")/norm(X,"f"),sep="") #' cat(paste(pm1,"\n",pm2,"\n",pm3,sep="")) #' #' #' @references #' \insertRef{sanderson_armadillo_2016}{maotai} #' #' \insertRef{eddelbuettel_rcpparmadillo_2014}{maotai} #' #' @export sylvester <- function(A,B,C){ ################################################################### # check square matrix if (!check_sqmat(A)){ stop("* sylvester : an input 'A' should be a square matrix.") } if (!check_sqmat(B)){ stop("* sylvester : an input 'B' should be a square matrix.") } if (!check_sqmat(C)){ stop("* sylvester : an input 'C' should be a square matrix.") } ################################################################### # arrange and solve return(cpp_sylvester(A,B,-C)) }maotai/R/cov2corr.R0000644000175000017500000000274714176771675014000 0ustar nileshnilesh#' Convert Covariance into Correlation Matrix #' #' Given a covariance matrix, return a correlation 
matrix that has unit diagonals. #' We strictly impose (and check) whether the given input is a symmetric matrix #' of full-rank. #' #' @param mat a \eqn{(p\times p)} covariance matrix. #' #' @return a \eqn{(p\times p)} correlation matrix. #' #' @examples #' \donttest{ #' # generate an empirical covariance scaled #' prep_mat = stats::cov(matrix(rnorm(100*10),ncol=10)) #' prep_vec = diag(as.vector(stats::runif(10, max=5))) #' prep_cov = prep_vec%*%prep_mat%*%prep_vec #' #' # compute correlation matrix #' prep_cor = cov2corr(prep_cov) #' #' # visualize #' opar <- par(no.readonly=TRUE) #' par(mfrow=c(1,2), pty="s") #' image(prep_cov, axes=FALSE, main="covariance") #' image(prep_cor, axes=FALSE, main="correlation") #' par(opar) #' } #' #' @export cov2corr <- function(mat){ # checker if (!check_covariance(mat)){ stop("* cov2corr : an input 'mat' is not a valid covariance matrix.") } dvec = diag(1/sqrt(diag(mat))) return(dvec%*%mat%*%dvec) } # checker ----------------------------------------------------------------- #' @keywords internal #' @noRd check_covariance <- function(mat){ # matrix if (!is.matrix(mat)){ return(FALSE) } # symmetric if (!isSymmetric(mat)){ return(FALSE) } # all positive diagonal if (any(diag(mat)<=0)){ return(FALSE) } if (as.integer(Matrix::rankMatrix(mat)) < base::nrow(mat)){ return(FALSE) } return(TRUE) } maotai/R/dpmeans.R0000644000175000017500000001252714176771675013665 0ustar nileshnilesh#' DP-means Algorithm for Clustering Euclidean Data #' #' DP-means is a nonparametric clustering method motivated by DP mixture model in that #' the number of clusters is determined by a parameter \eqn{\lambda}. The larger #' the \eqn{\lambda} value is, the smaller the number of clusters is attained. #' In addition to the original paper, we added an option to randomly permute #' an order of updating for each observation's membership as a common #' heuristic in the literature of cluster analysis. 
#' #' @param data an \eqn{(n\times p)} data matrix for each row being an observation. #' @param lambda a threshold to define a new cluster. #' @param maxiter maximum number of iterations. #' @param abstol stopping criterion #' @param permute.order a logical; \code{TRUE} if random order for permutation is used, \code{FALSE} otherwise. #' #' @return a named list containing #' \describe{ #' \item{cluster}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.} #' \item{centers}{a list containing information for out-of-sample prediction.} #' } #' #' @examples #' ## define data matrix of two clusters #' x1 = matrix(rnorm(50*3,mean= 2), ncol=3) #' x2 = matrix(rnorm(50*3,mean=-2), ncol=3) #' X = rbind(x1,x2) #' lab = c(rep(1,50),rep(2,50)) #' #' ## run dpmeans with several lambda values #' solA <- dpmeans(X, lambda= 5)$cluster #' solB <- dpmeans(X, lambda=10)$cluster #' solC <- dpmeans(X, lambda=20)$cluster #' #' ## visualize the results #' opar <- par(no.readonly=TRUE) #' par(mfrow=c(1,4), pty="s") #' plot(X,col=lab, pch=19, cex=.8, main="True", xlab="x", ylab="y") #' plot(X,col=solA, pch=19, cex=.8, main="dpmeans lbd=5", xlab="x", ylab="y") #' plot(X,col=solB, pch=19, cex=.8, main="dpmeans lbd=10", xlab="x", ylab="y") #' plot(X,col=solC, pch=19, cex=.8, main="dpmeans lbd=20", xlab="x", ylab="y") #' par(opar) #' #' \donttest{ #' ## let's find variations by permuting orders of update #' ## used setting : lambda=20, we will 8 runs #' sol8 <- list() #' for (i in 1:8){ #' sol8[[i]] = dpmeans(X, lambda=20, permute.order=TRUE)$cluster #' } #' #' ## let's visualize #' vpar <- par(no.readonly=TRUE) #' par(mfrow=c(2,4), pty="s") #' for (i in 1:8){ #' pm = paste("permute no.",i,sep="") #' plot(X,col=sol8[[i]], pch=19, cex=.8, main=pm, xlab="x", ylab="y") #' } #' par(vpar) #' } #' #' @references #' \insertRef{kulis_revisiting_2012}{maotai} #' #' @export dpmeans <- function(data, lambda=1, maxiter=1234, abstol=1e-6, permute.order=FALSE){ 
############################################################ # Preprocessing if (!check_datamat(data)){ stop("* dpmeans : an input 'data' should be a matrix without any missing/infinite values.") } # Parameter and Initialization n = nrow(data) p = ncol(data) k = 1 # set k=1 labels = rep(1,n) # labels={1,2,...,n} mu = matrix(colMeans(data), nrow=1) # global mean lambda = as.double(lambda) ############################################################ # Main Iteration ss.old = compute.ss(data, labels, mu)+ k*lambda ss.new = 0 for (iter in 1:maxiter){ # 0. updating order of observations if (permute.order){ idseq = sample(1:n) } else { idseq = 1:n } # 1. update the class membership per each class for (i in idseq){ # 1-1. compute distances to the centers # dic = rep(0, k); for (j in 1:k){dic[j] = sum((as.vector(data[i,])-as.vector(mu[j,]))^2)} dic = as.vector(dat2centers(data[i,], mu)); # cpp conversion # 1-2. assign new or stay if (min(dic) > lambda){ k = k+1 labels[i] = k mu = rbind(mu, data[i,]) } else { idmins = which(dic==min(dic)) if (length(idmins)>1){ labels[i] = sample(idmins, 1) } else { labels[i] = idmins } } } # 2. rearrange the label (remove empty ones) labels = as.factor(labels) ulabel = sort(unique(labels)) labnew = rep(0,n) for (i in 1:length(ulabel)){ labnew[(labels==ulabel[i])] = i } labels = labnew k = round(max(labels)) # 3. compute per-class means uassign = sort(unique(labels)) mu = array(0,c(k,p)) for (i in 1:k){ idmean = which(labels==uassign[i]) if (length(idmean)==1){ mu[i,] = as.vector(data[idmean,]) } else { mu[i,] = as.vector(colMeans(data[idmean,])) } } # 4. compute DPMEANS objective function ss.new = compute.ss(data, labels, mu) + k*lambda ss.delta = ss.old-ss.new ss.old = ss.new # 5. 
stop if updating is not significant if (ss.delta < abstol){ break } } ############################################################ # Return the results output = list() output$cluster = as.factor(labels) output$centers = mu return(output) } # auxiliary functions ----------------------------------------------------- #' @keywords internal #' @noRd compute.ss <- function(data, label, centers){ p = ncol(data) if (is.vector(centers)){ centers = matrix(centers, nrow=1) } ulabel = sort(unique(label)) output = 0 for (i in 1:length(ulabel)){ subdata = data[(label==ulabel[i]),] if (!is.vector(subdata)){ nn = nrow(subdata) for (j in 1:nn){ output = output + sum((as.vector(subdata[j,])-as.vector(centers[i,]))^2) } } } return(output) } maotai/R/weiszfeld.R0000644000175000017500000000467614176771675014240 0ustar nileshnilesh#' Weiszfeld Algorithm for Computing L1-median #' #' Geometric median, also known as L1-median, is a solution to the following problem #' \deqn{\textrm{argmin} \sum_{i=1}^n \| x_i - y \|_2 } #' for a given data \eqn{x_1,x_2,\ldots,x_n \in R^p}. #' #' @param X an \eqn{(n\times p)} matrix for \eqn{p}-dimensional signal. If vector is given, it is assumed that \eqn{p=1}. #' @param weights \code{NULL} for equal weight \code{rep(1/n,n)}; otherwise, it has to be a vector of length \eqn{n}. #' @param maxiter maximum number of iterations. 
#' @param abstol stopping criterion #' #' @examples #' ## generate sin(x) data with noise for 100 replicates #' set.seed(496) #' t = seq(from=0,to=10,length.out=20) #' X = array(0,c(100,20)) #' for (i in 1:100){ #' X[i,] = sin(t) + stats::rnorm(20, sd=0.5) #' } #' #' ## compute L1-median and L2-mean #' vecL2 = base::colMeans(X) #' vecL1 = weiszfeld(X) #' #' ## visualize #' opar <- par(no.readonly=TRUE) #' par(mfrow=c(1,3), pty="s") #' matplot(t(X[1:5,]), type="l", main="5 generated data", ylim=c(-2,2)) #' plot(t, vecL2, type="l", col="blue", main="L2-mean", ylim=c(-2,2)) #' plot(t, vecL1, type="l", col="red", main="L1-median", ylim=c(-2,2)) #' par(opar) #' #' @export weiszfeld <- function(X, weights=NULL, maxiter=496, abstol=1e-6){ ############################################### # Preprocessing if (is.vector(X)){ X = matrix(X, ncol = 1) } n = nrow(X) d = ncol(X) if ((length(weights)==0)&&(is.null(weights))){ weights = rep(1/n, n) } if ((!is.vector(weights))||(length(weights)!=n)){ stop(paste0("* weiszfeld : 'weights' should be a vector of length ",n)) } ############################################### # Prepare myiter = round(maxiter) mytol = as.double(abstol) xinit = as.vector(base::colMeans(X)) epsnum = (100*.Machine$double.eps) ############################################### # Compute and Return return(as.vector(cpp_weiszfeld(X, mytol, myiter, xinit, weights, epsnum))) } # ## example from Gmedian:weiszfeld # ## Robustness of the geometric median of n=3 points in dimension d=2. 
# library(Gmedian) # # ## Computation speed # ## Simulated data - Brownian paths # n <- 1e4 # d <- 50 # x <- matrix(rnorm(n*d,sd=1/sqrt(d)), n, d) # x <- t(apply(x,1,cumsum)) # # library(microbenchmark) # microbenchmark::microbenchmark( # "resG1" = as.vector(Weiszfeld(x)$median), # "resG2" = as.vector(Gmedian(x)), # "resWW" = as.vector(maotai::weiszfeld(x)) # ) maotai/R/later_scalar2F1.R0000644000175000017500000000434714176771675015144 0ustar nileshnilesh#' Gauss Confluent Hypergeometric Function of Scalar Argument #' #' #' @references #' \insertRef{butler_laplace_2002}{maotai} #' #' @keywords internal #' @noRd scalar2F1 <- function(a, b, c, z, method=c("series","integral")){ # PREPARE # if (abs(z) >= 1){ # stop("* scalar2F1 : '|z| < 1' is required.") # } mymethod = ifelse(missing(method),"series", match.arg(tolower(method), c("laplace","integral","series"))) # COMPUTE output = switch(mymethod, integral = scalar2F1.integral(a,b,c,z), series = scalar2F1.series(a,b,c,z), laplace = scalar2F1.laplace(a,b,c,z)) return(output) } #' @keywords internal #' @noRd scalar2F1.integral <- function(a,b,c,z){ # conditions not met # INTEGRATION func.int <- function(y){ return((y^(a-1))*((1-y)^(c-a-1))*((1-(z*y))^(-b))) } myeps = 10*.Machine$double.eps term1 = stats::integrate(func.int, lower=(10*.Machine$double.eps), upper=1)$value term2 = 1/base::beta(a,c-a) return(term1*term2) } #' @keywords internal #' @noRd scalar2F1.series <- function(a,b,c,z){ no.stop = TRUE Mval = 1 n = 0 while (no.stop){ n = n+1 term1 = n*log(z) + sum(log((a + seq(from=0, to=(n-1), by=1)))) + sum(log((b + seq(from=0, to=(n-1), by=1)))) term2 = sum(log((c + seq(from=0, to=(n-1), by=1)))) + base::lfactorial(n) Mnow = exp(term1-term2) Mval = Mval + Mnow if (abs(Mnow) < 1e-10){ no.stop = FALSE } if (n > 100){ no.stop = FALSE } } return(Mval) } #' @keywords internal #' @noRd scalar2F1.laplace <- function(a,b,c,x){ tau = (x*(b-a)) - c yhat = (2*a)/(sqrt((tau^2) - (4*a*x*(c-b))) - tau) r21 = ((yhat^2)/a) + 
(((1-yhat)^2)/(c-a)) - exp((log(b) + 2*log(x) + 2*log(yhat) + 2*log(1-yhat))-(2*log(1-(x*yhat)) + log(a) + log(c-a))) log1 = (c-0.5)*log(c) log2 = -0.5*log(r21) log3 = a*(log(yhat)-log(a)) + (c-a)*(log(1-yhat)-log(c-a)) + -b*log(1-(x*yhat)) return(exp(log1+log2+log3)) } # # special case # myz = runif(1, min=-1, max=1) # asin(myz)/myz # scalar2F1(1/2,1/2,3/2,(myz^2), method = "series") # scalar2F1(1/2,1/2,3/2,(myz^2), method = "integral") # scalar2F1(1/2,1/2,3/2,(myz^2), method = "laplace") maotai/R/bmds.R0000644000175000017500000000575314176771675013166 0ustar nileshnilesh#' Bayesian Multidimensional Scaling #' #' A Bayesian formulation of classical Multidimensional Scaling is presented. #' Even though this method is based on MCMC sampling, we only return maximum a posterior (MAP) estimate #' that maximizes the posterior distribution. Due to its nature without any special tuning, #' increasing \code{mc.iter} requires much computation. #' #' @param data an \eqn{(n\times p)} matrix whose rows are observations. #' @param ndim an integer-valued target dimension. #' @param par.a hyperparameter for conjugate prior on variance term, i.e., \eqn{\sigma^2 \sim IG(a,b)}. Note that \eqn{b} is chosen appropriately as in paper. #' @param par.alpha hyperparameter for conjugate prior on diagonal term, i.e., \eqn{\lambda_j \sim IG(\alpha, \beta_j)}. Note that \eqn{\beta_j} is chosen appropriately as in paper. #' @param par.step stepsize for random-walk, which is standard deviation of Gaussian proposal. #' @param mc.iter the number of MCMC iterations. #' @param verbose a logical; \code{TRUE} to show iterations, \code{FALSE} otherwise. 
#' #' @return a named list containing #' \describe{ #' \item{embed}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.} #' \item{stress}{discrepancy between embedded and origianl data as a measure of error.} #' } #' #' @examples #' \donttest{ #' ## use simple example of iris dataset #' data(iris) #' idata = as.matrix(iris[,1:4]) #' #' ## run Bayesian MDS #' # let's run 10 iterations only. #' iris.cmds = cmds(idata, ndim=2) #' iris.bmds = bmds(idata, ndim=2, mc.iter=5, par.step=(2.38^2)) #' #' ## extract coordinates and class information #' cx = iris.cmds$embed # embedded coordinates of CMDS #' bx = iris.bmds$embed # BMDS #' icol = iris[,5] # class information #' #' ## visualize #' opar <- par(no.readonly=TRUE) #' par(mfrow=c(2,1)) #' mc = paste0("CMDS with STRESS=",round(iris.cmds$stress,4)) #' mb = paste0("BMDS with STRESS=",round(iris.bmds$stress,4)) #' plot(cx, col=icol,pch=19,main=mc) #' plot(bx, col=icol,pch=19,main=mb) #' par(opar) #' } #' #' @references #' \insertRef{oh_bayesian_2001a}{maotai} #' #' @export bmds <- function(data, ndim=2, par.a=5, par.alpha=0.5, par.step=1, mc.iter=8128, verbose=TRUE){ ############################################################ # Preprocessing if (!check_datamat(data)){ stop("* bmds : an input 'data' should be a matrix without any missing/infinite values.") } xdiss = stats::as.dist(cpp_pdist(data)) ############################################################ # Run the Hidden Function mydim = round(ndim) mya = as.double(par.a) myalpha = as.double(par.alpha) mystep = as.double(par.step) myiter = round(mc.iter) myshow = as.logical(verbose) output = hidden_bmds(xdiss, ndim=mydim, par.a=mya, par.alpha=myalpha, par.step=mystep, mc.iter=myiter, verbose=myshow) ############################################################ # Return the output return(output) }maotai/R/cov2pcorr.R0000644000175000017500000000223714176771675014152 0ustar nileshnilesh#' Convert Covariance into Partial Correlation Matrix #' #' Given a 
covariance matrix, return a partial correlation matrix that has unit diagonals. #' We strictly impose (and check) whether the given input is a symmetric matrix #' of full-rank. #' #' @param mat a \eqn{(p\times p)} covariance matrix. #' #' @return a \eqn{(p\times p)} partial correlation matrix. #' #' @examples #' \donttest{ #' # generate an empirical covariance scaled #' prep_mat = stats::cov(matrix(rnorm(100*10),ncol=10)) #' prep_vec = diag(as.vector(stats::runif(10, max=5))) #' prep_cov = prep_vec%*%prep_mat%*%prep_vec #' #' # compute correlation and partial correlation matrices #' prep_cor = cov2corr(prep_cov) #' prep_par = cov2pcorr(prep_cov) #' #' # visualize #' opar <- par(no.readonly=TRUE) #' par(mfrow=c(1,3), pty="s") #' image(prep_cov, axes=FALSE, main="covariance") #' image(prep_cor, axes=FALSE, main="correlation") #' image(prep_par, axes=FALSE, main="partial correlation") #' par(opar) #' } #' #' @export cov2pcorr <- function(mat){ # checker if (!check_covariance(mat)){ stop("* cov2corr : an input 'mat' is not a valid covariance matrix.") } return(src_cov2corr(mat)) }maotai/R/nem.R0000644000175000017500000000217114176771675013007 0ustar nileshnilesh#' Negative Eigenvalue Magnitude #' #' Negative Eigenvalue Magnitude (NEM) is a measure of distortion for the data #' whether they are lying in Euclidean manner or not. When the value is exactly 0, it means #' the data is Euclidean. On the other hand, when NEM is far away from 0, it means not Euclidean. #' The concept of NEM is closely related to the definiteness of a Gram matrix. #' #' @param data an \eqn{(n\times p)} matrix whose rows are observations. #' #' @return a nonnegative NEM value. 
#' #' @examples #' ## use simple example of iris dataset #' data(iris) #' mydat = as.matrix(iris[,1:4]) #' #' ## calculate NEM #' nem(mydat) #' #' @references #' \insertRef{pekalska_noneuclidean_2006}{maotai} #' #' @export nem <- function(data){ ############################################################ # Preprocessing if (!check_datamat(data)){ stop("* nem : an input 'data' should be a matrix without any missing/infinite values.") } xdiss = stats::as.dist(cpp_pdist(data)) ############################################################ # Compute and Return output = hidden_nem(xdiss) return(output) } maotai/R/nef.R0000644000175000017500000000215214176771675012777 0ustar nileshnilesh#' Negative Eigenfraction #' #' Negative Eigenfraction (NEF) is a measure of distortion for the data #' whether they are lying in Euclidean manner or not. When the value is exactly 0, it means #' the data is Euclidean. On the other hand, when NEF is far away from 0, it means not Euclidean. #' The concept of NEF is closely related to the definiteness of a Gram matrix. #' #' @param data an \eqn{(n\times p)} matrix whose rows are observations. #' #' @return a nonnegative NEF value. 
#' #' @examples #' ## use simple example of iris dataset #' data(iris) #' mydat = as.matrix(iris[,1:4]) #' #' ## calculate NEF #' nef(mydat) #' #' @references #' \insertRef{pekalska_noneuclidean_2006}{maotai} #' #' @export nef <- function(data){ ############################################################ # Preprocessing if (!check_datamat(data)){ stop("* nef : an input 'data' should be a matrix without any missing/infinite values.") } xdiss = stats::as.dist(cpp_pdist(data)) ############################################################ # Compute and Return output = hidden_nef(xdiss) return(output) }maotai/R/later_mbeta.R0000644000175000017500000000042614176771675014510 0ustar nileshnilesh#' Multivariate Beta #' #' #' @keywords internal #' @noRd mbeta <- function(m, a, b, log=FALSE){ m = round(m) logval = mgamma(m,a,log=TRUE) + mgamma(m,b,log=TRUE) - mgamma(m,(a+b),log=TRUE) if (log){ return(logval) } else { return(base::exp(logval)) } }maotai/R/later_scalarpFq.R0000644000175000017500000000122714176771675015334 0ustar nileshnilesh#' General Form of Hypergeometric Function #' #' #' @keywords internal #' @noRd scalarpFq <- function(veca, vecb, z){ p = length(veca) q = length(vecb) no.stop = TRUE Mval = 1 n = 0 while (no.stop){ n = n+1 terma = 0 for (i in 1:p){ terma = terma + sum(log((veca[i] + seq(from=0, to=(n-1), by=1)))) } termb = 0 for (j in 1:q){ termb = termb + sum(log((vecb[j] + seq(from=0,to=(n-1),by=1)))) } Mnow = exp(n*log(z) + terma - termb - base::lfactorial(n)) Mval = Mnow + Mval if (abs(Mnow) < 1e-10){ no.stop=FALSE } if (n>100){ no.stop=FALSE } } return(Mval) }maotai/R/later_findmed.R0000644000175000017500000000150614176771675015026 0ustar nileshnilesh#' Find a median from the pairwise dissimilarity #' #' #' @keywords internal #' @noRd findmed <- function(d, method=c("geometric","metric")){ ###################################################### # Preprocessing if (inherits(d, "dist")){ d = as.matrix(d) } else { if (!is.matrix(d)){ stop("* findmed : 
input 'd' should be a matrix.") } } if (missing(method)){ mymethod = "geometric" } else { mymethod = match.arg(method) } diag(d) = 0 ###################################################### # Compute and Return if (all(mymethod=="metric")){ # metric median; return(which.min(base::rowSums(d))) } else { # geometric median; m = base::nrow(d) vecm = rep(0,m) for (i in 1:m){ tgt = base::sort(as.vector(d[i,]), decreasing=FALSE) } } }maotai/R/package-maotai.R0000644000175000017500000000161314176771675015073 0ustar nileshnilesh#' Tools for Matrix Algebra, Optimization and Inference #' #' Matrix is an universal and sometimes primary object/unit in applied mathematics and statistics. We provide a number of algorithms for selected problems in optimization and statistical inference. #' #' @docType package #' @name package-maotai #' @import Rdpack #' @noRd #' @importFrom dbscan dbscan #' @importFrom fastcluster hclust #' @importFrom RANN nn2 #' @importFrom cluster pam silhouette #' @importFrom stats as.dist knots ecdf rnorm runif quantile dist rgamma rgeom var cov lm #' @importFrom shapes procGPA #' @importFrom Rtsne Rtsne #' @importFrom utils packageVersion #' @importFrom RSpectra eigs #' @importFrom Matrix rankMatrix #' @importFrom Rcpp evalCpp #' @useDynLib maotai NULL # pack <- "maotai" # path <- find.package(pack) # system(paste(shQuote(file.path(R.home("bin"), "R")), # "CMD", "Rd2pdf", shQuote(path)))maotai/R/later_install_scipy.R0000644000175000017500000000032614176771675016274 0ustar nileshnilesh#' Install 'SciPy' Python Module #' #' #' @keywords internal #' @noRd install_scipy <- function(method = "auto", conda = "auto") { # reticulate::py_install("scipy", method = method, conda = conda) return(1) }maotai/R/obsolete_distgmm.R0000644000175000017500000001561414176771675015576 0ustar nileshnilesh#' #' Distance Measures between Multiple Samples using Gaussian Mixture Models #' #' #' #' Taking multiple observations (a sample) as a unit of analysis requires #' #' a measure of 
discrepancy between samples. \code{distgmm} fits finite #' #' Gaussian mixture models to each sample and use the fitted model as #' #' a representation of a single sample. A single model is selected via #' #' Bayesian Information Criterion (BIC). #' #' #' #' @param datalist a length \eqn{N} list of samples. All elements of the list should be of same type, either \code{vector} or \code{matrix} of same dimension (number of columns). #' #' @param method name of the distance/dissimilarity measure. #' #' @param maxk maximum number of clusters to be fitted using GMM. #' #' @param as.dist a logical; \code{TRUE} to return \code{dist} object, \code{FALSE} to return an \eqn{(N\times N)} symmetric matrix of pairwise distances. #' #' #' #' @return either \code{dist} object of an \eqn{(N\times N)} symmetric matrix of pairwise distances by \code{as.dist} argument. #' #' #' #' @examples #' #' ## let's try two-dimensional data of 30 samples #' #' ## single or mixture of two and three gaussian distributions. #' #' dlist = list() #' #' for (i in 1:10){ #' #' dlist[[i]] = matrix(rnorm(120),ncol=2) #' #' } #' #' for (i in 11:20){ #' #' A = matrix(rnorm(60,mean=-4),ncol=2) #' #' B = matrix(rnorm(60,mean= 4),ncol=2) #' #' dlist[[i]] = rbind(A,B) #' #' } #' #' for (i in 21:30){ #' #' A = matrix(rnorm(40,mean=-4),ncol=2) #' #' B = matrix(rnorm(40),ncol=2) #' #' C = matrix(rnorm(40,mean= 4),ncol=2) #' #' dlist[[i]] = rbind(A,B,C) #' #' } #' #' #' #' ## compute pairwise distances, expecting (3 x 3) block structure. 
#' #' mm = distgmm(dlist, maxk=5) #' #' #' #' ## visualize #' #' opar <- par(no.readonly=TRUE) #' #' par(pty="s") #' #' image(mm[,nrow(mm):1], main="3-block pattern as expected") #' #' par(opar) #' #' #' #' @keywords internal #' #' @noRd #' distgmm <- function(datalist, method=c("L2"), maxk=5, as.dist=FALSE){ #' ####################################################### #' # Preprocessing : checkers #' if (!check_datalist(datalist)){ #' stop("* distgmm : an input should be a list containing samples of same dimension.") #' } #' maxk = round(maxk) #' method = match.arg(method) #' nlist = length(datalist) #' #' ####################################################### #' # Compute : GMM #' list.gmm = list() #' if (is.vector(datalist[[1]])){ #' vec.flag = TRUE #' for (n in 1:nlist){ #' list.gmm[[n]] = mclust::Mclust(datalist[[n]], G=1:maxk, modelNames="V", verbose=FALSE)$parameters #' } #' } else { #' vec.flag = FALSE #' for (n in 1:nlist){ #' list.gmm[[n]] = mclust::Mclust(datalist[[n]], G=1:maxk, verbose=FALSE, modelNames="VVV")$parameters #' } #' } #' #' ####################################################### #' # Compute : Pairwise Distance #' output = array(0,c(nlist,nlist)) #' for (i in 1:(nlist-1)){ #' objA = list.gmm[[i]] #' for (j in (i+1):nlist){ #' objB = list.gmm[[j]] #' if (vec.flag){ #' theval = switch(method, #' "L2" = distgmm_l2_1d(objA, objB)) #' output[i,j] <- output[j,i] <- theval #' } else { #' theval = switch(method, #' "L2" = distgmm_l2_nd(objA, objB)) #' output[i,j] <- output[j,i] <- theval #' } #' } #' } #' #' ####################################################### #' if (as.dist){ #' return(stats::as.dist(output)) #' } else { #' return(output) #' } #' } #' #' #' # use Mclust 'parameters' object ------------------------------------------ #' #' @keywords internal #' #' @noRd #' distgmm_l2_1d <- function(objA, objB){ #' weightA = as.vector(objA$pro) #' muA = matrix(objA$mean, ncol=1) #' covA = array(0,c(1,1,length(weightA))) #' for (i in 
1:length(weightA)){ #' covA[,,i] = objA$variance$sigmasq[i] #' } #' weightB = as.vector(objB$pro) #' muB = matrix(objB$mean, ncol=1) #' covB = array(0,c(1,1,length(weightB))) #' for (i in 1:length(weightB)){ #' covB[,,i] = objB$variance$sigmasq[i] #' } #' #' ## run CPP (same for both 1d and nd cases) #' cpp.res = cpp_pairwise_L2(muA, muB, covA, covB) #' A = cpp.res$A #' B = cpp.res$B #' C = cpp.res$C #' #' ## matrix multiplication #' term1 = base::sum(as.vector(A%*%weightA)*weightA) #' term2 = base::sum(as.vector(B%*%weightB)*weightB) #' term3 = -2*base::sum(as.vector(C%*%weightB)*weightA) #' #' ## return distance/ L2 needs to be taken square root. #' return(base::sqrt(term1+term2+term3)) #' } #' #' @keywords internal #' #' @noRd #' distgmm_l2_nd <- function(objA, objB){ #' weightA = as.vector(objA$pro) #' muA = t(objA$mean) #' covA = objA$variance$sigma #' #' weightB = as.vector(objB$pro) #' muB = t(objB$mean) #' covB = objB$variance$sigma #' #' if (length(dim(covA)) < 3){ #' tmpA = covA #' covA = array(0,c(ncol(muA),ncol(muA),1)) #' covA[,,1] = as.matrix(tmpA) #' } #' if (length(dim(covB)) < 3){ #' tmpB = covB #' covB = array(0,c(ncol(muB),ncol(muB),1)) #' covB[,,1] = as.matrix(tmpB) #' } #' #' ## run CPP (same for both 1d and nd cases) #' cpp.res = cpp_pairwise_L2(muA, muB, covA, covB) #' A = cpp.res$A #' B = cpp.res$B #' C = cpp.res$C #' #' ## matrix multiplication #' term1 = base::sum(as.vector(A%*%weightA)*weightA) #' term2 = base::sum(as.vector(B%*%weightB)*weightB) #' term3 = -2*base::sum(as.vector(C%*%weightB)*weightA) #' #' ## return distance/ L2 needs to be taken square root. 
#' return(base::sqrt(term1+term2+term3)) #' } #' # # personal experiment ----------------------------------------------------- # x = list() # for (i in 1:20){ # x[[i]] = matrix(rnorm(300*2),ncol=2) # } # for (i in 21:40){ # x[[i]] = rbind(matrix(rnorm(150*2,mean=-4),ncol=2), matrix(rnorm(150*2,mean=4),ncol=2)) # } # for (i in 41:60){ # x[[i]] = rbind(matrix(rnorm(100*2,mean=-4),ncol=2), matrix(rnorm(100*2),ncol=2), matrix(rnorm(150*2,mean=4),ncol=2)) # } # mm = distgmm(x, maxk=10) # image(mm[,nrow(mm):1]) # bestgmm <- function(dat){ # # belows are all automatically implemented in mclustBIC # # # run mclustBIC # # opt_gmm <- (mclust::mclustBIC(dat, G=1:9, verbose = FALSE)) # # colgmm <- colnames(opt_gmm) # # rowgmm <- 1:9 # # # # # extract mclustBIC information # # mm <- matrix(opt_gmm, nrow=nrow(opt_gmm)) # # mm[is.na(mm)] = -Inf # # idmax <- as.integer(which(mm == max(mm), arr.ind = TRUE)) # show the # # # # nclust <- rowgmm[idmax[1]] # # vartype <- colgmm[idmax[2]] # # # run Mclust # runobj <- mclust::Mclust(dat, G=1:10, verbose=FALSE) # # # run GMM with prespecified results # output = list() # output$weight = runobj$parameters$pro # output$mu = t(runobj$parameters$mean) # output$cov = runobj$parameters$variance # return(output) # } maotai/R/later_scalar0F1.R0000644000175000017500000000136714176771675015141 0ustar nileshnilesh#' Hypergeometric 0F1 #' #' #' @keywords internal #' @noRd scalar0F1 <- function(a, z, method=c("series")){ # PREPARE mymethod = ifelse(missing(method),"series", match.arg(tolower(method), c("series"))) # COMPUTE output = switch(mymethod, series = scalar0F1.series(a, z)) return(output) } #' @keywords internal #' @noRd scalar0F1.series <- function(a, z){ no.stop = TRUE Mval = 1 n = 0 while (no.stop){ n = n+1 M.now = exp(n*log(z) - sum(log((a + seq(from=0, to=(n-1), by=1)))) - base::lfactorial(n)) Mval = Mval + M.now if (abs(M.now) < 1e-10){ no.stop = FALSE } if (n > 100){ no.stop = FALSE } } return(Mval) } 
maotai/R/boot.mblock.R0000644000175000017500000000410614176771675014441 0ustar nileshnilesh#' Generate Index for Moving Block Bootstrapping #' #' Assuming data being dependent with cardinality \code{N}, \code{boot.mblock} returns #' a vector of index that is used for moving block bootstrapping. #' #' @param N the number of observations. #' @param b the size of a block to be drawn. #' #' @return a vector of length \code{N} for moving block bootstrap sampling. #' #' @examples #' \donttest{ #' ## example : bootstrap confidence interval of mean and variances #' vec.x = seq(from=0,to=10,length.out=100) #' vec.y = sin(1.21*vec.x) + 2*cos(3.14*vec.x) + rnorm(100,sd=1.5) #' data.mu = mean(vec.y) #' data.var = var(vec.y) #' #' ## apply moving block bootstrapping #' nreps = 50 #' vec.mu = rep(0,nreps) #' vec.var = rep(0,nreps) #' for (i in 1:nreps){ #' sample.id = boot.mblock(100, b=10) #' sample.y = vec.y[sample.id] #' vec.mu[i] = mean(sample.y) #' vec.var[i] = var(sample.y) #' print(paste("iteration ",i,"/",nreps," complete.", sep="")) #' } #' #' ## visualize #' opar <- par(no.readonly=TRUE) #' par(mfrow=c(1,3), pty="s") #' plot(vec.x, vec.y, type="l", main="1d signal") # 1d signal #' hist(vec.mu, main="mean CI", xlab="mu") # mean #' abline(v=data.mu, col="red", lwd=4) #' hist(vec.var, main="variance CI", xlab="sigma") # variance #' abline(v=data.var, col="blue", lwd=4) #' par(opar) #' } #' #' @references #' \insertRef{kunsch_jackknife_1989}{maotai} #' #' @export boot.mblock <- function(N, b=max(2,round(N/10))){ ################################################################### # Preprocessing myn = round(N) myb = round(b) vec1N = c(1:myn,1:myn,1:myn) ################################################################### # Preparation id0 = 1 idb = (myn-myb+1) id0b = (id0:idb) # starting point ################################################################### # Computation output = c() while (length(output) 0} for \eqn{i\neq j}, #' (3) \eqn{d_{ij} = d_{ji}}, and (4) 
#' \eqn{d_{ij} \leq d_{ik} + d_{kj}} (triangle inequality).
#'
#' @param d \code{"dist"} object or \eqn{(N\times N)} matrix of pairwise distances.
#'
#' @return a logical; \code{TRUE} if it satisfies metric property, \code{FALSE} otherwise.
#'
#' @examples
#' ## Let's use L2 distance matrix of iris dataset
#' data(iris)
#' dx = as.matrix(stats::dist(iris[,1:4]))
#'
#' # perturb d(i,j)
#' dy = dx
#' dy[1,2] <- dy[2,1] <- 10
#'
#' # run the algorithm
#' checkmetric(dx)
#' checkmetric(dy)
#'
#' @seealso \code{\link{checkdist}}
#' @export
checkmetric <- function(d){
  # coerce 'dist' input; anything else must already be a matrix
  if (inherits(d, "dist")){
    d = as.matrix(d)
  } else {
    if (!is.matrix(d)){
      stop("* checkmetric : input 'd' should be a matrix.")
    }
  }
  # 1. square matrix
  if (nrow(d)!=ncol(d)){
    message(" checkmetric : input 'd' is not a square matrix.")
    return(FALSE)
  }
  # 2. zero diagonals
  if (any(diag(d)!=0)){
    message(" checkmetric : input 'd' has non-zero diagonals.")
    return(FALSE)
  }
  # 3. no negative entries
  if (any(d < 0)){
    message(" checkmetric : input 'd' contains negative values.")
    return(FALSE)
  }
  # 4. symmetry
  if (!base::isSymmetric(d)){
    message(" checkmetric : input 'd' is not symmetric.")
    return(FALSE)
  }
  # 5. triangle inequality, checked exhaustively in compiled code
  return(cpp_triangle(d))
}

#' Kummer's Confluent Hypergeometric Function of Scalar Argument
#'
#' Evaluates 1F1(a;b;z) by a truncated power series, Euler's integral
#' representation, or the Laplace approximation of Butler & Wood (2002).
#'
#' @references
#' \insertRef{butler_laplace_2002}{maotai}
#'
#' @keywords internal
#' @noRd
scalar1F1 <- function(a, b, z, method=c("series","laplace","integral")){
  # PREPARE : default to the series expansion when 'method' is missing
  mymethod = ifelse(missing(method),"series",
                    match.arg(tolower(method), c("laplace","integral","series")))
  # COMPUTE
  output = switch(mymethod,
                  integral = scalar1F1.integral(a,b,z),
                  series   = scalar1F1.series(a,b,z),
                  laplace  = scalar1F1.laplace(a,b,z))
  return(output)
}

#' Euler integral representation; valid only for b > a > 0.
#' @keywords internal
#' @noRd
scalar1F1.integral <- function(a, b, z){
  # REQUIREMENT for the Euler representation
  if (!((b>a)&&(a>0))){
    stop("scalar1F1 : 'integral' method requires 'b > a > 0'.")
  }
  # INTEGRAND : y^(a-1) * (1-y)^(b-a-1) * exp(z*y)
  func.int <- function(y){
    return((y^(a-1))*((1-y)^(b-a-1))*exp(z*y))
  }
  # FIX : 'myeps' was previously computed but never used while its value was
  # hard-coded as the lower bound; use the variable (numerically identical).
  # The bound sits slightly above 0 to avoid the integrable singularity.
  myeps = 10*.Machine$double.eps
  term1 = stats::integrate(func.int, lower=myeps, upper=1)$value
  term2 = 1/base::beta(a,b-a)
  return(term1*term2)
}

#' Truncated power series; terms assembled on the log scale.
#' @keywords internal
#' @noRd
scalar1F1.series <- function(a, b, z){
  # assumes z > 0 since log(z) is taken -- TODO confirm callers
  no.stop = TRUE
  Mval    = 1    # n = 0 term
  n       = 0
  while (no.stop){
    n = n+1
    # n-th term : (a)_n * z^n / ((b)_n * n!)
    M.now = exp(sum(log((a + seq(from=0, to=(n-1), by=1)))) + n*log(z)
                - sum(log((b + seq(from=0, to=(n-1), by=1)))) - base::lfactorial(n))
    Mval  = Mval + M.now
    if (abs(M.now) < 1e-10){  # converged
      no.stop = FALSE
    }
    if (n > 100){             # safeguard on the number of terms
      no.stop = FALSE
    }
  }
  return(Mval)
}

#' Laplace approximation (Butler & Wood, 2002).
#' @keywords internal
#' @noRd
scalar1F1.laplace <- function(a, b, x){
  yhat = (2*a)/(b-x+sqrt(((x-b)^2) + (4*a*x)))
  r11  = (yhat^2)/a + ((1-yhat)^2)/(b-a)
  # assemble on the log scale to avoid overflow
  log1 = (b-0.5)*log(b)
  log2 = -0.5*log(r11)
  log3 = a*(log(yhat)-log(a))
  log4 = (b-a)*(log(1-yhat)-log(b-a))
  log5 = x*yhat
  output = exp(log1+log2+log3+log4+log5)
  return(output)
}

# mya = 1/2
# myb = sample(2:20, 1)/2
# myx =
# scalar1F1(mya,myb,myx,method="integral")
# scalar1F1(mya,myb,myx,method="series")
# scalar1F1(mya,myb,myx,method="laplace")

#' Pairwise Measures for Two Sets of Empirical CDFs
#'
#' We measure distance between two sets of empirical cumulative distribution functions (ECDF). For
#' simplicity, we only take an input of \code{\link[stats]{ecdf}} objects from \pkg{stats} package.
#'
#' @param elist1 a length \eqn{M} list of \code{ecdf} objects.
#' @param elist2 a length \eqn{N} list of \code{ecdf} objects.
#' @param method name of the distance/dissimilarity measure. Case insensitive.
#' @param p exponent for \code{Lp} or \code{Wasserstein} distance.
#'
#' @return an \eqn{(M\times N)} matrix of pairwise distances.
#'
#' @seealso \code{\link[stats]{ecdf}} \code{\link{ecdfdist}}
#'
#' @examples
#' \donttest{
#' ## toy example
#' # first list : 10 of random and uniform distributions
#' mylist1 = list()
#' for (i in 1:10){ mylist1[[i]] = stats::ecdf(stats::rnorm(50, sd=2))}
#' for (i in 11:20){mylist1[[i]] = stats::ecdf(stats::runif(50, min=-5))}
#'
#' # second list : 15 uniform and random distributions
#' mylist2 = list()
#' for (i in 1:15){ mylist2[[i]] = stats::ecdf(stats::runif(50, min=-5))}
#' for (i in 16:30){mylist2[[i]] = stats::ecdf(stats::rnorm(50, sd=2))}
#'
#' ## compute Kolmogorov-Smirnov distance
#' dm2ks = ecdfdist2(mylist1, mylist2, method="KS")
#' dm2lp = ecdfdist2(mylist1, mylist2, method="lp")
#' dm2wa = ecdfdist2(mylist1, mylist2, method="wasserstein")
#' nrs = nrow(dm2ks)
#'
#' ## visualize
#' opar = par(no.readonly=TRUE)
#' par(mfrow=c(1,3), pty="s")
#' image(dm2ks[,nrs:1], axes=FALSE, main="Kolmogorov-Smirnov")
#' image(dm2lp[,nrs:1], axes=FALSE, main="L2")
#' image(dm2wa[,nrs:1], axes=FALSE, main="Wasserstein")
#' par(opar)
#' }
#'
#' @export
ecdfdist2 <- function(elist1, elist2, method=c("KS","Lp","Wasserstein"), p=2){
  ###############################################
  # Preprocessing
  if (!elist_check(elist1)){stop("* ecdfdist2 : input 'elist1' should be a list of 'ecdf' objects.")}
  if (!elist_check(elist2)){stop("* ecdfdist2 : input 'elist2' should be a list of 'ecdf' objects.")}
  # FIX : choices ordered to match tolower() of the default 'method' vector,
  # so that a missing 'method' resolves to "ks" instead of erroring inside
  # match.arg (which requires a length-1 arg unless identical to choices).
  methodss = c("ks","lp","wasserstein")
  mymethod = match.arg(tolower(method), methodss)
  # FIX : keep p = Inf intact. The Lp/Wasserstein routines below support the
  # supremum case, but as.integer(Inf) silently becomes NA and made the
  # positivity check fail with an unrelated error.
  if (is.infinite(p) && (p > 0)){
    myp = Inf
  } else {
    myp = as.integer(p)
  }
  if (is.na(myp) || (myp <= 0)){
    stop("* ecdfdist2 : exponent 'p' should be a positive number.")
  }

  ###############################################
  # Computation
  output = switch(mymethod,
                  "ks"          = dist2_ks(elist1, elist2),
                  "wasserstein" = dist2_wasserstein(elist1, elist2, myp),
                  "lp"          = dist2_lp(elist1, elist2, myp))

  ###############################################
  # Return
  return(output)
}

# single functions --------------------------------------------------------
# (1) dist2_ks          : kolmogorov-smirnov
# (2) dist2_wasserstein : 1d wasserstein distance
# (3) dist2_lp          : Lp distance

#' Kolmogorov-Smirnov : sup-norm difference of ECDFs on a common grid.
#' @keywords internal
#' @noRd
dist2_ks <- function(elist1, elist2){
  M = length(elist1)
  N = length(elist2)
  # evaluate all ECDFs on one shared grid of abscissae
  trflst = elist_fform(c(elist1, elist2))
  flist1 = trflst$fval[1:M]
  flist2 = trflst$fval[(M+1):(M+N)]
  output = array(0,c(M,N))
  for (i in 1:M){
    fi = flist1[[i]]
    for (j in 1:N){
      fj = flist2[[j]]
      theval = max(abs(fi-fj))
      output[i,j] <- theval[1]
    }
  }
  return(output)
}

#' Lp distance between ECDF curves; p = Inf gives the supremum norm.
#' @keywords internal
#' @noRd
dist2_lp <- function(elist1, elist2, p){
  M = length(elist1)
  N = length(elist2)
  trflst = elist_fform(c(elist1, elist2))
  flist1 = trflst$fval[1:M]
  flist2 = trflst$fval[(M+1):(M+N)]
  output = array(0,c(M,N))
  for (i in 1:M){
    fi = flist1[[i]]
    for (j in 1:N){
      fj = flist2[[j]]
      if (is.infinite(p)){
        output[i,j] = base::max(base::abs(fi-fj))[1]
      } else {
        output[i,j] <- ((integrate_1d(trflst$tseq, (abs(fi-fj)^p)))^(1/p))
      }
    }
  }
  return(output)
}

#' 1d Wasserstein-p distance via quantile functions on a fixed probability grid.
#' @keywords internal
#' @noRd
dist2_wasserstein <- function(elist1, elist2, p){
  M = length(elist1)
  N = length(elist2)
  trflst = elist_fform(c(elist1, elist2))
  flist1 = trflst$fval[1:M]
  flist2 = trflst$fval[(M+1):(M+N)]
  # probability grid stays strictly inside (0,1) to avoid infinite quantiles
  qseq   = base::seq(from=1e-6, to=1-(1e-6), length.out=8128)
  quants1 = list()   # compute quantile functions first
  quants2 = list()
  for (i in 1:M){
    quants1[[i]] = as.double(stats::quantile(elist1[[i]], qseq))
  }
  for (j in 1:N){
    quants2[[j]] = as.double(stats::quantile(elist2[[j]], qseq))
  }
  output = array(0,c(M,N))
  for (i in 1:M){
    vali = quants1[[i]]
    for (j in 1:N){
      valj  = quants2[[j]]
      valij = abs(vali-valj)
      if (is.infinite(p)){
        output[i,j] = base::max(valij)
      } else {
        output[i,j] <- ((integrate_1d(qseq, valij^p))^(1/p))
      }
    }
  }
  return(output)
}

# Hidden Functions : Operations
# these functions can be loaded using 'utils::getFromNamespace'
# by the command 'getFromNamespace("function_name","maotai");
#
# 01. hidden_pinv         : pseudo-inverse
# 02. hidden_vech         : half vectorization including the diagonal.
#     hidden_ivech          diagonal inclusion is also taken care.
# 03. hidden_lab2ind      : create an index list from a label vector
#     hidden_ind2lab        given an index list, create a label vector
# 04. hidden_subsetid     : generate split of the subset id
# 05. hidden_geigen       : do 'geigen' operation; Decreasing order
# 06. hidden_knn
# 07. hidden_knee_clamped : knee-point detection with clamped least squares - return idx
# 08. hidden_knn_binary   : return a sparse binary matrix for Euclidean data excluding self; dgCMatrix

# 01. hidden_pinv ---------------------------------------------------------
#' Moore-Penrose pseudo-inverse (delegates to the package's aux_pinv).
#' @keywords internal
#' @noRd
hidden_pinv <- function(A){
  return(aux_pinv(A))
}

# 02.
# 02. hidden_vech & hidden_ivech ------------------------------------------
#' Half vectorization of a square matrix (lower triangle, optional diagonal).
#' @keywords internal
#' @noRd
hidden_vech <- function(A, diag=TRUE){
  if ((!is.matrix(A))||(nrow(A)!=ncol(A))){
    stop("* hidden_vech : input should be a square matrix.")
  }
  mydiag = as.logical(diag)
  return(A[base::lower.tri(A, diag=mydiag)])
}

#' Inverse half vectorization : rebuild the symmetric matrix from vector 'a'.
#' @keywords internal
#' @noRd
hidden_ivech <- function(a, diag=TRUE){
  k = length(a)
  if (diag){
    # with diagonal : k = n(n+1)/2  =>  n = (-1+sqrt(1+8k))/2
    n = round((-1 + sqrt(1+(8*k)))/2)
    output = array(0,c(n,n))
    output[lower.tri(output, diag = TRUE)] = a
    output = output + t(output)
    diag(output) = diag(output)/2   # diagonal entries were added twice above
  } else {
    # without diagonal : k = n(n-1)/2  =>  n = (1+sqrt(1+8k))/2
    n = round((1+sqrt(1+8*k))/2)
    output = array(0,c(n,n))
    output[lower.tri(output, diag = FALSE)] = a
    output = output + t(output)
  }
  return(output)
}

# 03. hidden_lab2ind & hidden_ind2lab -------------------------------------
#' Convert a label vector into a list of index vectors, one per unique label.
#' @keywords internal
#' @noRd
hidden_lab2ind <- function(label){
  ulabel = base::unique(label)
  nlabel = length(ulabel)
  index  = list()
  for (k in 1:nlabel){
    index[[k]] = which(label==ulabel[k])
  }
  return(index)
}

#' Convert a list of index vectors back into a label vector with labels 1..K.
#' @keywords internal
#' @noRd
hidden_ind2lab <- function(index){
  K = length(index)
  N = sum(unlist(lapply(index, length)))
  output = rep(0,N)
  for (k in 1:K){
    output[index[[k]]] = k
  }
  return(output)
}

# 04. hidden_subsetid -----------------------------------------------------
#' Randomly split the indices 1:n into k roughly equal-sized folds.
#' @keywords internal
#' @noRd
hidden_subsetid <- function(n, k){
  # FIX : the grouping passed to split() must be a length-n vector; the
  # previous scalar 'n%%k' was recycled and collapsed everything into a
  # single fold.
  return(base::split(base::sample(n), base::sort(seq_len(n)%%k)))
}

# 05. hidden_geigen -------------------------------------------------------
#' It mimics the behavior of 'geigen' function with normalization added
#' @keywords internal
#' @noRd
hidden_geigen <- function(A, B, normalize=TRUE){
  n    = nrow(A)
  runs = cpp_geigen(A,B)
  # reverse into decreasing order; keep real parts only
  tval = as.vector(base::Re(runs$values))[n:1]
  tvec = base::Re(runs$vectors)[,n:1]
  if (normalize){
    # rescale each eigenvector to unit Euclidean norm
    for (i in 1:n){
      tgt = as.vector(tvec[,i])
      tvec[,i] = tgt/sqrt(sum(tgt^2))
    }
  }
  output = list()
  output$values  = tval
  output$vectors = tvec
  return(output)
}

# 06. hidden_knn ----------------------------------------------------------
#' Thin wrapper around RANN::nn2 for k-nearest-neighbor search.
#' @keywords internal
#' @noRd
hidden_knn <- function(dat, nnbd=2, ...){
  nnbd = round(nnbd)
  return(RANN::nn2(dat, k=nnbd, ...))
}

# 07. hidden_knee_clamped -------------------------------------------------
#' SSE of the line clamped through the first and last points of (x,y).
#' @keywords internal
#' @noRd
hidden_knee_clamped_basic <- function(x, y){
  m = length(x)
  c = x[1]
  d = y[1]
  a = x[m]
  b = y[m]
  y2 = (((b-d)/(a-c))*(x-c))+d
  return(sum((y-y2)^2))
}

#' Knee-point detection by two-segment clamped least squares; returns an index.
#' @keywords internal
#' @noRd
hidden_knee_clamped <- function(x, y){
  x = as.vector(x)
  y = as.vector(y)
  n = length(x)
  if (n < 3){
    stop("* knee point detection : length must be larger than 2.")
  }
  scores = rep(Inf, n)
  for (i in 2:(n-1)){
    x.left  = x[1:i]
    y.left  = y[1:i]
    x.right = x[i:n]
    y.right = y[i:n]
    term1 = hidden_knee_clamped_basic(x.left, y.left)
    term2 = hidden_knee_clamped_basic(x.right, y.right)
    scores[i] = term1+term2
  }
  return(which.min(scores)) # return the index of the minimal SSE's
}

# 08. hidden_knn_binary ---------------------------------------------------
# excluding self and return a binary sparse adjacency matrix
#' @keywords internal
#' @noRd
hidden_knn_binary <- function(data, nnbd=1, construction=c("or","and")){
  n    = base::nrow(data)
  nnbd = max(round(nnbd), 1)
  construction = match.arg(construction)
  if (all(construction=="and")){
    intersect = TRUE
  } else {
    intersect = FALSE
  }
  run_knn = RANN::nn2(data, k=nnbd+1)$nn.idx[,2:(nnbd+1)]-1 # -1 for C++ convention
  run_res = src_construct_by_knn(run_knn, intersect)
  return(run_res)
}

#' Distance Measures between Multiple Empirical Cumulative Distribution Functions
#'
#' We measure distance between two empirical cumulative distribution functions (ECDF). For
#' simplicity, we only take an input of \code{\link[stats]{ecdf}} objects from \pkg{stats} package.
#'
#' @param elist a length \eqn{N} list of \code{ecdf} objects.
#' @param method name of the distance/dissimilarity measure.
#' Case insensitive.
#' @param p exponent for \code{Lp} or \code{Wasserstein} distance.
#' @param as.dist a logical; \code{TRUE} to return \code{dist} object, \code{FALSE} to return an \eqn{(N\times N)} symmetric matrix of pairwise distances.
#'
#' @return either \code{dist} object of an \eqn{(N\times N)} symmetric matrix of pairwise distances by \code{as.dist} argument.
#'
#' @seealso \code{\link[stats]{ecdf}}
#'
#' @examples
#' \donttest{
#' ## toy example : 10 of random and uniform distributions
#' mylist = list()
#' for (i in 1:10){
#'   mylist[[i]] = stats::ecdf(stats::rnorm(50, sd=2))
#' }
#' for (i in 11:20){
#'   mylist[[i]] = stats::ecdf(stats::runif(50, min=-5))
#' }
#'
#' ## compute Kolmogorov-Smirnov distance
#' dm = ecdfdist(mylist, method="KS")
#'
#' ## visualize
#' mks =" KS distances of 2 Types"
#' opar = par(no.readonly=TRUE)
#' par(pty="s")
#' image(dm[,nrow(dm):1], axes=FALSE, main=mks)
#' par(opar)
#' }
#'
#' @export
ecdfdist <- function(elist, method=c("KS","Lp","Wasserstein"), p=2, as.dist=FALSE){
  ###############################################
  # Preprocessing
  if (!elist_check(elist)){
    stop("* ecdfdist : input 'elist' should be a list of 'ecdf' objects.")
  }
  # FIX : choices ordered to match tolower() of the default 'method' vector,
  # so that a missing 'method' resolves to "ks" instead of erroring inside
  # match.arg (which requires a length-1 arg unless identical to choices).
  methodss = c("ks","lp","wasserstein")
  mymethod = match.arg(tolower(method), methodss)
  myp = round(p)   # round(Inf) stays Inf, so the supremum case passes through
  if (myp <= 0){
    # FIX : message said "nonnegative" although p = 0 is rejected as well
    stop("* ecdfdist : exponent 'p' should be a positive number.")
  }

  ###############################################
  # Computation
  output = switch(mymethod,
                  "ks"          = dist_ks(elist),
                  "wasserstein" = dist_wasserstein(elist, myp),
                  "lp"          = dist_lp(elist, myp))

  ###############################################
  # Report
  if (as.dist){
    return(stats::as.dist(output))
  } else {
    return(output)
  }
}

# single functions --------------------------------------------------------
# (1) dist_ks          : kolmogorov-smirnov
# (2) dist_wasserstein : 1d wasserstein distance
# (3) dist_lp          : Lp distance

#' Kolmogorov-Smirnov : pairwise sup-norm differences of ECDFs.
#' @keywords internal
#' @noRd
dist_ks <- function(elist){
  # evaluate all ECDFs on one shared grid of abscissae
  trflist = elist_fform(elist)
  flist   = trflist$fval
  nlist   = length(flist)
  output  = array(0,c(nlist,nlist))
  for (i in 1:(nlist-1)){
    fi = flist[[i]]
    for (j in (i+1):nlist){
      fj = flist[[j]]
      theval = max(abs(fi-fj))
      output[i,j] <- output[j,i] <- theval[1]
    }
  }
  return(output)
}

#' Lp distance between ECDF curves; p = Inf gives the supremum norm.
#' @keywords internal
#' @noRd
dist_lp <- function(elist, p){
  trflist = elist_fform(elist)
  flist   = trflist$fval
  nlist   = length(flist)
  output  = array(0,c(nlist,nlist))
  if (is.infinite(p)){
    for (i in 1:(nlist-1)){
      fi = flist[[i]]
      for (j in (i+1):nlist){
        fj = flist[[j]]
        output[i,j] <- output[j,i] <- base::max(base::abs(fi-fj))[1]
      }
    }
  } else {
    for (i in 1:(nlist-1)){
      fi = flist[[i]]
      for (j in (i+1):nlist){
        fj = flist[[j]]
        theval = ((integrate_1d(trflist$tseq, (abs(fi-fj)^p)))^(1/p))
        output[i,j] <- output[j,i] <- theval
      }
    }
  }
  return(output)
}

#' 1d Wasserstein-p distance via quantile functions on a fixed probability grid.
#' @keywords internal
#' @noRd
dist_wasserstein <- function(elist, p){
  nlist = length(elist)
  # probability grid stays strictly inside (0,1) to avoid infinite quantiles
  qseq  = base::seq(from=1e-6, to=1-(1e-6), length.out=8128)
  quants = list()  # compute quantile functions first
  for (i in 1:nlist){
    quants[[i]] = as.double(stats::quantile(elist[[i]], qseq))
  }
  output = array(0,c(nlist,nlist))
  for (i in 1:(nlist-1)){
    vali = quants[[i]]
    for (j in (i+1):nlist){
      valj  = quants[[j]]
      valij = abs(vali-valj)
      if (is.infinite(p)){
        output[i,j] <- output[j,i] <- base::max(valij)
      } else {
        theval <- ((integrate_1d(qseq, valij^p))^(1/p))
        output[i,j] <- output[j,i] <- theval
      }
    }
  }
  return(output)
}

## wasserstein : http://www-users.math.umn.edu/~bobko001/preprints/2016_BL_Order.statistics_Revised.version.pdf

#' Kernel Two-sample Test with Maximum Mean Discrepancy
#'
#' Maximum Mean Discrepancy (MMD) as a measure of discrepancy between
#' samples is employed as a test statistic for two-sample hypothesis test
#' of equal distributions. Kernel matrix \eqn{K} is a symmetric square matrix
#' that is positive semidefinite.
#'
#' @param K kernel matrix or an object of \code{kernelMatrix} class from \pkg{kernlab} package.
#' @param label label vector of class indices.
#' @param method type of estimator to be used. \code{"b"} for biased and \code{"u"} for unbiased estimator of MMD.
#' @param mc.iter the number of Monte Carlo resampling iterations.
#'
#' @return a (list) object of \code{S3} class \code{htest} containing: \describe{
#' \item{statistic}{a test statistic.}
#' \item{p.value}{\eqn{p}-value under \eqn{H_0}.}
#' \item{alternative}{alternative hypothesis.}
#' \item{method}{name of the test.}
#' \item{data.name}{name(s) of provided kernel matrix.}
#' }
#'
#' @examples
#' ## small test for CRAN submission
#' dat1 <- matrix(rnorm(60, mean= 1), ncol=2) # group 1 : 30 obs of mean 1
#' dat2 <- matrix(rnorm(50, mean=-1), ncol=2) # group 2 : 25 obs of mean -1
#'
#' dmat <- as.matrix(dist(rbind(dat1, dat2))) # Euclidean distance matrix
#' kmat <- exp(-(dmat^2))                     # build a gaussian kernel matrix
#' lab  <- c(rep(1,30), rep(2,25))            # corresponding label
#'
#' mmd2test(kmat, lab)                        # run the code !
#'
#' \dontrun{
#' ## WARNING: computationally heavy.
#' # Let's compute empirical Type 1 error at alpha=0.05
#' niter = 496
#' pvals1 = rep(0,niter)
#' pvals2 = rep(0,niter)
#' for (i in 1:niter){
#'   dat = matrix(rnorm(200),ncol=2)
#'   lab = c(rep(1,50), rep(2,50))
#'   lbd = 0.1
#'   kmat = exp(-lbd*(as.matrix(dist(dat))^2))
#'   pvals1[i] = mmd2test(kmat, lab, method="b")$p.value
#'   pvals2[i] = mmd2test(kmat, lab, method="u")$p.value
#'   print(paste("iteration ",i," complete..",sep=""))
#' }
#'
#' # Visualize the above at multiple significance levels
#' alphas = seq(from=0.001, to=0.999, length.out=100)
#' errors1 = rep(0,100)
#' errors2 = rep(0,100)
#' for (i in 1:100){
#'   errors1[i] = sum(pvals1<=alphas[i])/niter
#'   errors2[i] = sum(pvals2<=alphas[i])/niter
#' }
#'
#' opar <- par(no.readonly=TRUE)
#' par(mfrow=c(1,2), pty="s")
#' plot(alphas, errors1, "b", main="Biased Estimator Error",
#'      xlab="alpha", ylab="error", cex=0.5)
#' abline(a=0,b=1, lwd=1.5, col="red")
#' plot(alphas, errors2, "b", main="Unbiased Estimator Error",
#'      xlab="alpha", ylab="error", cex=0.5)
#' abline(a=0,b=1, lwd=1.5, col="blue")
#' par(opar)
#' }
#'
#' @references
#' \insertRef{gretton_kernel_2012}{maotai}
#'
#' @export
mmd2test <- function(K, label, method=c("b","u"), mc.iter=999){
  ###############################################
  # Preprocessing
  DNAME = deparse(substitute(K))
  # 1. K : kernel matrix. Both 'kernelMatrix' objects and plain matrices are
  #    coerced the same way, so a single as.matrix() call suffices (the
  #    previous if/else had two identical branches).
  kmat  = as.matrix(K)
  cond1 = (is.matrix(kmat))
  cond2 = (nrow(kmat)==ncol(kmat))
  cond3 = isSymmetric(kmat)
  if (!(cond1&&cond2&&cond3)){
    stop("* mmd2test : 'K' should be a kernel matrix.")
  }
  # warn (do not stop) when K is not positive semidefinite
  mineval = min(base::eigen(kmat, only.values = TRUE)$values)
  if (mineval<0){
    wm = paste("* mmd2test : 'K' may not be PD. Minimum eigenvalue is ",mineval,".",sep="")
    warning(wm)
  }
  # 2. label : must define exactly two classes, one per row of K
  label = as.vector(as.integer(as.factor(label)))
  if ((length(label)!=nrow(kmat))||(length(unique(label))!=2)){
    stop("* mmd2test : 'label' should be a vector of proper length with 2 classes.")
  }
  ulabel = unique(label)
  # 3. method
  allmmm = c("b","u")
  method = match.arg(tolower(method), allmmm)

  ###############################################
  # compute the statistic on the observed grouping
  id1 = which(label==ulabel[1]); m = length(id1)
  id2 = which(label==ulabel[2]); n = length(id2)
  thestat = switch(method,
                   "b" = mmd_biased(kmat[id1,id1], kmat[id2,id2], kmat[id1,id2]),
                   "u" = mmd_unbiased(kmat[id1,id1], kmat[id2,id2], kmat[id1,id2]))

  ###############################################
  # Monte Carlo permutation null distribution
  mciter   = round(mc.iter)
  itervals = rep(0,mciter)
  for (i in 1:mciter){
    permuted = sample(m+n)
    tmpid1 = permuted[1:m]
    tmpid2 = permuted[(m+1):(m+n)]
    itervals[i] = switch(method,
                         "b" = mmd_biased(kmat[tmpid1,tmpid1], kmat[tmpid2,tmpid2], kmat[tmpid1,tmpid2]),
                         "u" = mmd_unbiased(kmat[tmpid1,tmpid1], kmat[tmpid2,tmpid2], kmat[tmpid1,tmpid2]))
  }
  # add-one correction keeps the p-value strictly inside (0, 1]
  pvalue = (sum(itervals>=thestat)+1)/(mciter+1)

  ###############################################
  # REPORT as an S3 'htest' object
  hname = "Kernel Two-sample Test with Maximum Mean Discrepancy"
  Ha    = "two distributions are not equal"
  names(thestat) = "MMD"
  res = list(statistic=thestat, p.value=pvalue, alternative = Ha, method=hname, data.name = DNAME)
  class(res) = "htest"
  return(res)
}

# compute two squared statistics ------------------------------------------
#' Biased MMD^2 estimator (Gretton et al., 2012, Eq. 5).
#' @keywords internal
#' @noRd
mmd_biased <- function(XX, YY, XY){
  # parameters
  m = nrow(XX)
  n = nrow(YY)
  # computation
  return((sum(XX)/(m^2)) + (sum(YY)/(n^2)) - ((2/(m*n))*sum(XY)))
}

#' Unbiased MMD^2 estimator : diagonal (self-similarity) terms excluded.
#' @keywords internal
#' @noRd
mmd_unbiased <- function(XX, YY, XY){
  # parameters
  m = nrow(XX)
  n = nrow(YY)
  # computation
  term1 = (sum(XX)-sum(diag(XX)))/(m*(m-1))
  term2 = (sum(YY)-sum(diag(YY)))/(n*(n-1))
  term3 = (2/(m*n))*sum(XY)
  return((term1+term2-term3))
}

#' Calculate the Pseudo-Determinant of a Matrix
#'
#' When a given square matrix \eqn{A} is rank deficient, determinant is zero.
#' Still, we can compute the pseudo-determinant by multiplying all non-zero
#' eigenvalues.
#' Since thresholding to determine near-zero eigenvalues is subjective,
#' we implemented the function as of original limit problem. When matrix is
#' non-singular, it coincides with traditional determinant.
#'
#' @param A a square matrix whose pseudo-determinant be computed.
#'
#' @return a scalar value for computed pseudo-determinant.
#'
#' @examples
#' ## show the convergence of pseudo-determinant
#' # settings
#' n = 10
#' A = cov(matrix(rnorm(5*n),ncol=n))     # (n x n) matrix
#' k = as.double(Matrix::rankMatrix(A))   # rank of A
#'
#' # iterative computation
#' ntry = 11
#' del.vec = exp(-(1:ntry))
#' det.vec = rep(0,ntry)
#' for (i in 1:ntry){
#'   del = del.vec[i]
#'   det.vec[i] = det(A+del*diag(n))/(del^(n-k))
#' }
#'
#' # visualize the results
#' opar <- par(no.readonly=TRUE)
#' plot(1:ntry, det.vec, main=paste("true rank is ",k," out of ",n,sep=""),"b", xlab="iterations")
#' abline(h=pdeterminant(A),col="red",lwd=1.2)
#' par(opar)
#'
#' @references
#' \insertRef{holbrook_differentiating_2018}{maotai}
#'
#' @export
pdeterminant <- function(A){
  ## input validation
  if (!is.matrix(A)){
    stop("* pdeterminant : input 'A' should be a matrix.")
  }
  if (nrow(A)!=ncol(A)){
    stop("* pdeterminant : input 'A' should be a square matrix.")
  }
  n = nrow(A)
  k = as.double(Matrix::rankMatrix(A))
  if (k==n){
    # full rank : the pseudo-determinant equals the ordinary determinant.
    # FIX : base::determinant() returns a log-scale 'det' object (modulus
    # and sign), not the scalar promised by @return and produced by the
    # rank-deficient branch; base::det() is the scalar determinant.
    return(base::det(A))
  } else {
    # rank deficient : evaluate det(A + delta*I)/delta^(n-k) along a
    # geometrically shrinking sequence of delta until relative convergence
    multccc = 0.9
    del.old = 1
    det.old = det(A+del.old*diag(n))/(del.old^(n-k))
    for (i in 1:496){
      del.new = del.old*multccc
      det.new = det(A+del.new*diag(n))/(del.new^(n-k))
      if ((abs(det.new-det.old)/abs(det.old))<1e-5){
        return(det.new)
      } else {
        del.old = del.new
        det.old = det.new
      }
    }
    # fall back to the last iterate if tolerance was never met
    return(det.old)
  }
}

# # personal tests ----------------------------------------------------------
# n = 10
# A = cov(matrix(rnorm(5*n),ncol=n))
# k = as.double(Matrix::rankMatrix(A))
#
# ntry = 20
# del.vec = exp(-(1:ntry))
# det.vec = rep(0,ntry)
# for (i in 1:ntry){
#   del = del.vec[i]
#   det.vec[i] =
det(A+del*diag(n))/(del^(n-k)) # } # plot(1:ntry, det.vec, main=paste("true rank is ",k,"/",n,sep=""),"b") # abline(h=pdeterminant(A),col="red",lwd=1.2) maotai/R/matderiv.R0000644000175000017500000000475114176771675014051 0ustar nileshnilesh#' Numerical Approximation to Gradient of a Function with Matrix Argument #' #' For a given function \eqn{f:\mathbf{R}^{n\times p} \rightarrow \mathbf{R}}, #' we use finite difference scheme that approximates a gradient at a given point \eqn{x}. #' In Riemannian optimization, this can be used as a proxy for #' ambient gradient. Use with care since it may accumulate numerical error. #' #' @param fn a function that takes a matrix of size \eqn{(n\times p)} and returns a scalar value. #' @param x an \eqn{(n\times p)} matrix where the gradient is to be computed. #' @param h step size for centered difference scheme. #' #' @return an approximate numerical gradient matrix of size \eqn{(n\times p)}. #' #' @examples #' ## function f(X) = for two vectors 'a' and 'b' #' # derivative w.r.t X is ab' #' # take an example of (5x5) symmetric positive definite matrix #' #' # problem settings #' a <- rnorm(5) #' b <- rnorm(5) #' ftn <- function(X){ #' return(sum(as.vector(X%*%b)*a)) #' } # function to be taken derivative #' myX <- matrix(rnorm(25),nrow=5) # point where derivative is evaluated #' myX <- myX%*%t(myX) #' #' # main computation #' sol.true <- base::outer(a,b) #' sol.num1 <- matderiv(ftn, myX, h=1e-1) # step size : 1e-1 #' sol.num2 <- matderiv(ftn, myX, h=1e-5) # 1e-3 #' sol.num3 <- matderiv(ftn, myX, h=1e-9) # 1e-5 #' #' ## visualize/print the results #' expar = par(no.readonly=TRUE) #' par(mfrow=c(2,2),pty="s") #' image(sol.true, main="true solution") #' image(sol.num1, main="h=1e-1") #' image(sol.num2, main="h=1e-5") #' image(sol.num3, main="h=1e-9") #' par(expar) #' #' \donttest{ #' ntrue = norm(sol.true,"f") #' cat('* Relative Errors in Frobenius Norm ') #' cat(paste("* h=1e-1 : ",norm(sol.true-sol.num1,"f")/ntrue,sep="")) #' 
#' cat(paste("* h=1e-5 : ",norm(sol.true-sol.num2,"f")/ntrue,sep=""))
#' cat(paste("* h=1e-9 : ",norm(sol.true-sol.num3,"f")/ntrue,sep=""))
#' }
#'
#' @references
#' \insertRef{kincaid_numerical_2009}{maotai}
#'
#' @export
matderiv <- function(fn, x, h=0.001){
  # FIX : the check rejects h = 0 as well, so the message now says
  # "positive" instead of the previous "nonnegative".
  if (h <= 0){
    stop("* matderiv : 'h' should be a positive real number.")
  }
  # clamp step sizes below sqrt(machine epsilon), where the centered
  # difference would be numerically meaningless
  hval = max(sqrt(.Machine$double.eps), h)
  # gradF is the compiled centered-difference routine (see RcppExports below)
  return(gradF(fn, x, hval))
}

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

compute_SSR <- function(D, Delta) {
    .Call('_maotai_compute_SSR', PACKAGE = 'maotai', D, Delta)
}

compute_stress <- function(D, Dhat) {
    .Call('_maotai_compute_stress', PACKAGE = 'maotai', D, Dhat)
}

main_bmds <- function(D, X0, sigg0, a, alpha, maxiter, constant, verbose, betas) {
    .Call('_maotai_main_bmds', PACKAGE = 'maotai', D, X0, sigg0, a, alpha, maxiter, constant, verbose, betas)
}

aux_shortestpath <- function(wmat) {
    .Call('_maotai_aux_shortestpath', PACKAGE = 'maotai', wmat)
}

cppsub_2007Wang <- function(V0, mm, d, Spu, Stu, maxiter, eps) {
    .Call('_maotai_cppsub_2007Wang', PACKAGE = 'maotai', V0, mm, d, Spu, Stu, maxiter, eps)
}

gradF <- function(func, xnow, h) {
    .Call('_maotai_gradF', PACKAGE = 'maotai', func, xnow, h)
}

dat2centers <- function(data, centers) {
    .Call('_maotai_dat2centers', PACKAGE = 'maotai', data, centers)
}

cpp_sylvester <- function(A, B, C) {
    .Call('_maotai_cpp_sylvester', PACKAGE = 'maotai', A, B, C)
}

solve_lyapunov <- function(A, B, C) {
    .Call('_maotai_solve_lyapunov', PACKAGE = 'maotai', A, B, C)
}

cpp_weiszfeld <- function(X, abstol, maxiter, xinit, weights,
epsnum) { .Call('_maotai_cpp_weiszfeld', PACKAGE = 'maotai', X, abstol, maxiter, xinit, weights, epsnum) } cpp_kmeans <- function(data, k) { .Call('_maotai_cpp_kmeans', PACKAGE = 'maotai', data, k) } emds_gamma0 <- function(dmat) { .Call('_maotai_emds_gamma0', PACKAGE = 'maotai', dmat) } cpp_pairwise_L2 <- function(muA, muB, covA, covB) { .Call('_maotai_cpp_pairwise_L2', PACKAGE = 'maotai', muA, muB, covA, covB) } integrate_1d <- function(tseq, fval) { .Call('_maotai_integrate_1d', PACKAGE = 'maotai', tseq, fval) } cpp_pdist <- function(X) { .Call('_maotai_cpp_pdist', PACKAGE = 'maotai', X) } cpp_geigen <- function(A, B) { .Call('_maotai_cpp_geigen', PACKAGE = 'maotai', A, B) } cpp_triangle <- function(D) { .Call('_maotai_cpp_triangle', PACKAGE = 'maotai', D) } cpp_mmds <- function(D, ndim, maxiter, abstol) { .Call('_maotai_cpp_mmds', PACKAGE = 'maotai', D, ndim, maxiter, abstol) } src_smacof <- function(D, W, ndim, maxiter, abstol, use_gutman) { .Call('_maotai_src_smacof', PACKAGE = 'maotai', D, W, ndim, maxiter, abstol, use_gutman) } eval_gaussian <- function(x, mu, cov) { .Call('_maotai_eval_gaussian', PACKAGE = 'maotai', x, mu, cov) } eval_gaussian_data <- function(X, mu, cov) { .Call('_maotai_eval_gaussian_data', PACKAGE = 'maotai', X, mu, cov) } eval_gmm_data <- function(X, mus, covs, weight) { .Call('_maotai_eval_gmm_data', PACKAGE = 'maotai', X, mus, covs, weight) } eval_gmm <- function(x, mus, covs, weight) { .Call('_maotai_eval_gmm', PACKAGE = 'maotai', x, mus, covs, weight) } src_construct_by_knn <- function(nn_idx, intersection) { .Call('_maotai_src_construct_by_knn', PACKAGE = 'maotai', nn_idx, intersection) } src_gaussbary_2002R <- function(array3d, weight, maxiter, abstol) { .Call('_maotai_src_gaussbary_2002R', PACKAGE = 'maotai', array3d, weight, maxiter, abstol) } src_gaussbary_2016A <- function(array3d, weight, maxiter, abstol) { .Call('_maotai_src_gaussbary_2016A', PACKAGE = 'maotai', array3d, weight, maxiter, abstol) } src_cov2corr <- 
function(covmat) { .Call('_maotai_src_cov2corr', PACKAGE = 'maotai', covmat) } maotai/R/cayleymenger.R0000644000175000017500000000245214176771675014716 0ustar nileshnilesh#' Cayley-Menger Determinant #' #' Cayley-Menger determinant is a formula of a \eqn{n}-dimensional simplex #' with respect to the squares of all pairwise distances of its vertices. #' #' @param data an \eqn{(n\times p)} matrix of row-stacked observations. #' @return a list containing\describe{ #' \item{det}{determinant value.} #' \item{vol}{volume attained from the determinant.} #' } #' #' @examples #' ## USE 'IRIS' DATASET #' data(iris) #' X = as.matrix(iris[,1:4]) #' #' ## COMPUTE CAYLEY-MENGER DETERMINANT #' # since k=4 < n=149, it should be zero. #' cayleymenger(X) #' #' @export cayleymenger <- function(data){ # Preprocessing if (!check_datamat(data)){ stop("* cayleymenger : an input 'data' should be a matrix without any missing/infinite values.") } # compute pairwise distance Dtmp = stats::as.dist(cpp_pdist(data)) # compute and return return(cayleymenger_internal(Dtmp)) } #' @keywords internal #' @noRd cayleymenger_internal <- function(distobj){ Dold = (as.matrix(distobj)^2) n = base::nrow(Dold) Dnew = rbind(cbind(Dold, rep(1,n)), c(rep(1,n),0)) val.det = base::det(Dnew) n = n+1 val.vol = base::sqrt(base::exp(base::log(((-1)^(n+1))*val.det) - n*log(2) - (2*base::lfactorial(n)))) output = list(det=val.det, vol=val.vol) return(output) }maotai/LICENSE0000644000175000017500000000005014176771675012703 0ustar nileshnileshYEAR: 2020 COPYRIGHT HOLDER: Kisung You maotai/inst/0000755000175000017500000000000014176771675012660 5ustar nileshnileshmaotai/inst/REFERENCES.bib0000644000175000017500000001632514176771675014766 0ustar nileshnilesh @inproceedings{arthur_kmeans_2007, title = {K-Means++: The Advantages of Careful Seeding}, booktitle = {In Proceedings of the 18th Annual {{ACM}}-{{SIAM}} Symposium on Discrete Algorithms}, author = {Arthur, David and Vassilvitskii, Sergei}, year = {2007} } 
@article{butler_laplace_2002, title = {Laplace Approximations for Hypergeometric Functions with Matrix Argument}, author = {Butler, Roland W. and Wood, Andrew T. A.}, year = {2002}, month = aug, volume = {30}, pages = {1155--1177}, doi = {10.1214/aos/1031689021}, journal = {The Annals of Statistics}, number = {4} } @article{eddelbuettel_rcpparmadillo_2014, title = {{{RcppArmadillo}}: {{Accelerating R}} with High-Performance {{C}}++ Linear Algebra}, author = {Eddelbuettel, Dirk and Sanderson, Conrad}, year = {2014}, month = mar, volume = {71}, pages = {1054--1063}, journal = {Computational Statistics and Data Analysis} } @article{floyd_algorithm_1962, title = {Algorithm 97: {{Shortest}} Path}, shorttitle = {Algorithm 97}, author = {Floyd, Robert W.}, year = {1962}, month = jun, volume = {5}, pages = {345}, journal = {Communications of the ACM}, number = {6} } @article{goodall_procrustes_1991, title = {Procrustes {{Methods}} in the {{Statistical Analysis}} of {{Shape}}}, author = {Goodall, Colin}, year = {1991}, volume = {53}, pages = {285--339}, issn = {00359246}, journal = {Journal of the Royal Statistical Society. Series B (Methodological)}, number = {2} } @article{gretton_kernel_2012, title = {A {{Kernel Two}}-Sample {{Test}}}, author = {Gretton, Arthur and Borgwardt, Karsten M. and Rasch, Malte J. and Schölkopf, Bernhard and Smola, Alexander}, year = {2012}, month = mar, volume = {13}, pages = {723--773}, issn = {1532-4435}, journal = {J. Mach. Learn. 
Res.}, keywords = {hypothesis testing,integral probability metric,kernel methods,schema matching,two-sample test,uniform convergence bounds} } @article{guo_generalized_2003, title = {A Generalized {{Foley}}–{{Sammon}} Transform Based on Generalized Fisher Discriminant Criterion and Its Application to Face Recognition}, author = {Guo, Yue-Fei and Li, Shi-Jin and Yang, Jing-Yu and Shu, Ting-Ting and Wu, Li-De}, year = {2003}, month = jan, volume = {24}, pages = {147--158}, journal = {Pattern Recognition Letters}, number = {1-3} } @inproceedings{henderson_epmeans_2015, title = {{{EP}}-{{MEANS}}: An Efficient Nonparametric Clustering of Empirical Probability Distributions}, booktitle = {Proceedings of the 30th {{Annual ACM Symposium}} on {{Applied Computing}} - {{SAC}} '15}, author = {Henderson, Keith and Gallagher, Brian and {Eliassi-Rad}, Tina}, year = {2015}, pages = {893--900}, publisher = {{ACM Press}}, address = {{Salamanca, Spain}}, doi = {10.1145/2695664.2695860}, isbn = {978-1-4503-3196-8}, language = {en} } @article{holbrook_differentiating_2018, title = {Differentiating the Pseudo Determinant}, author = {Holbrook, Andrew}, year = {2018}, month = jul, volume = {548}, pages = {293--304}, journal = {Linear Algebra and its Applications} } @book{kincaid_numerical_2009, title = {Numerical Analysis: Mathematics of Scientific Computing}, author = {Kincaid, David and Cheney, Elliott W.}, year = {2009}, edition = {3. 
ed}, publisher = {{American Mathematical Society}}, address = {{Providence, RI}}, annotation = {OCLC: 729930790}, language = {eng}, number = {2}, series = {Pure and Applied Undergraduate Texts} } @inproceedings{kulis_revisiting_2012, title = {Revisiting {{K}}-Means: {{New Algorithms}} via {{Bayesian Nonparametrics}}}, booktitle = {Proceedings of the 29th {{International Coference}} on {{International Conference}} on {{Machine Learning}}}, author = {Kulis, Brian and Jordan, Michael I.}, year = {2012}, pages = {1131--1138}, publisher = {{Omnipress}}, address = {{USA}}, isbn = {978-1-4503-1285-1}, series = {{{ICML}}'12} } @article{kunsch_jackknife_1989, title = {The {{Jackknife}} and the {{Bootstrap}} for {{General Stationary Observations}}}, author = {Kunsch, Hans R.}, year = {1989}, month = sep, volume = {17}, pages = {1217--1241}, journal = {The Annals of Statistics}, language = {en}, number = {3} } @article{ngo_trace_2012, title = {The {{Trace Ratio Optimization Problem}}}, author = {Ngo, T. T. and Bellalij, M. and Saad, Y.}, year = {2012}, month = jan, volume = {54}, pages = {545--569}, journal = {SIAM Review}, language = {en}, number = {3} } @article{oh_bayesian_2001a, title = {Bayesian {{Multidimensional Scaling}} and {{Choice}} of {{Dimension}}}, author = {Oh, Man-Suk and Raftery, Adrian E}, year = {2001}, month = sep, volume = {96}, pages = {1031--1044}, journal = {Journal of the American Statistical Association}, number = {455} } @inproceedings{pekalska_noneuclidean_2006, title = {Non-{{Euclidean}} or {{Non}}-Metric {{Measures Can Be Informative}}}, booktitle = {Structural, {{Syntactic}}, and {{Statistical Pattern Recognition}}}, author = {Pękalska, El\textbackslash.zbieta and Harol, Artsiom and Duin, Robert P. W. and Spillmann, Barbara and Bunke, Horst}, editor = {Yeung, Dit-Yan and Kwok, James T. 
and Fred, Ana and Roli, Fabio and {de Ridder}, Dick}, year = {2006}, pages = {871--880}, publisher = {{Springer Berlin Heidelberg}}, address = {{Berlin, Heidelberg}}, isbn = {978-3-540-37241-7} } @article{politis_stationary_1994, title = {The {{Stationary Bootstrap}}}, author = {Politis, Dimitris N. and Romano, Joseph P.}, year = {1994}, month = dec, volume = {89}, pages = {1303}, issn = {01621459}, doi = {10.2307/2290993}, journal = {Journal of the American Statistical Association}, number = {428} } @article{sanderson_armadillo_2016, title = {Armadillo: A Template-Based {{C}}++ Library for Linear Algebra}, author = {Sanderson, Conrad and Curtin, Ryan}, year = {2016}, month = jun, volume = {1}, pages = {26}, journal = {The Journal of Open Source Software}, number = {2} } @article{torgerson_multidimensional_1952, title = {Multidimensional Scaling: {{I}}. {{Theory}} and Method}, author = {Torgerson, Warren S.}, year = {1952}, month = dec, volume = {17}, pages = {401--419}, issn = {0033-3123, 1860-0980}, doi = {10.1007/BF02288916}, journal = {Psychometrika}, language = {en}, number = {4} } @inproceedings{wang_trace_2007, title = {Trace {{Ratio}} vs. 
{{Ratio Trace}} for {{Dimensionality Reduction}}}, booktitle = {2007 {{IEEE Conference}} on {{Computer Vision}} and {{Pattern Recognition}}}, author = {Wang, Huan and Yan, Shuicheng and Xu, Dong and Tang, Xiaoou and Huang, Thomas}, year = {2007}, month = jun, pages = {1--8}, publisher = {{IEEE}}, address = {{Minneapolis, MN}} } @article{warshall_theorem_1962, title = {A {{Theorem}} on {{Boolean Matrices}}}, author = {Warshall, Stephen}, year = {1962}, month = jan, volume = {9}, pages = {11--12}, journal = {Journal of the ACM}, number = {1} } @article{yangqingjia_trace_2009, title = {Trace {{Ratio Problem Revisited}}}, author = {{Yangqing Jia} and {Feiping Nie} and {Changshui Zhang}}, year = {2009}, month = apr, volume = {20}, pages = {729--735}, journal = {IEEE Transactions on Neural Networks}, number = {4} } maotai/inst/include/0000755000175000017500000000000014176771675014303 5ustar nileshnileshmaotai/inst/include/procedures/0000755000175000017500000000000014176771675016456 5ustar nileshnileshmaotai/inst/include/procedures/maoarma_basic.h0000644000175000017500000000374414176771675021415 0ustar nileshnilesh#ifndef MAOTAI_MAOARMA_BASIC_H #define MAOTAI_MAOARMA_BASIC_H #define ARMA_NO_DEBUG // [[Rcpp::depends(RcppArmadillo)]] #include #include "../include/maoarma.hpp" #include using namespace Rcpp; using namespace arma; using namespace std; // auxiliary functiona // * aux_pnorm : given a row vector, compute p-norm inline double aux_pnorm(arma::rowvec x, double p){ assert(p>0); int N = x.n_elem; double output = 0.0; for (int n=0;n(x(n)), p); } return(std::pow(static_cast(output), 1.0/p)); } //--------------------------------------------------------// // main namespace functions inline arma::mat maospace::pdist(arma::mat X, double p){ // prepare int N = X.n_rows; arma::mat output(N,N,fill::zeros); // iterate for (int i=0;i<(N-1);i++){ for (int j=(i+1);j0); int n = X.n_rows; arma::umat indices(n,k,fill::zeros); arma::vec distvec(n,fill::zeros); arma::uvec sorted; for 
(int i=0;i #include "procedures/maoarma_basic.h" // available functions // (basic:umat) knn : return (n x k) index vector. 0 starting C convention. // (basic:mat) pdist : return (n x n) distance matrix // (basic:mat) pdist2 : return (n x m) distance matrix for X(n) and Y(m) // (basic:double) trace : given a square matrix, compute the trace // auxiliary functions // (basic:double) aux_pnorm : compute p-norm namespace maospace{ arma::umat knn(arma::mat X, int k, double p); arma::mat pdist(arma::mat X, double p); arma::mat pdist2(arma::mat X, arma::mat Y, double p); double trace(arma::mat X); } // How to use this 'hpp' file // * set 'import' and 'linkingto' with maotai // * In C++ scripts, do the followings // - #include "maoarma.hpp" // - // [[Rcpp::depends(maotai)]] // - using namespace maospace // #endif maotai/NAMESPACE0000644000175000017500000000211614176774073013115 0ustar nileshnilesh# Generated by roxygen2: do not edit by hand export(bmds) export(boot.mblock) export(boot.stationary) export(cayleymenger) export(checkdist) export(checkmetric) export(cmds) export(cov2corr) export(cov2pcorr) export(dpmeans) export(ecdfdist) export(ecdfdist2) export(epmeans) export(kmeanspp) export(lgpa) export(lyapunov) export(matderiv) export(mmd2test) export(nef) export(nem) export(pdeterminant) export(shortestpath) export(sylvester) export(trio) export(tsne) export(weiszfeld) import(Rdpack) importFrom(Matrix,rankMatrix) importFrom(RANN,nn2) importFrom(RSpectra,eigs) importFrom(Rcpp,evalCpp) importFrom(Rtsne,Rtsne) importFrom(cluster,pam) importFrom(cluster,silhouette) importFrom(dbscan,dbscan) importFrom(fastcluster,hclust) importFrom(shapes,procGPA) importFrom(stats,as.dist) importFrom(stats,cov) importFrom(stats,dist) importFrom(stats,ecdf) importFrom(stats,knots) importFrom(stats,lm) importFrom(stats,quantile) importFrom(stats,rgamma) importFrom(stats,rgeom) importFrom(stats,rnorm) importFrom(stats,runif) importFrom(stats,var) importFrom(utils,packageVersion) 
useDynLib(maotai)