multiDimBio/0000755000176200001440000000000013532240373012472 5ustar liggesusersmultiDimBio/NAMESPACE0000644000176200001440000000114312725254706013720 0ustar liggesusers# Default NAMESPACE created by R # Remove the previous line if you edit this file # Export all names exportPattern(".") # Import all packages listed as Imports or Depends importFrom(grDevices, colorRampPalette, dev.off, palette, pdf, recordPlot) importFrom(graphics, barplot, hist, layout, persp, plot, segments, text) importFrom(stats, aggregate, binomial, deviance, df, glm, interaction.plot, lm, na.omit, p.adjust, predict, rbinom, rnbinom, rnorm, runif, sd) importFrom(utils, timestamp) importFrom(lme4, glmer, VarCorr) import( ggplot2, pcaMethods, misc3d, MASS, RColorBrewer, gridGraphics ) multiDimBio/data/0000755000176200001440000000000013407335672013413 5ustar liggesusersmultiDimBio/data/Dyad.RData0000644000176200001440000000045511654037625015214 0ustar liggesusers]N@Dg%d!ְ$$=Ys d $~Hó7ּX7BIHe9zX}!ZX?omMp.@to=e[pg=f}5w\{:psgm37 A}n-.?_#]3y.g{Hߪkh=<1ңW5N4N545.4.Qǚ::ZXMMMMMMuNTǨQca:Fu3;4sI I…ltߗ[n>?qyAmultiDimBio/data/CondA.RData0000644000176200001440000000023111652364622015305 0ustar liggesusers r0b```b`bf H01':82009P &$G&.f8#qlff>=%rRRs,*(;08Ks\̼If7jߩwjQmultiDimBio/data/Scores.RData0000644000176200001440000000445611652364650015575 0ustar liggesusers]y4xolb_2d$k"ϥ͐-&[Ì#)W Cb%udrmckdOT*^Μ9y9|O"< n<eӆ7ʵ?o[7}Y48|sc-w&E lj%_BHlfV.;]@ӗT 's(=Kq k:<\4~,ra+\bߔs~kOI\طJƸaP{E[źFe5]Tqt^V"ΘiC{Žd|CU XpOE] {~_ o 1eG!@ǴqP0Z8)ٻ?.f(|{6ݒ"ZW(/z2reʤ;y;7dXMiJ[y*i/YTsR(W'ߪ=|"D0ȇO#WuC~Tb $ O; W8a+, 6 > ~~eEz;Q3y|)he$v{(6''лg,?r((DՔW̚mϑRy b͸&u>s9o i>mudq`Js>Q6Ug<ꇾ'=YqObIVz]auT:ܫ42+!);\r39U~Gh%c{?I:,ESC,(Η׎qG}%?DzEݾ ^pNGQ`>:Qͬg-Pev3*|c>ҍXxe\QcZoBM@xFàX>D{ڈjNu(G(2si ځۼpϳ QNk^o)GRoXPW;8}Щ]ܥLČzdezuR?Z{D3lu3'y?b$n?cT @I8O5,6%ԄB>Ɖ1(h淒 tqZ,M%vOgTx-#PV 2idb/<;%?4ђDvފ_D٬-\kKzأ2^xhUr X'OŸc!@n;U K2,H^R*^EfXQhL gGجHI20[1zw̅زwUyW7l8?:?CX"k:]>1$̥݉fEbp5jH7!UC,0^ ?sg.GUu;U:Pm']-(ևQ4&ư R;S0cma%@9C]mhʉFȣe!Jl8-6f&Xoڡ!r׏b9? S=)s#eWBe&V$+r9|at t݀ףH8G 5ȇ{=0C6~ 2sMK7%^VU?b)itG{C7sEb|ntbykqKXrƺm;>aYeOC] T!9)2Kzϲ|SA,Q.bⵛ{R]8?(MVzrhrl9B^qd֚Plg( mF9i!`韙ӱ~yjK%qu_py㊼`һU U߿4?}5Қl6Ue9GP–s7@4IGI?t?kE$j} \$94 Lg9PUZB8x:F;sg;3MҪy=\'QSg?Ϳ|y (jLg4Z Ȭx0[M 桪 |)K|5ׂe,9&Lֺ;>^^7-oSF~]Op oaǥٶkPws亮$sb Y @^zPzG/ޅ~vE KH:UBtb8E |P[ vҒ4%̯f2R7w4FCVw R95V[?d3m*2(2M+yicu|tܳ  137)oĻ~0H;MIuOީ-=krGɀye9WGTA _#_ԗ~2t<.%$A]Pkm9)} s.}{[M5._O&u0fꋠ[q]vYeSw0Zmc4y%aPc<Fy20oz QZ{=WѿvLAN9}PfD >oIwwMH(?ZSLp{t9UqW?P`0.wR^聕:K`L/}V9/GwjZ L5m҆Ïpj+5c:ׂ{\*1LR'(2hl~q4,3)G3(׭tt*TJb˔-T;$wt!7f7tLԄ귓I VO ئ}6v::A_5&<2Ʉ]{A4⤱2XA>QtxP]4k W%S2aPRȻsSy{.{1YtgT8S B.ɪY`Y c`xxsS.oW3\/ @2GKߠK q^(+G8> X%מ[%eR>aWGoP4x*v6[ǐr gl` Σ64bS w%ONt'& ]*6ȊUWS0lQʸׄ Yy,LOĕsqit^$ɂ{vQw|.hQ{!\pެ`ZOG {*ܶeΖXZdOR&0|Yт.f_5<nSe,qKa \a8)%z__qeG/DglphP 7's6 "-Ξ n)h]4' sMdx][>= mL*v \nX]&2zN%@}W=%X82,5;#mQ'7Jp٭3oq}݃`ͮsY58+V.\;RuUlFʸFǵjJ¯+r:lҒ@]\G[x- ,#H*0Y|p^{$v׼Af$rgKԍyn`ҝZ;|JM4 d~-uF- 5ܰs|A~~kuL#O(oAcwq)ƣ yc' \S^6,MoF (5_ Xo,^>c7owe*5˞M=l*qXwh8)vĀL2 HcwPv7+h,~ۗ]G6D0↛8Ѓ8wuf ;Y]"k;A\ς,ߵpɘ>s}ۃK>ވ,㧿UYo"p=]<(% =y+x3E9 v/@Jzx; Ֆy gzU䏥 >"y( Y5HE Kp7SL"WzMUۻAv!ԟw JȘV?9ЕҊaP;[ֵIlӮY+/M|yЂ^ K_inrp/S5^H_o h' 5—o7m2xʹZ^0y"~/ W^S5I-yoב|{@/59? .ús%^m[qox0 olgf |^};e f;|tU5 ey^ߐehy?7Z >| D~g|ކsFONE<[wk?On4w-'aG>4j惻\gB9閄IY`` GV`^oXHG[ξc³l ֆ_*A0Nnƈ{0{j @1y5GG$6:BtrbOz`IKVhdv1^T96uhdx0_mIs',YIogI{%xV!_Ыٕ[' DZ}t׶Mm(sq-Fg^ /PHj]@do^mR?p[Ə #>o:];~Y^!e<~Bd'sF=hd:m\Bf61yFѬ:B)Pŗ.`޿Wqv~7O?3; Bcfv6~ҿxDϺ*)~P]"j{QLun&H<pП<r=XoEύ;(O tGM'e^U`; ;*ѻunozğ%LMg6H?i zb>xԂ08ROD8!k2Xż'};rG=6s xױD9>ׄmYYwnއP%_@zҫz_+45o~Cc{YŸbx껥r:6),FН^}>x\ݳ鯓T.[1`گ}#_N^+ hŽE Ppg]Q1Hp>9o$qoFѡz>GQ6xw=+:q*-e>3Sq,#p;kMc.cMfF.aOѓ"g)R7~m!/Bz"yO Se~h!km# n I/)Dޜ6_AW|pd ͵w ̦a\mWl9㜾窃=y6!=;lPZ&i/6bBJqW\hG6;eF[[+Jԝف|GVg> ƉK 2gb{@j~d?o_ ^xG(ucmAml:dT:/A*4h#xr/k߶R|VԖb&g^0J3x+M\.eWVn* ";.Z.dOsڹ t]ܴBC 0f:4$Ȼfu[}kt1N, z_4eAf˼dGD郈;qt¾Y':*XSneX){uqU ~nŠm]v.'l9IXIn`oAzmU%(um)ĀPexNPv{1CA%T?KҠɥHd qqBac+OOwų԰*m*ewv7>SVfE dQ[ jhecvuiN8~sMbMM|g$&z$f(*k.!\b?,^ϥlKK⎁#@zR3V#.{Av!K"{X<F-|CcR(]=moLTz+nuT6w҂ pg;Sܗ^+U]~ ']\ k9Ю_VYʆpPQvΓ!re:5Y) Fw,:I| A:ti`H&\?OIg:ۉܔu3Az6Ȝ WǼ@5 ~\Ӟm*׼ x's2jYc&R}`۽!y,Uyu'#҃SLx'-SƓW@~TP+%66-McM^tx(wk*(Nyk|]nkBw\1?D4mM@pYK_~GW͠*VX8/Gch8||mr?45EOnӮ3`=PTU&UQmD#}Nn9%b%*ZN⼝F Npx3@~֞ RֱKs{|ѽDЏy3+WNU to learn more about these objects, e.g. ?Nuclei Type ? to see examples of the function's use, e.g. ?FSelect } \author{ Samuel V Scarpino Maintainer: Samuel V Scarpino } \references{ Collyer M, Adams D. (2007) Analysis of Two - State Multivariate Phenotypic Change in Ecological Studies. Ecology: 88(3) 683 - 692. Costanza M, Afifi A. (1979) Comparison of Stopping Rules in Forward Stepwise Discriminant Analysis. Journal of the American Statistical Association: pp. 777 - 78 Crews D, Gillette R, Scarpino SV, Manikkam M, Savenkova MI, Skinner MK. (2012) Epigenetic Transgenerational Alterations to Stress Response in Brain Gene Networks and Behavior. Proc. Natl. Acad. Sci. USA: 109(23) 9143 - 9148. Davies SW, Scarpino SV, Pongwarin T, Scott J, Matz MV. (2015) Estimating Trait Heritability in Highly Fecund Species. G3: Genes| Genomes| Genetics: 5(12) 2639 - 45. Habbema J, Hermans J. (1977) Selection of Variables in Discriminant Analysis by F-Statistics and Error Rate. Technometrics: 19(4) 487 - 493. Jennrich R. (1977) Stepwise discriminant analysis, volume 3. New York Wiley Sons. Roweis S. (1997) EM algorithms for PCA and sensible PCA. Neural Inf. Proc. Syst.: 10 626 - 632. Stacklies W, Redestig H, Scholz M, Walther D, Selbig J. (2007) pcaMethods - a Bioconductor package providing PCA methods for incomplete data. Bioinformatics: 23 1164 - 1167. Troyanskaya O, Cantor M, Sherlock G, Brown P, Hastie T, Tibshirani R, Botstein D, Altman R. (2001) Missing value estimation methods for DNA microarrays. Bioinformatics: 17(6) 520 - 5252. } \seealso{ \code{\link[pcaMethods:pcaMethods]{pcaMethods}} } multiDimBio/man/IntPlot.Rd0000644000176200001440000000230212725251216015123 0ustar liggesusers\name{IntPlot} \alias{IntPlot} \title{A function to visualize the results of a MANOVA } \description{ The function produces an interaction plot to demonstrate the results of a MANOVA using the function interaction.plot. } \usage{ IntPlot(Scores, Cov.A, Cov.B, pvalues = rep(1, 8), int.pvalues = rep(1, 4)) } \arguments{ \item{Scores}{ A (non-empty) numeric matrix of principle component scores or raw data. } \item{Cov.A}{ A (non-empty) bivariate factor vector indicating the factor for each row in Scores } \item{Cov.B}{ A (non-empty) bivariate factor vector indicating the factor for each row in Scores } \item{pvalues}{ An optional vector of p values for each covariate across Scores. The length of pvalues must equal the number of columns in Scores times 2. } \item{int.pvalues}{ An optional vector of p values for each interaction. The length of int.pvalues must equal the number of columns in Scores. } } \value{ a list of plots stored as grid plots. } \seealso{ \code{\link{interaction.plot}} } \examples{ data(Scores) data(CondA) data(CondB) pvals<-c(0.03,0.6,0.05,0.07,0.9,0.2,0.5,0.3) int.pvals<-c(0.3,0.45,0.5,0.12) IntPlot(Scores,CondA,CondB,pvalues=pvals, int.pvalues=int.pvals) } multiDimBio/man/LandscapePlot.Rd0000644000176200001440000000311612725252231016265 0ustar liggesusers\name{LandscapePlot} \alias{LandscapePlot} \title{ A function to visualize the Functional Landscape of measured traits } \description{ This function plots a three-dimensional landscape of measured traits. The peak heights are relative with respect to the input data. The width of each peak is controlled by the argument sigma and has only an aesthetic purpose. The 3D image is generated using the \code{\link[misc3d:drawScene]{drawScene}} and \code{\link[misc3d:surfaceTriangles]{surfaceTriangles}} . } \usage{ LandscapePlot(Data, Groups=NULL, PDF=FALSE,LocPlot=FALSE,control=c(75,1,30)) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{Data}{ A (non-empty) numeric matrix with trait values } \item{Groups}{ A (non-empty)factor vector indicating the group membership of each row in Data. If there is only a single group present in Data then Groups=NULL (default). } \item{PDF}{ Logical controlling whether to output the results as a .pdf or a .jpeg. The default (PDF=FALSE) will produce a .jpeg. The file size for .pdf output can be large. } \item{LocPlot}{ Logical controlling whether to output a .pdf naming the peaks according to the columns they represent. The defaul is FALSE. } \item{control}{ An optional numeric vector setting the control parameters for persp. control[1] = theta, control[2] = r, control[3] = phi } } \value{ a list of plots stored as grid plots (or.pdf if PDF=TRUE) file for each column in data. } \examples{ data(Nuclei) data(Groups) #plotting the first six columns #not run #LandscapePlot(Nuclei[,1:6], Groups=Groups) } multiDimBio/man/boxWhisker.Rd0000644000176200001440000000133612716112713015663 0ustar liggesusers\name{boxWhisker} \alias{boxWhisker} \title{A function to create a box and whisker plot by group ID } \description{ A function to create a box and whisker plot by group ID. } \usage{ boxWhisker(data, groups, palette = "Paired") } \arguments{ \item{data}{ a (non-empty) matrix of data values } \item{groups}{ a (non-empty) vector of group IDs with length equal to the number of rows in data } \item{palette}{ A color palette for plotting. The default is 'Paired.' See colorbrewer2.org for alternatives. } } \value{ Returns a box-whisker plot of the data by group ID. } \examples{ data(Nuclei) data(Groups) boxWhisker(Nuclei, Groups) #changing the color palette boxWhisker(data = Nuclei, groups = Groups, palette = 'Set1') } multiDimBio/man/Groups.Rd0000644000176200001440000000144512236235467015027 0ustar liggesusers\name{Groups} \alias{Groups} \docType{data} \title{The group ID for animals contained in the data set Nuclei } \description{ Animals measured in the Nuclei data set belong to one of four groups determined by their linneage (Vinclozolin or Control) and their stress treatment (Stressed or Non-Stressed). } \usage{ data(Groups) } \format{ A factor vector indicating which group the individuals in Nuclei are in. } %\details{ % %} \source{ The data are provided courtesy of David Crews at the University of Texas at Austin. } \references{ Crews, D, R Gillette, SV Scarpino, M Manikkam, MI Savenkova, MK Skinner. 2012. Epigenetic Transgenerational Alterations to Stress Response in Brain Gene Networks and Behavior. Proc. Natl. Acad. Sci. USA. 109 (23). 9143 - 9148. } \examples{ data(Groups) } multiDimBio/man/PermuteLDA.Rd0000644000176200001440000000576012241024146015500 0ustar liggesusers\name{PermuteLDA} \alias{PermuteLDA} \title{ A function to determine whether two groups are in statistically different locations in multivariate space See Collyer and Adams 2007 } \description{ The function calculates the multivariate distance between two groups across all traits and determines whether they differ signifcantly using a Monte Carlo randomization test. The Monte Carlo randomization creates a null distribution by randomizing the residual deviation from the group mean across all individuals. This method controls for heteroscedasticity and was designed by Collyer and Adams (2007) for use in analyzing data sets that have sparse groups sizes relative to the number of traits. } \usage{ PermuteLDA(Data, Groups, NPerm, Missing.Data = "Complete") } %- maybe also 'usage' for other objects documented here. \arguments{ \item{Data}{ A (non-empty), numeric matrix of data values } \item{Groups}{ A (non-empty), vector indicating group membership. } \item{NPerm}{ The number of permutations used to generate the null distribution. The default is 100. } \item{Missing.Data}{ The method used to handle missing data. The default, 'Complete' will use CompleteData to impute missing data, setting Missing.Data='Remove' will remove all individuals with missing data. FSelect cannot handle missing data. } } \details{ Determining the statistical significance of a discriminate function analysis along with performing that analysis on sparse data sets, e.g. many traits observed on comparatively few individuals, is a challenge. Collyer and Adams (2007) developed a Monte Carlo based algorithm for addressing both of those issues. Briefly, the test uses the underlying Var/Cov structure of the data and randomizes the group membership to calculate a null distribution. This test simultaneously controls for heteroscedasticity, a common problem in sparse data sets and allows the approximation of a p-value for the test. For the original implementation and formulation of the method see Collyer and Adams (2007) or http://www.public.iastate. edu/~dcadams/software.html. Unlike the FSelect implementation, PermuteLDA will work properly with an arbitrary number of groups. The time required to run the algorithm is non-linear in the number of groups. } \value{ Returns a data frame with four columns and the number of groups choose 2 rows. Each row is a pairwise comparison between groups. The column 'Pr' is the p value to reject the null hypothesis of no difference (a value in 'Pr' < 0.05 would result in rejecting the hypothesis that the two groups are not different. The column 'Distance' is the multivariate distance between the two groups. } \references{ Collyer M, Adams D (2007). Analysis of Two - State Multivariate Phenotypic Change in Ecological Studies. Ecology, 88(3), 683 - 692. For an implementation of the original method coded in R see http://www.public.iastate. edu/~dcadams/software.html. } \seealso{ \code{\link{PermuteLDA}} } \examples{ data(Nuclei) data(Groups) PermuteLDA(Nuclei,Groups,50) } multiDimBio/man/Scores.Rd0000644000176200001440000000136112236235605014775 0ustar liggesusers\name{Scores} \alias{Scores} \docType{data} \title{Principle component scores based on the data in Nuclei } \description{ Principle component scores were computed using PPCA for the data set Nuclei. } \usage{ data(Scores) } \format{ A numeric matrix with 4 columns and the same number of rows as Nuclei. There are no missing values. } \source{ The data are provided courtesy of David Crews at the University of Texas at Austin. } \references{ Crews, D, R Gillette, SV Scarpino, M Manikkam, MI Savenkova, MK Skinner. 2012. Epigenetic Transgenerational Alterations to Stress Response in Brain Gene Networks and Behavior. Proc. Natl. Acad. Sci. USA. 109 (23). 9143 - 9148. } \examples{ data(Scores) data(Nuclei) SCORES<-PPCA(Nuclei)@scores } multiDimBio/man/getP.Rd0000644000176200001440000000240112737461600014434 0ustar liggesusers\name{getP} \alias{getP} \title{ An internal function for getting empirical p values } \description{ Simulates p values. } \usage{ getP(ndads, mm, vv, tau2, nperms, nsims, nbins) } \arguments{ \item{ndads}{ a (non-empty) numeric value indicating the number of dads. } \item{mm}{ a (non-empty) numeric value indicating the mean number of offspring per dad per bin (normal dist). mm must be less than vv. } \item{vv}{ a (non-empty) numeric value indicating the variance in offspring per dad per bin (normal dist). vv must be greater than mm. } \item{tau2}{ a (non-empty) numeric value indicating the dad effect (narrow-sense heritability ~ tau2/(tau2+(pi/sqrt(3))^2)). } \item{nperms}{ a (non-empty) numeric value indicating the number of bootstrap permutations to use for caluclating a p value. } \item{nsims}{ a (non-empty) numeric value indicating the number of simulations to run per parameter combination. } \item{nbins}{ a (non-empty) numeric value indicating the number of bins, data are pooled before analysis. } } \value{ Returns a vector of simulated p values. The list contains: } \examples{ ndads <- c(9,18) mm <- 4.629634 vv <- 6.31339 tau2 <- c(0,0.5) nperms <- 2 nsims <- 2 nbins <- 3 getP(ndads, mm, vv, tau2, nperms, nsims, nbins) } multiDimBio/man/makeCompMat.Rd0000644000176200001440000000116012241024207015720 0ustar liggesusers\name{makeCompMat} \alias{makeCompMat} \title{A function to create a pairwise comparison matrix } \description{ This function creates a pairwise comparison matrix for n groups. All possible pairwise combinations are created, with rows in the matrix equal to the desired comparison. } \usage{ makeCompMat(ng) } \arguments{ \item{ng}{ A single number indicating the total number of unique groups } } \value{ Returns a matrix with two columns and ng choose 2 rows. } \seealso{ \code{\link{PermuteLDA}} } \examples{ makeCompMat(3) makeCompMat(4) data(Groups) NGroups<-length(unique(Groups)) makeCompMat(NGroups) } multiDimBio/man/PercentMax.Rd0000644000176200001440000000133311654311540015600 0ustar liggesusers\name{PercentMax} \alias{PercentMax} \title{ A function to scale data to the percent of the maximum observed } \description{ This function rescales the columns in a data matrix to the percent of the maximum observed value. The variance is not scaled and missing values are ignored in the calculation. } \usage{ PercentMax(DATA) } \arguments{ \item{DATA}{ A (non-empty) matrix with data values. Columns should be different traits and rows unique observations of those traits } } \value{ Returns a matrix with the same dimensions as DATA. } \seealso{ \code{\link{ZTrans}}, \code{\link{MeanCent}} } \examples{ data(Nuclei) colMeans(Nuclei, na.rm=TRUE) Nuclei.PM<-PercentMax(Nuclei) colMeans(Nuclei.PM, na.rm=TRUE) } multiDimBio/man/CondA.Rd0000644000176200001440000000137112236235323014521 0ustar liggesusers\name{CondA} \alias{CondA} \docType{data} \title{Treatment condition for animals contained in the data set Nuclei } \description{ Animals measured in the Nuclei data set were either from linneages exposed to the fungicide Vinclozolin (Vinclozolin) or not (Control). } \usage{ data(CondA) } \format{ A factor vector indicating which treatment group the individuals in Nuclei belong to. } \source{ The data are provided courtesy of David Crews at the University of Texas at Austin. } \references{ Crews, D, R Gillette, SV Scarpino, M Manikkam, MI Savenkova, MK Skinner. 2012. Epigenetic Transgenerational Alterations to Stress Response in Brain Gene Networks and Behavior. Proc. Natl. Acad. Sci. USA. 109 (23). 9143 - 9148. } \examples{ data(CondA) } multiDimBio/man/PPCA.Rd0000644000176200001440000000417412241023133014252 0ustar liggesusers\name{PPCA} \alias{PPCA} \title{ A function to perform a probabilistic principle component analysis } \description{ Performs a probabilistic principle component analysis using the function 'pca' in the package'pcaMethods' } \usage{ PPCA(Data, nPCs=4, CENTER=TRUE, SCALE='vector') } %- maybe also 'usage' for other objects documented here. \arguments{ \item{Data}{ A (non-empty), numeric matrix of data values } \item{nPCs}{ The number of resulting principle component axes. nPCs must be less than or equal to the number of columns in Data. } \item{CENTER}{ A logical statement indicating whether data should be centered to mean 0, TRUE, or not, FALSE. } \item{SCALE}{ A character string indicating which method should be used to scale the variances. The default setting is 'vector.' } } \details{ In PPCA an Expectation Maximization (EM) algorithm is used to fit a Gaussian latent variable model ( Tippping and Bishop (1999)). A latent variable model seeks to relate an observed vector of data to a lower dimensional vector of latent (or unobserved) variables, an approach similar to a factor analysis. Our implementation is a wrapper around the pcaMethods functions ppca and svdimpute (Stacklies et al. (2007)) and is included mainly for convience. The method used in pca was adapted from Roweis (1997) and a Matlab script developed by Jakob Verbeek. } \value{ Returns an object of class 'pcaRes.' See documentation in the package code{\link[pcaMethods:pcaMethods]{ pcaMethods}} } \references{ Roweis S (1997). EM algorithms for PCA and sensible PCA. Neural Inf. Proc. Syst., 10, 626 - 632. Stacklies W, Redestig H, Scholz M, Walther D, Selbig J (2007). pcaMethods - a Bioconductor package providing PCA methods for incomplete data. Bioinformatics, 23, 1164 - 1167. Tippping M, Bishop C (1999). Probabilistic Principle Componenet Analysis. Journal of the Royal Statistical Society. Series B (Statistical Methodology), 61(3), 611 - 622. } \seealso{ \code{\link[pcaMethods:pcaMethods]{pcaMethods}}, \code{\link{pca}} } \examples{ data(Nuclei) PPCA1<-PPCA(Nuclei, nPCs=2, CENTER=TRUE, SCALE='vector') Scores1<-PPCA1@scores } multiDimBio/man/simPower.Rd0000644000176200001440000000215712737461545015362 0ustar liggesusers\name{simPower} \alias{simPower} \title{ An internal function of binomPower, which actually calculates the p value } \description{ An internal function of binomPower, which actually calculates the p value. } \usage{ simPower(ndads,mm,vv,tau2,nperms,nbins) } \arguments{ \item{ndads}{ a (non-empty) numeric value indicating the number of dads. } \item{mm}{ a (non-empty) numeric value indicating the mean number of offspring per dad per bin (normal dist). mm must be less than vv. } \item{vv}{ a (non-empty) numeric value indicating the variance in offspring per dad per bin (normal dist). vv must be great than mm. } \item{tau2}{ a (non-empty) numeric value indicating the dad effect (narrow-sense heritability ~ tau2/(tau2+(pi/sqrt(3))^2)). } \item{nperms}{ a (non-empty) numeric value indicating the number of bootstrap permutations to use for caluclating a p value. } \item{nbins}{ a (non-empty) numeric value indicating the number of bins, data are pooled before analysis. } } \value{ Returns a p value for a given set of conditions over a specificed number of bootstrap permutations. } \examples{ #not run } multiDimBio/man/Power.Rd0000644000176200001440000000440012725254150014627 0ustar liggesusers\name{Power} \alias{Power} \title{ A function to estimate the error rate for FSelect and PermuteLDA. } \description{ Methods are implemented to compute the statistical power, in terms of the type II error rate, based on anticipated sample and effect sizes for FSelect() and PermuteLDA(). By default the power of both tests are determined by iterating over a range of effect and sample sizes. The default settings were selected to be representative of many behavioral genetic studies; however, users can input alternative sample and effect sizes. For high values of trials this function can be very slow. } \usage{ Power(func = "PermuteLDA", N = "DEFAULT.N", effect.size = "DEFAULT.e", trials = 100) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{func}{ A character string indicating which function to compute the power for, can be either 'PermuteLDA' or 'FSelect' } \item{N}{ A (non-empty) vector of group sizes. The lenght of N must be greater than 1 and tha minimum group size for 'FSelect' can not be less than 6. The size of each group is N/2. } \item{effect.size}{ A (non-empty) vector or single value of effect sizes. } \item{trials}{ A number indicating the number of trials for each combination of N and effect.size to calculate the power. } } \details{ The algorithm for the power analysis proceeds as follows: 1. Input sample and effect sizes 2. Set the number of significant effects, e to 0. Note - Total number of traits is fixed at 6 3. Draw random deviates for the given sample size for 6 traits. Note - All traits not significant under this iteration are drawn from a N(0,1) distribution. 4. Perform either FSelect() or PermuteLDA() and record the results. 5. Return to step 3 N times, recording the results each time. Note - N is set using the trials input 6. If e<5 return to step 2 and set the number of significant effects to e+1 7. Proceed to the next combination of sample and effect size. 8. Output the results for each combination of sample and effect size as a function of the number of significant traits. } \value{ Outputs a list with plots and results for each effect size. } \seealso{ \code{\link{PermuteLDA}},\code{\link{FSelect}} } \examples{ #not run #Power(func = 'FSelect', N=c(6,8), effect.size=0.5, trials = 2) } multiDimBio/man/h2Estimate.Rd0000644000176200001440000000562613532230224015544 0ustar liggesusers\name{h2Estimate} \alias{h2Estimate} \title{ Estimates the heritability of a binomial trait } \description{ Estimates the narrow-sense heritability of a binomial trait and calculates a p value by randomization. } \usage{ h2Estimate(data,nreps=1000) } \arguments{ \item{data}{ a (non-empty) numeric matrix with three columns. The first two should contain the trait data (number of occurances of each outcome type) and the third should contain the group ids. } \item{nreps}{ a (non-empty) numeric value indicating the number of resamples to perform when calculating the emperical p value. } } \details{ Estimates the narrow-sense heritability of a binomial trait. This function works by fitting two models, one with and one without a random-effect of sire. These models are compared by randomizing the sire ids nreps times and re-fitting the model. For each of the nreps model pairs, a deviance is calculated and a "p value" estimated by comparing that distribution of deviance to the observed. The heritability is approximatly tau2/(tau2+(pi/sqrt(3))^2), where tau2 is the random-effect variance due to sire. } \value{ Returns a list. The list contains: \item{h2}{ The estimated narrow-sense heritability. The narrow-sense heritability is approximatly tau2/(tau2+(pi/sqrt(3))^2), where tau2 is the random-effect variance due to sire. } \item{pval}{ The probability that the best-fit model includes an extra variance term for sire (random effect of dad). The value is calculated by comparing the deviances from nreps number of randomized model comparisions. } \item{deviance}{ The deviance between a null model without a random effect of dad and a model with. } \item{sim}{ The simulated deviances used in calculating the p value in pval. } \item{obsMod}{ The glmer model object resulting from the observed data. } } \examples{ #non-zero heritability ndads <- 18 mm <- 4 vv <- 6 tau2 <- 2.5 nbins <- 3 mylogit <- function(x) log(x/{1-x}) ilogit <- function(x) 1/{1+exp(-x)} swimprob <- ilogit(rnorm(ndads, 0, sqrt(tau2))) mytable <- NULL for(i in 1:ndads) { bincounts <- pmax(1,rnbinom(nbins, mu = mm, size = mm^2/{vv-mm})) swim <- rbinom(3, bincounts,swimprob[i]) set <- bincounts - swim theserows <- data.frame(set=set,swim=swim, Dad = i, Bin = 1:nbins) mytable <- rbind(mytable, theserows) } est <- h2Estimate(mytable,nreps=10) print(est$h2) #zero heritability ndads <- 18 mm <- 4 vv <- 6 tau2 <- 0 nbins <- 3 mylogit <- function(x) log(x/{1-x}) ilogit <- function(x) 1/{1+exp(-x)} swimprob <- ilogit(rnorm(ndads, 0, sqrt(tau2))) mytable0 <- NULL for(i in 1:ndads) { bincounts <- pmax(1,rnbinom(nbins, mu = mm, size = mm^2/{vv-mm})) swim <- rbinom(3, bincounts,swimprob[i]) set <- bincounts - swim theserows <- data.frame(set=set,swim=swim, Dad = i, Bin = 1:nbins) mytable0 <- rbind(mytable0, theserows) } est0 <- h2Estimate(mytable0,nreps=10) print(est0$h2) } multiDimBio/man/Loadings.Rd0000644000176200001440000000176512725253222015305 0ustar liggesusers\name{Loadings} \alias{Loadings} \title{ A function to visualize trait loadings onto discriminant function and principle component axes } \description{ This function produces barplots representative of the contribution of a particular trait or variable to either a discriminant function or principle component axis. } \usage{ Loadings(DATA, GROUPS, method = c("PCA", "LDA")) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{DATA}{ A (non-empty) numeric matrix with trait values } \item{GROUPS}{ A (non-empty)factor vector indicating the group membership of each row in DATA } \item{method}{ An optional list indicating whether the results for a principle component analysis, 'PCA', or linear discriminant analysis, 'LDA' should be performed. } } \value{ Outputs a list with values and plots for each test listed in method. } \seealso{ \code{\link{pca}}, \code{\link{lda}} } \examples{ data(Nuclei) data(Groups) Loadings(Nuclei, Groups, method=c("PCA", "LDA")) } multiDimBio/man/CondB.Rd0000644000176200001440000000135512236235351014525 0ustar liggesusers\name{CondB} \alias{CondB} \docType{data} \title{Stress condition for animals contained in the data set Nuclei } \description{ Animals measured in the Nuclei data set were either subjected to chronic restraint stress (stress) or not (control). } \usage{ data(CondB) } \format{ A factor vector indicating which stress group the individuals in Nuclei belong to. } %\details{ % %} \source{ The data are provided courtesy of David Crews at the University of Texas at Austin. } \references{ Crews, D, R Gillette, SV Scarpino, M Manikkam, MI Savenkova, MK Skinner. 2012. Epigenetic Transgenerational Alterations to Stress Response in Brain Gene Networks and Behavior. Proc. Natl. Acad. Sci. USA. 109 (23). 9143 - 9148. } \examples{ data(CondB) } multiDimBio/man/Nuclei.Rd0000644000176200001440000000271712236235570014765 0ustar liggesusers\name{Nuclei} \alias{Nuclei} \docType{data} \title{ Brain activity in 14 brain regions for 71 individuals } \description{ The activity in 14 brain nuclei were measured in rats that were in one of four groups: 1) Non-stressed, Control 2) Stressed, Control 3) Non-stressed, Vinclozolin 4) Stressed, Vinclozolin } \usage{ data(Nuclei) } \format{ A numeric matrix with 71 individuals as rows and the activity of 14 brain nuclei as columns. NAs indicate missing data. } \details{ Two different cohorts of male rats of the F3 generation of Vinclozolin (Vinclozolin-Lineage) and Vehicle Control (Control-Lineage) Lineages produced at Washington State University are shipped to the University of Texas on the day after weaning. Rats are randomly pair-housed (one Control-Lineage and one Vinclozolin-Lineage animal) and remain in these dyads throughout the duration of the study. Half of the dyads are randomly chosen to receive chronic restraint stress (CRS) treatment for 6 hours daily for 21 consecutive days commencing 1 hr after lights off. Activity in 14 brain nuclei were measured at the end of the study. } \source{ The data are provided courtesy of David Crews at the University of Texas at Austin. } \references{ Crews, D, R Gillette, SV Scarpino, M Manikkam, MI Savenkova, MK Skinner. 2012. Epigenetic Transgenerational Alterations to Stress Response in Brain Gene Networks and Behavior. Proc. Natl. Acad. Sci. USA. 109 (23). 9143 - 9148. } \examples{ data(Nuclei) } multiDimBio/man/binomPower.Rd0000644000176200001440000000363712737461506015677 0ustar liggesusers\name{binomPower} \alias{binomPower} \title{ Power analysis for estimating the heritability of a binomial trait } \description{ Performs a power analysis for estimating the heritability of a binomial trait. This function can take a long time to run if either nsims or nperms is large. } \usage{ binomPower(ndads, mm, vv, tau2, nperms, nsims, nbins, alpha = 0.05, doPlot=FALSE) } \arguments{ \item{ndads}{ a (non-empty) numeric value indicating the number of dads. } \item{mm}{ a (non-empty) numeric value indicating the mean number of offspring per dad per bin (normal dist). mm must be less than vv. } \item{vv}{ a (non-empty) numeric value indicating the variance in offspring per dad per bin (normal dist). vv. must be greater than mm. } \item{tau2}{ a (non-empty) numeric value indicating the dad effect (narrow-sense heritability ~ tau2/(tau2+(pi/sqrt(3))^2)). } \item{nperms}{ a (non-empty) numeric value indicating the number of bootstrap permutations to use for caluclating a p value. } \item{nsims}{ a (non-empty) numeric value indicating the number of simulations to run per parameter combination. } \item{nbins}{ a (non-empty) numeric value indicating the number of bins, data are pooled before analysis. } \item{alpha}{ a (non-empty) numeric value indicating the cutoff for significant p values. } \item{doPlot}{ a (non-empty) logical value indicating whether to plot the results of the power analysis. } } \value{ Returns a list and an optional set of .pdfs (if doPlot==TRUE). The list contains: \item{roc}{ a data.frame with the summarized results of the power analysis. } \item{params}{ a numeric matrix with the paramater values. } \item{results}{ a numeric matrix with the full results of the analysis. } } \examples{ ndads <- c(9,18) mm <- 4.629634 vv <- 6.31339 tau2 <- c(0,0.5) nperms <- 2 nsims <- 2 nbins <- 3 doPlot <- TRUE binomPower(ndads,mm,vv,tau2,nperms,nsims,nbins,doPlot) } multiDimBio/man/ztrans.rd0000644000176200001440000000146511654312233015121 0ustar liggesusers\name{ZTrans} \alias{ZTrans} \title{ A function to convert data into a z-score } \description{ This function converts the columns in a data matrix into z-scores. The score is computed by subracting each observation in a column from the column mean and divding by the column standard deviation. Each column is converted independently of the others missing values are ignored in the calculation. } \usage{ ZTrans(DATA) } \arguments{ \item{DATA}{ A (non-empty) matrix with data values. Columns should be different traits and rows unique observations of those traits } } \value{ Returns a matrix with the same dimensions as DATA. } \seealso{ \code{\link{PercentMax}}, \code{\link{MeanCent}} } \examples{ data(Nuclei) colMeans(Nuclei, na.rm=TRUE) Nuclei.ZT<-ZTrans(Nuclei) colMeans(Nuclei.ZT, na.rm=TRUE) } multiDimBio/man/completeData.Rd0000644000176200001440000000415512716117211016140 0ustar liggesusers\name{completeData} \alias{completeData} \title{ Function to impute missing data. } \description{ This function imputes missing data using a probabilistic principle component analysis framework and is a wrapper around functions implemented in the pcaMethods package (Stacklies et al. 2007), was proposed by Troyanskaya et al 2001 and is based on methods developed in Roweis 1997. } \usage{ completeData(data, n_pcs, cut.trait = 0.5, cut.ind = 0.5, show.test = TRUE) } \arguments{ \item{data}{ a (non-empty) numeric matrix of data values. } \item{n_pcs}{ a (non-empty) numeric value indicating the desired number of principle component axes. } \item{cut.trait}{ a number indicating the maximum proportion of missing traits before an individual is removed from data. A value of 1 will not remove any individuals and 0 will remove them all. } \item{cut.ind}{ a number indicating the maximum proportion of individuals missing a trait score before that trait is removed from data. A value of 1 will not remove any traits and 0 will remove them all. } \item{show.test}{ a logical statement indicating whether a diagnostic plot of the data imputation should be returned. } } \value{ Returns a list with two entries. \item{complete_dat}{ an object of class matrix with missing values imputed using a probabilistic principle component framework. } \item{plots}{ a list of plots stored as grid plots. } } \references{ Roweis S (1997). EM algorithms for PCA and sensible PCA. Neural Inf. Proc. Syst., 10, 626 - 632. Stacklies W, Redestig H, Scholz M, Walther D, Selbig J (2007). pcaMethods - a Bioconductor package providing PCA methods for incomplete data. Bioinformatics, 23, 1164 - 1167. Troyanskaya O, Cantor M, Sherlock G, Brown P, Hastie T, Tibshirani R, Botstein D, Altman R (2001). Missing value estimation methods for DNA microarrays. Bioinformatics, 17(6), 520 - 5252. } \seealso{ \code{\link[pcaMethods:pcaMethods]{pcaMethods}}, \code{\link{pca}} } \examples{ data(Nuclei) npcs<-floor(ncol(Nuclei)/5) length(which(is.na(Nuclei))==TRUE) dat.comp<-completeData(data = Nuclei, n_pcs = npcs) length(which(is.na(dat.comp))==TRUE) } multiDimBio/man/plotBinomPower.Rd0000644000176200001440000000100012725253650016510 0ustar liggesusers\name{plotBinomPower} \alias{plotBinomPower} \title{A function to plot the results of a binomPower run } \description{ A function to plot the results of a binomPower run. } \usage{ plotBinomPower(datPlotBig,params) } \arguments{ \item{datPlotBig}{ a (non-empty) matrix of data values, with columns trueTau, ndads, trueTau2 } \item{params}{ a (non-empty) matrix of parameter values, with columns mm and vv. } } \value{ Returns a list of two plots of the binomPower analysis results. } \examples{ #not run } multiDimBio/man/FSelect.Rd0000644000176200001440000000414512725250266015072 0ustar liggesusers\name{FSelect} \alias{FSelect} \title{A Function to perform step-wise discriminant analysis using F statistics} \description{ Select data using a F tests } \usage{ FSelect(Data, Group, target, p.adj.method = "holm", Missing.Data = "Complete") } \arguments{ \item{Data}{ A (non-empty), numeric matrix of data values } \item{Group}{ A (non-empty), vector indicating group membership. Length(unique(Group))==2 } \item{target}{ The number of desired traits. Target cannot be greater than the number of columns in Data } \item{p.adj.method}{ The method used to control for false discovery. The default setting is 'holm' } \item{Missing.Data}{ The method used to handle missing data. The default, 'Complete' will use completeData to impute missing data, setting Missing.Data='Remove' will remove all individuals with missing data. FSelect cannot handle missing data. } } \value{ FSelect returns list containing at least the following components: \item{Selected}{ An ordered list indicating which columns were selected. } \item{F.Selected}{ An ordered list containing the F statistics for each column indicated in Selected. } \item{PrF}{ An ordered list containing the p values for each column indicated in Selected. } \item{PrNotes}{ A string indicating which method was used to control for multiple comparisons } \item{model}{ An lm object with the final model results. } } \references{ Costanza M, Afifi A (1979). Comparison of Stopping Rules in Forward Stepwise Discriminant Analysis. Journal of the American Statistical Association, pp. 777 - 78 Habbema J, Hermans J (1977). Selection of Variables in Discriminant Analysis by F - Statistics and Error Rate. Technometrics, 19(4), 487 - 493. Jennrich R (1977). Stepwise discriminant analysis, volume 3. New York Wiley Sons. } \seealso{ \code{\link{completeData}} } \examples{ data(Nuclei) data(Groups) npcs<-floor(ncol(Nuclei)/5) dat.comp <- completeData(data = Nuclei, n_pcs = npcs) groups.use <- c(1,2) use.dat <- which(Groups==groups.use[1]|Groups==groups.use[2]) dat.use <- Nuclei[use.dat,] GR.use <- Groups[use.dat] #not run #FSelect(DAT.use,GR.use,3) } multiDimBio/DESCRIPTION0000644000176200001440000000205413532240373014201 0ustar liggesusersPackage: multiDimBio Type: Package Title: Multivariate Analysis and Visualization for Biological Data Version: 1.2.1 Date: 2019-08-30 Author: Samuel V. Scarpino Maintainer: Samuel V. Scarpino Description: Code to support a systems biology research program from inception through publication. The methods focus on dimension reduction approaches to detect patterns in complex, multivariate experimental data and places an emphasis on informative visualizations. The goal for this project is to create a package that will evolve over time, thereby remaining relevant and reflective of current methods and techniques. As a result, we encourage suggested additions to the package, both methodological and graphical. License: GPL (>= 3.0) LazyLoad: yes Imports: ggplot2 (>= 3.2.0), lme4 (>= 1.1-21), pcaMethods (>= 1.76.0), misc3d (>= 0.8-4), MASS (>= 7.3-29), RColorBrewer(>= 1.1-2), gridGraphics (>= 0.1-5) NeedsCompilation: no Packaged: 2019-08-30 14:21:26 UTC; scarpino Repository: CRAN Date/Publication: 2019-08-30 15:30:03 UTC multiDimBio/R/0000755000176200001440000000000013532230346012672 5ustar liggesusersmultiDimBio/R/ldaPlot.R0000644000176200001440000001164712726020637014431 0ustar liggesusers#' A function to visualize the results of a discriminant analysis #' #' The function takes as input the traits and group IDs and will perform a #' discriminate function analysis and visualize the results. For the pair-wise #' comparison of groups we use density histograms with points along the x-axis #' denoting the actual data, Figure 3 For multi-group comparisons we plot a #' bivariate scatter for all pairwise combinations of discriminate axes. The #' color of plotting symbols can be altered using the palette argument and the #' axes comparisons (with max n = number of groups - 1). #' #' #' @param Data A (non-empty), numeric matrix of data values #' @param Groups A (non-empty), vector indicating group membership. #' Length(unique(Group))==2 #' @param palette A color palette for plotting. The default is 'Paired.' See #' colorbrewer2.org for alternatives. #' @param axes A numeric vector describing which axes to compare. For example, #' axes=c(1,2) will on produce a single plot comparing the first and second #' axis. #' @return Returns a list of ggplot2 plots. #' @seealso \code{\link{lda}} #' @examples #' #' data(Nuclei) #' data(Groups) #' ldaPlot(Nuclei, Groups, palette='BrBG', axes=c(1,2,2,3,1,3)) #' #' @export ldaPlot ldaPlot <- function(Data, Groups, palette = "BrBG", axes = c(1, 2, 2, 3, 1, 3)) { LD1 <- NULL LD2 <- NULL if (min(Data, na.rm = TRUE) < 1e-04) { TOL <- min(Data, na.rm = TRUE) } else { TOL <- 1e-04 } plots_ret <- list() Groups <- as.factor(Groups) Groups <- as.numeric(Groups) if (length(unique(Groups)) > 2) { if (length(unique(Groups)) > 6) { stop("Will not work with more than 6 groups", "\n", "\n") } #end if >6 ld1 <- lda(Groups ~ Data, tol = TOL) pm1 <- predict(ld1) GC <- as.factor(pm1$class) pm1df <- data.frame(GC, pm1$x) AX <- matrix(axes, ncol = 2, byrow = TRUE) for (i in 1:nrow(AX)) { AX1 <- AX[i, 1] + 1 AX2 <- AX[i, 2] + 1 pm1df.i <- pm1df[, c(1, AX1, AX2)] colnames(pm1df.i) <- c("GC", "LD1", "LD2") # pi<-ggplot(pm1df,aes(pm1df[,AX1],pm1df[,AX2])) pi <- ggplot(pm1df.i, aes(LD1, LD2)) pi.save <- pi + geom_point(aes(colour = GC, shape = GC), size = 3.5) + scale_colour_brewer(palette = palette) + scale_x_continuous(name = colnames(pm1df)[AX1]) + scale_y_continuous(name = colnames(pm1df)[AX2]) + theme(legend.position = "right", legend.background = element_rect(fill = "#ffffffaa", colour = "black"), panel.background = element_rect(fill = "white", colour = "black"), axis.text = element_text(colour = "black", size = 15), axis.title = element_text(colour = "black", size = 15), legend.key = element_rect(fill = "white "), panel.grid.minor = element_blank(), panel.grid.major = element_blank()) plots_ret[["ldaPlot-AllGroups"]] <- pi.save } #end for i } #end if unique(groups)>2 # 1 comparison matrix ngroups <- length(unique(Groups)) comp <- makeCompMat(ngroups) for (j in 1:nrow(comp)) { use.j <- which(Groups == comp[j, 1] | Groups == comp[j, 2]) ld.j <- lda(Groups[use.j] ~ Data[use.j, ], tol = TOL) pm.j <- predict(ld.j) GC.j <- as.factor(pm.j$class) pmdf.j <- data.frame(GC.j, pm.j$x) colnames(pmdf.j) <- c("GC", "LD1") p.j <- ggplot(pmdf.j, aes(x = LD1)) psave.j <- p.j + geom_density(aes(fill = GC)) + scale_fill_manual(values = c("#8C510A66", "#01665E66")) + geom_point(aes(y = 0, x = LD1, shape = GC), size = 3.5) + geom_point(aes(y = 0, x = LD1, colour = GC, shape = GC)) + scale_colour_manual(values = c("#8C510A", "#01665E")) + labs(x = "Linear Discriminant 1 Score", y = "Density") + theme(legend.position = "right", legend.background = element_rect(fill = "#ffffffaa", colour = "black"), panel.background = element_rect(fill = "white", colour = "black"), axis.text = element_text(colour = "black", size = 15), axis.title = element_text(colour = "black", size = 15), legend.key = element_rect(fill = "white "), panel.grid.minor = element_blank(), panel.grid.major = element_blank()) timestamp <- as.character(as.integer(Sys.time())) plots_ret[[paste(comp[j, 1], comp[j, 2], "Group LDA Plot", sep = " ")]] <- psave.j } #end for j return(plots_ret) } #end FUNCTION multiDimBio/R/IntPlot.R0000644000176200001440000000644012726020637014416 0ustar liggesusers#' A function to visualize the results of a MANOVA #' #' The function produces an interaction plot to demonstrate the results of a #' MANOVA using the function interaction.plot. #' #' #' @param Scores A (non-empty) numeric matrix of principle component scores or #' raw data. #' @param Cov.A A (non-empty) bivariate factor vector indicating the factor for #' each row in Scores #' @param Cov.B A (non-empty) bivariate factor vector indicating the factor for #' each row in Scores #' @param pvalues An optional vector of p values for each covariate across #' Scores. The length of pvalues must equal the number of columns in Scores #' times 2. #' @param int.pvalues An optional vector of p values for each interaction. The #' length of int.pvalues must equal the number of columns in Scores. #' @return a list of plots stored as grid plots. #' @seealso \code{\link{interaction.plot}} #' @examples #' #' data(Scores) #' data(CondA) #' data(CondB) #' #' pvals<-c(0.03,0.6,0.05,0.07,0.9,0.2,0.5,0.3) #' int.pvals<-c(0.3,0.45,0.5,0.12) #' #' IntPlot(Scores,CondA,CondB,pvalues=pvals, int.pvalues=int.pvals) #' #' @export IntPlot IntPlot <- function(Scores, Cov.A, Cov.B, pvalues = rep(1, 8), int.pvalues = rep(1, 4)) { if (ncol(Scores) > 4) { warning("Only the first 4 Score axes are plotted", "\n", "\n") } # line colors pvalues[pvalues > 0.05] <- 1 pvalues[pvalues <= 0.05 & pvalues > 0.01] <- 2 pvalues[pvalues <= 0.01 & pvalues > 0.005] <- 3 pvalues[pvalues <= 0.005] <- 4 colors <- c("gray", "#FDBB84", "#EF6548", "#990000") COLS <- colors[pvalues] COL <- matrix(COLS, ncol = 2, nrow = 4, byrow = TRUE) # line types for interaction int.pvalues[int.pvalues > 0.05] <- 5 int.pvalues[int.pvalues <= 0.05] <- 1 LTY <- int.pvalues plots_ret <- list() #interaction plot plot(rep(0, 4), 1:4, pch = 15, col = colors, cex = 5, xaxt = "n", yaxt = "n", xlab = "", ylab = "", xlim = c(0, 0.5), ylim = c(0, 4), lwd = 2) segments(0, 0, 0.1, 0, lty = 3, lwd = 2) segments(0, 0.5, 0.1, 0.5, lty = 1, lwd = 2) text(0.1, 4, labels = "p<0.005", cex = 2) text(0.15, 3, labels = "0.01< p >0.005", cex = 2) text(0.15, 2, labels = "0.05< p >0.01", cex = 2) text(0.1, 1, labels = "p >0.05", cex = 2) text(0.27, 0.5, labels = "Interaction Significant", cex = 2) text(0.3, 0, labels = "Interaction Not Significant", cex = 2) plots_ret[["Pvalue-Colors-InteractionPlots"]] <- recordPlot() #interaction plot legend layout(matrix(c(1, 2, 3, 4), ncol = 2, byrow = TRUE)) for (i in 1:4) { interaction.plot(Cov.A, Cov.B, Scores[, i], legend = TRUE, ylab = paste("Axis", i, "Score", sep = " "), xlab = "", col = c(COL[i, 1], COL[i, 2]), lty = LTY[i], lwd = 2) } plots_ret[["Interaction Plots-LEGEND"]] <- recordPlot() #interaction plots no colors layout(matrix(c(1, 2, 3, 4), ncol = 2, byrow = TRUE)) for (i in 1:4) { interaction.plot(Cov.A, Cov.B, Scores[, i], legend = FALSE, ylab = paste("Axis", i, "Score", sep = " "), xlab = "", col = c(COL[i, 1], COL[i, 2]), lty = LTY[i], lwd = 2) } plots_ret[["Interaction Plots"]] <- recordPlot() return(plots_ret) } #end function multiDimBio/R/ZTrans.R0000644000176200001440000000225112726020637014242 0ustar liggesusers#' A function to convert data into a z-score #' #' This function converts the columns in a data matrix into z-scores. The #' score is computed by subracting each observation in a column from the column #' mean and divding by the column standard deviation. Each column is converted #' independently of the others missing values are ignored in the calculation. #' #' #' @param DATA A (non-empty) matrix with data values. Columns should be #' different traits and rows unique observations of those traits #' @return Returns a matrix with the same dimensions as DATA. #' @seealso \code{\link{PercentMax}}, \code{\link{MeanCent}} #' @examples #' #' data(Nuclei) #' #' colMeans(Nuclei, na.rm=TRUE) #' #' Nuclei.ZT<-ZTrans(Nuclei) #' #' colMeans(Nuclei.ZT, na.rm=TRUE) #' #' @export ZTrans ZTrans <- function(DATA) { cmean <- apply(DATA, 2, mean, na.rm = TRUE) cmat <- matrix(cmean, ncol = ncol(DATA), nrow = nrow(DATA), byrow = TRUE) muDAT <- DATA - cmat csd <- apply(DATA, 2, sd, na.rm = TRUE) cdmat <- matrix(csd, ncol = ncol(DATA), nrow = nrow(DATA), byrow = TRUE) Z.SCORES <- muDAT/cdmat return(Z.SCORES) } #end FUNCTION multiDimBio/R/h2Estimate.R0000644000176200001440000000753213532223415015030 0ustar liggesusers#' Estimates the heritability of a binomial trait #' #' Estimates the narrow-sense heritability of a binomial trait and calculates a #' p value by randomization. #' #' Estimates the narrow-sense heritability of a binomial trait. This function #' works by fitting two models, one with and one without a random-effect of #' sire. These models are compared by randomizing the sire ids nreps times and #' re-fitting the model. For each of the nreps model pairs, a deviance is #' calculated and a "p value" estimated by comparing that distribution of #' deviance to the observed. The heritability is approximatly #' tau2/(tau2+(pi/sqrt(3))^2), where tau2 is the random-effect variance due to #' sire. #' #' @param data a (non-empty) numeric matrix with three columns. The first two #' should contain the trait data (number of occurances of each outcome type) #' and the third should contain the group ids. #' @param nreps a (non-empty) numeric value indicating the number of resamples #' to perform when calculating the emperical p value. #' @return Returns a list. The list contains: \item{h2}{ The estimated #' narrow-sense heritability. The narrow-sense heritability is approximatly #' tau2/(tau2+(pi/sqrt(3))^2), where tau2 is the random-effect variance due to #' sire. } #' #' \item{pval}{ The probability that the best-fit model includes an extra #' variance term for sire (random effect of dad). The value is calculated by #' comparing the deviances from nreps number of randomized model comparisions. #' } #' #' \item{deviance}{ The deviance between a null model without a random effect #' of dad and a model with. } #' #' \item{sim}{ The simulated deviances used in calculating the p value in pval. #' } #' #' \item{obsMod}{ The glmer model object resulting from the observed data. } #' @examples #' #' ndads <- 18 #' mm <- 4 #' vv <- 6 #' tau2 <- 1.5 #' nbins <- 3 #' #' mylogit <- function(x) log(x/{1-x}) #' ilogit <- function(x) 1/{1+exp(-x)} #' #' swimprob <- ilogit(rnorm(ndads, 0, sqrt(tau2))) #' mytable <- NULL #' for(i in 1:ndads) { #' bincounts <- pmax(1,rnbinom(nbins, mu = mm, size = mm^2/{vv-mm})) #' swim <- rbinom(3, bincounts,swimprob[i]) #' set <- bincounts - swim #' theserows <- data.frame(set=set,swim=swim, Dad = i, Bin = 1:nbins) #' mytable <- rbind(mytable, theserows) #' } #' #' est <- h2Estimate(mytable,nreps=10) #' #' print(est$h2) #' #' @export h2Estimate h2Estimate <- function(data, nreps = 1000) { colnames(data) <- c("trait1", "trait2", "dad") hm0.real = glm(cbind(trait1, trait2) ~ 1, data = data, family = binomial) # hm1.real = glmer(cbind(trait1, trait2) ~ (1 | dad), # data=data, family=binomial, REML=FALSE) hm1.real = glmer(cbind(trait1, trait2) ~ (1 | dad), data = data, family = binomial) Dobs = as.numeric(deviance(hm0.real) - deviance(hm1.real)) if(Dobs < 1e-5){ pval = 1 Dobs = 0 sim = NA trueTau2 <- 0 h2 <- 0 perm1 = NA }else{ ncases <- nrow(data) perm1 = replicate(nreps, { neworder = sample(1:ncases, ncases) ptable = data.frame(dad = data$dad, t1 = data$trait1[neworder], t2 = data$trait2[neworder]) hm0 = glm(cbind(t1, t2) ~ 1, data = ptable, family = binomial) # hm1 = glmer(cbind(t1,t2) ~ (1 | dad), data=ptable, # family=binomial, REML=FALSE) hm1 = glmer(cbind(t1, t2) ~ (1 | dad), data = ptable, family = binomial) D = as.numeric(deviance(hm0) - deviance(hm1)) D }) pval <- length(which(perm1 > Dobs))/length(perm1) trueTau2 <- (VarCorr(hm1.real)$dad[1])^2 h2 <- 4 * (trueTau2/(trueTau2 + (pi/sqrt(3))^2)) } out <- list(h2 = h2, pval = pval, deviance = Dobs, sim = perm1, trueTau2 = trueTau2, obsMod_glmer = hm1.real, obsMod_glm = hm0.real) return(out) } multiDimBio/R/multiDimBio-package.R0000644000176200001440000002011113024771432016621 0ustar liggesusers #' Treatment condition for animals contained in the data set Nuclei #' #' Animals measured in the Nuclei data set were either from linneages exposed #' to the fungicide Vinclozolin (Vinclozolin) or not (Control). #' #' #' @name CondA #' @docType data #' @format A factor vector indicating which treatment group the individuals in #' Nuclei belong to. #' @references Crews, D, R Gillette, SV Scarpino, M Manikkam, MI Savenkova, MK #' Skinner. 2012. Epigenetic Transgenerational Alterations to Stress Response #' in Brain Gene Networks and Behavior. Proc. Natl. Acad. Sci. USA. 109 (23). #' 9143 - 9148. #' @source The data are provided courtesy of David Crews at the University of #' Texas at Austin. #' @examples #' #' data(CondA) #' NULL #' Stress condition for animals contained in the data set Nuclei #' #' Animals measured in the Nuclei data set were either subjected to chronic #' restraint stress (stress) or not (control). #' #' #' @name CondB #' @docType data #' @format A factor vector indicating which stress group the individuals in #' Nuclei belong to. #' @references Crews, D, R Gillette, SV Scarpino, M Manikkam, MI Savenkova, MK #' Skinner. 2012. Epigenetic Transgenerational Alterations to Stress Response #' in Brain Gene Networks and Behavior. Proc. Natl. Acad. Sci. USA. 109 (23). #' 9143 - 9148. #' @source The data are provided courtesy of David Crews at the University of #' Texas at Austin. #' @examples #' #' data(CondB) #' NULL #' Housing dyad for animals contained in the data set Nuclei #' #' Animals measured in the Nuclei data set were housed in dyads with one #' individual from the Vinclozolin line and one from the control line housed #' together. Each dyad was either stressed or not stressed. #' #' #' @name Dyad #' @docType data #' @format A factor vector indicating which housing dyad the individuals in #' Nuclei are in. #' @references Crews, D, R Gillette, SV Scarpino, M Manikkam, MI Savenkova, MK #' Skinner. 2012. Epigenetic Transgenerational Alterations to Stress Response #' in Brain Gene Networks and Behavior. Proc. Natl. Acad. Sci. USA. 109 (23). #' 9143 - 9148. #' @source The data are provided courtesy of David Crews at the University of #' Texas at Austin. #' @examples #' #' data(Dyad) #' NULL #' The group ID for animals contained in the data set Nuclei #' #' Animals measured in the Nuclei data set belong to one of four groups #' determined by their linneage (Vinclozolin or Control) and their stress #' treatment (Stressed or Non-Stressed). #' #' #' @name Groups #' @docType data #' @format A factor vector indicating which group the individuals in Nuclei are #' in. #' @references Crews, D, R Gillette, SV Scarpino, M Manikkam, MI Savenkova, MK #' Skinner. 2012. Epigenetic Transgenerational Alterations to Stress Response #' in Brain Gene Networks and Behavior. Proc. Natl. Acad. Sci. USA. 109 (23). #' 9143 - 9148. #' @source The data are provided courtesy of David Crews at the University of #' Texas at Austin. #' @examples #' #' data(Groups) #' NULL #' A Package for the Design, Analysis, and Visualization of Systems Biology #' Experiments #' #' multiDimBio is a package designed to support a systems biology research #' program from inception through publication. It focuses on dimension #' reduction approaches to detect patterns in complex, multivariate #' experimental data and places an emphasis on informative visualizations. The #' goal for this project is to create a package that will evolve over time, #' thereby remaining relevant and reflective of current methods and techniques. #' As a result, we encourage suggested additions to the package, both #' methodological and graphical. #' #' \tabular{ll}{ Package: \tab multiDimBio\cr Type: \tab Package\cr Version: #' \tab 1.1.1\cr Date: \tab 2016-12-16\cr License: \tab GPL 3.0\cr LazyLoad: #' \tab yes\cr } The datasets are: Nuclei, Groups, CondA, CondB, Scores, and #' Dyad #' #' The main functions are: boxWhisker, completeData, F_select, intPlot, #' ldaPlot, loadings, meanCent, percentMax, permuteLDA, power, ppca_mdb, #' zTrans, binomPower, h2Estimate, and plotBinomPower. #' #' Type ? to learn more about these objects, e.g. ?Nuclei #' #' Type ? to see examples of the function's use, e.g. ?FSelect #' #' @name multiDimBio-package #' @aliases multiDimBio-package multiDimBio #' @docType package #' @author Samuel V Scarpino Maintainer: Samuel V Scarpino #' #' @seealso #' #' \code{\link[pcaMethods:pcaMethods]{pcaMethods}} #' @references Collyer M, Adams D. (2007) Analysis of Two - State Multivariate #' Phenotypic Change in Ecological Studies. Ecology: 88(3) 683 - 692. #' #' Costanza M, Afifi A. (1979) Comparison of Stopping Rules in Forward Stepwise #' Discriminant Analysis. Journal of the American Statistical Association: pp. #' 777 - 78 #' #' Crews D, Gillette R, Scarpino SV, Manikkam M, Savenkova MI, Skinner MK. #' (2012) Epigenetic Transgenerational Alterations to Stress Response in Brain #' Gene Networks and Behavior. Proc. Natl. Acad. Sci. USA: 109(23) 9143 - 9148. #' #' Davies SW, Scarpino SV, Pongwarin T, Scott J, Matz MV. (2015) Estimating #' Trait Heritability in Highly Fecund Species. G3: Genes| Genomes| Genetics: #' 5(12) 2639 - 45. #' #' Habbema J, Hermans J. (1977) Selection of Variables in Discriminant Analysis #' by F-Statistics and Error Rate. Technometrics: 19(4) 487 - 493. #' #' Jennrich R. (1977) Stepwise discriminant analysis, volume 3. New York Wiley #' Sons. #' #' Roweis S. (1997) EM algorithms for PCA and sensible PCA. Neural Inf. Proc. #' Syst.: 10 626 - 632. #' #' Stacklies W, Redestig H, Scholz M, Walther D, Selbig J. (2007) pcaMethods - #' a Bioconductor package providing PCA methods for incomplete data. #' Bioinformatics: 23 1164 - 1167. #' #' Troyanskaya O, Cantor M, Sherlock G, Brown P, Hastie T, Tibshirani R, #' Botstein D, Altman R. (2001) Missing value estimation methods for DNA #' microarrays. Bioinformatics: 17(6) 520 - 5252. NULL #' Brain activity in 14 brain regions for 71 individuals #' #' The activity in 14 brain nuclei were measured in rats that were in one of #' four groups: 1) Non-stressed, Control 2) Stressed, Control 3) Non-stressed, #' Vinclozolin 4) Stressed, Vinclozolin #' #' Two different cohorts of male rats of the F3 generation of Vinclozolin #' (Vinclozolin-Lineage) and Vehicle Control (Control-Lineage) Lineages #' produced at Washington State University are shipped to the University of #' Texas on the day after weaning. Rats are randomly pair-housed (one #' Control-Lineage and one Vinclozolin-Lineage animal) and remain in these #' dyads throughout the duration of the study. Half of the dyads are randomly #' chosen to receive chronic restraint stress (CRS) treatment for 6 hours daily #' for 21 consecutive days commencing 1 hr after lights off. Activity in 14 #' brain nuclei were measured at the end of the study. #' #' @name Nuclei #' @docType data #' @format A numeric matrix with 71 individuals as rows and the activity of 14 #' brain nuclei as columns. NAs indicate missing data. #' @references Crews, D, R Gillette, SV Scarpino, M Manikkam, MI Savenkova, MK #' Skinner. 2012. Epigenetic Transgenerational Alterations to Stress Response #' in Brain Gene Networks and Behavior. Proc. Natl. Acad. Sci. USA. 109 (23). #' 9143 - 9148. #' @source The data are provided courtesy of David Crews at the University of #' Texas at Austin. #' @examples #' #' data(Nuclei) #' NULL #' Principle component scores based on the data in Nuclei #' #' Principle component scores were computed using PPCA for the data set Nuclei. #' #' #' @name Scores #' @docType data #' @format A numeric matrix with 4 columns and the same number of rows as #' Nuclei. There are no missing values. #' @references Crews, D, R Gillette, SV Scarpino, M Manikkam, MI Savenkova, MK #' Skinner. 2012. Epigenetic Transgenerational Alterations to Stress Response #' in Brain Gene Networks and Behavior. Proc. Natl. Acad. Sci. USA. 109 (23). #' 9143 - 9148. #' @source The data are provided courtesy of David Crews at the University of #' Texas at Austin. #' @examples #' #' data(Scores) #' #' data(Nuclei) #' #' SCORES<-PPCA(Nuclei)@scores #' NULL multiDimBio/R/MeanCent.R0000644000176200001440000000153312726020637014515 0ustar liggesusers#' A function to scale data to mean 0 #' #' This function rescales the columns in a data matrix to have mean 0. The #' variance is not scaled and missing values are ignored in the calculation. #' #' #' @param DATA A (non-empty) matrix with data values. Columns should be #' different traits and rows unique observations of those traits #' @return Returns a matrix with the same dimensions as DATA. #' @seealso \code{\link{ZTrans}}, \code{\link{PercentMax}} #' @examples #' #' data(Nuclei) #' #' colMeans(Nuclei, na.rm=TRUE) #' #' Nuclei.MC<-MeanCent(Nuclei) #' #' colMeans(Nuclei.MC, na.rm=TRUE) #' #' @export MeanCent MeanCent <- function(DATA) { cmean <- apply(DATA, 2, mean, na.rm = TRUE) cmat <- matrix(cmean, ncol = ncol(DATA), nrow = nrow(DATA), byrow = TRUE) muDAT <- DATA - cmat return(muDAT) } #end FUNCTION multiDimBio/R/Power.R0000644000176200001440000001624712726020637014127 0ustar liggesusers#' A function to estimate the error rate for FSelect and PermuteLDA. #' #' Methods are implemented to compute the statistical power, in terms of the #' type II error rate, based on anticipated sample and effect sizes for #' FSelect() and PermuteLDA(). By default the power of both tests are #' determined by iterating over a range of effect and sample sizes. The default #' settings were selected to be representative of many behavioral genetic #' studies; however, users can input alternative sample and effect sizes. For #' high values of trials this function can be very slow. #' #' The algorithm for the power analysis proceeds as follows: 1. Input sample #' and effect sizes 2. Set the number of significant effects, e to 0. Note - #' Total number of traits is fixed at 6 3. Draw random deviates for the given #' sample size for 6 traits. Note - All traits not significant under this #' iteration are drawn from a N(0,1) distribution. 4. Perform either FSelect() #' or PermuteLDA() and record the results. 5. Return to step 3 N times, #' recording the results each time. Note - N is set using the trials input 6. #' If e<5 return to step 2 and set the number of significant effects to e+1 7. #' Proceed to the next combination of sample and effect size. 8. Output the #' results for each combination of sample and effect size as a function of the #' number of significant traits. #' #' @param func A character string indicating which function to compute the #' power for, can be either 'PermuteLDA' or 'FSelect' #' @param N A (non-empty) vector of group sizes. The lenght of N must be #' greater than 1 and tha minimum group size for 'FSelect' can not be less than #' 6. The size of each group is N/2. #' @param effect.size A (non-empty) vector or single value of effect sizes. #' @param trials A number indicating the number of trials for each combination #' of N and effect.size to calculate the power. #' @return Outputs a list with plots and results for each effect size. #' @seealso \code{\link{PermuteLDA}},\code{\link{FSelect}} #' @examples #' #' #not run #' #Power(func = 'FSelect', N=c(6,8), effect.size=0.5, trials = 2) #' #' @export Power Power <- function(func = "PermuteLDA", N = "DEFAULT.N", effect.size = "DEFAULT.e", trials = 100) { cat("Power Analysis -", func, "\n", "\n") if (length(N) < 2) { stop("must have more than two group sizes") } if (func == "FSelect" & min(N < 6)) { stop("must have more than 5 indiv with FSelect") } if (is.character(effect.size) == TRUE) { ES <- c(0.1, 0.4, 0.8, 1.6) } else { ES <- effect.size } if (is.character(N) == TRUE) { N <- c(6, 12, 24, 48, 96) } counter <- length(ES) res <- list() plots_rest <- list() for (e in ES) { start <- Sys.time() effect <- e RESULTS <- c() for (n in N) { RESULTS.P <- c() for (k in 0:4) { r.p <- c() for (i in 1:trials) { if (k == 0) { mu <- c(0, 0, 0, 0) } else { if (k == 1) { mu <- c(effect, 0, 0, 0) } else { if (k == 2) { mu <- c(effect, effect, 0, 0) } else { if (k == 3) { mu <- c(effect, effect, effect, 0) } else { mu <- c(effect, effect, effect, effect) } #end if/else (k=3) } #end if/else (k=2) } #end if/else (k=1) } #end if/else (k=0) t1 <- rnorm(n) t2 <- c(rnorm(n/2, mu[4]), rnorm(n/2, 0)) t3 <- c(rnorm(n/2, mu[1]), rnorm(n/2, 0)) t4 <- c(rnorm(n/2, mu[2]), rnorm(n/2, 0)) t5 <- c(rnorm(n/2, mu[3]), rnorm(n/2, 0)) t6 <- runif(n) # data T <- cbind(t1, t2, t3, t4, t5, t6) # groups GR <- rep(1:2, each = n/2) # results if (func == "PermuteLDA") { func.i <- PermuteLDA(T, GR, 100) if (func.i[3] <= 0.05) { p.i <- 1 } else { p.i <- 0 } #end if/else(func.i[3]) } else { func.i <- FSelect(T, GR, 4) ks <- 0:k if (length(k) == 0) { is.sig <- sum(func.i$PrF <= 0.05) } else { ks <- ks[-1] is.sig <- sum(func.i$PrF[ks] <= 0.05) } #end if/else length(k) if (is.sig >= 1) { p.i <- 1 } else { p.i <- 0 } #end if/else is.sig>=1 } #end if/else func== r.p <- c(r.p, p.i) } #end for i RESULTS.P <- c(RESULTS.P, sum(r.p)/i) } #end for k RESULTS <- c(RESULTS, RESULTS.P) } #end for n RP <- matrix(RESULTS, ncol = 5, byrow = TRUE) X <- N X <- X/2 Size <- rep(X, times = 5) SigTraits <- c("0", "1", "2", "3", "4") SigTraits <- rep(SigTraits, each = length(X)) RES.P <- matrix(RP, ncol = 1) DAT <- data.frame(Size, RES.P, SigTraits) pal <- palette(brewer.pal(5, "YlGnBu")) pal <- palette(brewer.pal(5, "YlGnBu")) pal[1] <- "black" p <- ggplot(DAT, aes(Size, RES.P)) p.save <- p + geom_line(aes(group = SigTraits, colour = SigTraits), size = 2) + scale_x_continuous(expand = c(0, 0), name = "Individuals Per Group") + scale_y_continuous(expand = c(0, 0), limits = c(0, 1), name = "Proportion Significant") + geom_hline(aes(yintercept = 0.8), linetype = 3) + scale_colour_manual(values = pal) + theme(legend.position = "right", legend.background = element_rect(fill = "#ffffffaa", colour = "black"), panel.background = element_rect(fill = "white", colour = "black"), axis.text = element_text(colour = "black", size = 15), axis.title = element_text(colour = "black", size = 15), legend.key = element_rect(fill = "white "), panel.grid.minor = element_blank(), panel.grid.major = element_blank()) plots_ret[[paste(as.character(effect), func, sep = ".")]] <- p.save res[[paste(as.character(effect), func, sep = ".")]] <- DAT counter <- counter - 1 end <- Sys.time() total <- end - start remaining <- total * counter print() cat(paste0(remaining," estimated time remaining"), "\n", "\n") } #end for e return(list("results" = res, "plots" = plots_ret)) } #end FUNCTION multiDimBio/R/boxWhisker.R0000644000176200001440000000334412726020637015152 0ustar liggesusers#' A function to create a box and whisker plot by group ID #' #' A function to create a box and whisker plot by group ID. #' #' #' @param data a (non-empty) matrix of data values #' @param groups a (non-empty) vector of group IDs with length equal to the #' number of rows in data #' @param palette A color palette for plotting. The default is 'Paired.' See #' colorbrewer2.org for alternatives. #' @return Returns a box-whisker plot of the data by group ID. #' @examples #' #' data(Nuclei) #' data(Groups) #' boxWhisker(Nuclei, Groups) #' #' #changing the color palette #' #' boxWhisker(data = Nuclei, groups = Groups, palette = 'Set1') #' #' @export boxWhisker boxWhisker <- function(data, groups, palette = "Paired") { dat <- as.matrix(data) score <- matrix(dat, ncol = 1) trait <- colnames(data) trait <- rep(trait, each = nrow(dat)) group <- rep(groups, ncol(dat)) dat.df <- data.frame(score, trait, group) dat.df$group <- factor(dat.df$group) dat.trait <- factor(dat.df$trait) p <- ggplot(dat.df, aes(trait, score)) p.save <- p + geom_boxplot(aes(fill = group), outlier.shape = NA) + scale_fill_brewer(palette = palette) + theme(legend.position = "right", legend.background = element_rect(fill = "#ffffffaa", colour = "black"), panel.background = element_rect(fill = "white", colour = "black"), axis.text.y = element_text(colour = "black", size = 15), axis.text.x = element_text(colour = "black", size = 8), axis.title = element_text(colour = "black", size = 15), legend.key = element_rect(fill = "white "), panel.grid.minor = element_blank(), panel.grid.major = element_blank()) return(p.save) } #end FUNCTION multiDimBio/R/binomPower.R0000644000176200001440000000676012726020637015153 0ustar liggesusers#' Power analysis for estimating the heritability of a binomial trait #' #' Performs a power analysis for estimating the heritability of a binomial #' trait. This function can take a long time to run if either nsims or nperms #' is large. #' #' #' @param ndads a (non-empty) numeric value indicating the number of dads. #' @param mm a (non-empty) numeric value indicating the mean number of #' offspring per dad per bin (normal dist). #' @param vv a (non-empty) numeric value indicating the variance in offspring #' per dad per bin (normal dist). #' @param tau2 a (non-empty) numeric value indicating the dad effect #' (narrow-sense heritability ~ tau2/(tau2+(pi/sqrt(3))^2)). #' @param nperms a (non-empty) numeric value indicating the number of bootstrap #' permutations to use for caluclating a p value. #' @param nsims a (non-empty) numeric value indicating the number of #' simulations to run per parameter combination. #' @param nbins a (non-empty) numeric value indicating the number of bins, data #' are pooled before analysis. #' @param alpha a (non-empty) numeric value indicating the cutoff for #' significant p values. #' @param doPlot a (non-empty) logical value indicating whether to plot the #' results of the power analysis. #' @return Returns a list and an optional set of .pdfs (if doPlot==TRUE). The #' list contains: \item{roc}{ a data.frame with the summarized results of the #' power analysis. } #' #' \item{params}{ a numeric matrix with the paramater values. } #' #' \item{results}{ a numeric matrix with the full results of the analysis. } #' @examples #' #' ndads <- c(9,18) #' mm <- 4.629634 #' vv <- 6.31339 #' tau2 <- c(0,0.5) #' nperms <- 2 #' nsims <- 2 #' nbins <- 3 #' doPlot <- TRUE #' binomPower(ndads,mm,vv,tau2,nperms,nsims,nbins,doPlot) #' #' @export binomPower binomPower <- function(ndads, mm, vv, tau2, nperms, nsims, nbins, alpha = 0.05, doPlot = FALSE) { # power analysis code params <- expand.grid(tau2, ndads, mm, vv) colnames(params) <- c("tau2", "ndads", "mm", "vv") rm <- which(params$vv < params$mm) if (length(rm) > 0) { params <- params[-rm, ] } ptest <- vector("list", nrow(params)) out <- c() for (i in 1:nrow(params)) { ptest.i <- getP(ndads = params$ndads[i], mm = params$mm[i], vv = params$vv[i], tau2 = params$tau2[i], nperms = nperms, nsims = nsims, nbins = 3) ptest[[i]] <- ptest.i out <- c(out, ptest.i) } out.save <- matrix(out, ncol = nsims, byrow = TRUE) reject <- apply(out.save, 1, function(x, alpha) return(length(which(x < alpha))), alpha = alpha) reject <- reject/nsims trueM0 <- which(params$tau2[1:length(reject)] == 0) trueM1 <- (1:length(reject))[-trueM0] falseP <- rep(NA, length(reject)) falseP[trueM0] <- reject[trueM0] falseP[trueM1] <- 1 - reject[trueM1] trueP <- 1 - falseP dat.plot <- data.frame(falseP, trueP, params$ndads[1:length(reject)], params$tau2[1:length(reject)]) colnames(dat.plot) <- c("falseP", "trueP", "ndads", "trueTau2") dat.plot$group <- paste0("trueTau2", ":", dat.plot$trueTau, "-", "nDads", ":", dat.plot$ndads) dat.plot$trueTau <- as.factor(dat.plot$trueTau) dat.plot$ndads <- as.factor(dat.plot$ndads) tstamp <- as.numeric(Sys.time()) if (doPlot == TRUE) { plotBinomPower(datPlotBig = dat.plot, params = params) } out <- list(roc = dat.plot, params = params, results = out.save) return(out) } multiDimBio/R/LandscapePlot.R0000644000176200001440000002354412726020637015562 0ustar liggesusers#' A function to visualize the Functional Landscape of measured traits #' #' This function plots a three-dimensional landscape of measured traits. The #' peak heights are relative with respect to the input data. The width of each #' peak is controlled by the argument sigma and has only an aesthetic purpose. #' The 3D image is generated using the #' \code{\link[misc3d:drawScene]{drawScene}} and #' \code{\link[misc3d:surfaceTriangles]{surfaceTriangles}} . #' #' #' @param Data A (non-empty) numeric matrix with trait values #' @param Groups A (non-empty)factor vector indicating the group membership of #' each row in Data. If there is only a single group present in Data then #' Groups=NULL (default). #' @param PDF Logical controlling whether to output the results as a .pdf or a #' .jpeg. The default (PDF=FALSE) will produce a .jpeg. The file size for #' .pdf output can be large. #' @param LocPlot Logical controlling whether to output a .pdf naming the peaks #' according to the columns they represent. The defaul is FALSE. #' @param control An optional numeric vector setting the control parameters for #' persp. control[1] = theta, control[2] = r, control[3] = phi #' @return a list of plots stored as grid plots (or.pdf if PDF=TRUE) file for #' each column in data. #' @examples #' #' data(Nuclei) #' data(Groups) #' #' #plotting the first six columns #' #not run #' #LandscapePlot(Nuclei[,1:6], Groups=Groups) #' #' @export LandscapePlot LandscapePlot <- function(Data, Groups = NULL, PDF = FALSE, LocPlot = FALSE, control = c(75, 1, 30)) { if (is.matrix(Data) == FALSE) { stop("Data must be a matrix", "\n", "\n") } if (ncol(Data) > 9) { stop("Does not plot more than 9 traits", "\n", "\n") } mu1 <- 0 # setting the expected value of x1 mu2 <- 0 # setting the expected value of x2 s11 <- 10 # setting the variance of x1 s12 <- 15 # setting the covariance between x1 and x2 s22 <- 10 # setting the variance of x2 rho <- 0.5 # setting the correlation coefficient between x1 and x2 x1 <- seq(-10, 10, length = 61) # generating the vector series x1 x2 <- x1 # copying x1 to x2 f <- function(x1, x2) { term1 <- 1/(2 * pi * sqrt(s11 * s22 * (1 - rho^2))) term2 <- -1/(2 * (1 - rho^2)) term3 <- (x1 - mu1)^2/s11 term4 <- (x2 - mu2)^2/s22 term5 <- -2 * rho * ((x1 - mu1) * (x2 - mu2))/(sqrt(s11) * sqrt(s22)) term1 * exp(term2 * (term3 + term4 - term5)) } # setting up the function of the multivariate normal density M <- outer(x1, x2, f) # GRS <- unique(Groups) if (length(GRS) == 0) { GRS <- "Group" Groups <- rep(GRS, length = nrow(Data)) } for (j in 1:length(GRS)) { use.j <- which(Groups == GRS[j]) DAT.j <- Data[use.j, ] DAT.j <- as.matrix(DAT.j) if (ncol(DAT.j) > 1) { mu.j <- colMeans(DAT.j, na.rm = TRUE) } else { mu.j <- mean(DAT.j, na.rm = TRUE) } if (length(mu.j) == 1) { M.plot <- M * mu.j posx <- c(0) posy <- c(0.5) labels <- c("1") } if (length(mu.j) == 2) { MA <- cbind(M * 0, M * mu.j[2]) MB <- cbind(M * mu.j[2], M * 0) M.plot <- rbind(MA, MB) posx <- c(0.75, -0.75) posy <- c(1, 0.5) labels <- c("1", "2") } if (length(mu.j) == 3) { M.plot <- matrix(0, nrow = 61 * 2, ncol = 61 * 3) M.plot[1:61, 62:122] <- M * mu.j[1] M.plot[62:122, 1:61] <- M * mu.j[2] M.plot[62:122, 123:183] <- M * mu.j[3] M.fill <- matrix(, ncol = 183, nrow = 61) M.plot <- rbind(M.fill, M.plot) posx <- c(0.5, -0.75, 0.5) posy <- c(0.5, 0, -0.15) labels <- c("1", "2", "3") } if (length(mu.j) == 4) { MA <- cbind(M * mu.j[1], M * mu.j[2]) MB <- cbind(M * mu.j[3], M * mu.j[4]) M.plot <- rbind(MA, MB) posx <- c(-0.5, 0.75, -1, 0.2) posy <- c(1, 0.75, 0, -0.5) labels <- c("1", "2", "3", "4") } if (length(mu.j) == 5) { M.plot <- matrix(0, nrow = 61 * 3, ncol = 61 * 3) M.plot[1:61, 62:122] <- M * mu.j[1] M.plot[62:122, 1:61] <- M * mu.j[2] M.plot[62:122, 123:183] <- M * mu.j[3] M.plot[123:183, 1:61] <- M * mu.j[4] M.plot[123:183, 123:183] <- M * mu.j[5] posx <- c(0.5, -0.3, 0.3, -0.75, 0) posy <- c(1, 0.4, 0.4, 0, 0) labels <- c("1", "2", "3", "4", "5") } if (length(mu.j) == 6) { M.plot <- matrix(0, nrow = 61 * 4, ncol = 61 * 3) M.plot[1:61, 62:122] <- M * mu.j[1] M.plot[62:122, 1:61] <- M * mu.j[2] M.plot[62:122, 123:183] <- M * mu.j[3] M.plot[123:183, 1:61] <- M * mu.j[4] M.plot[123:183, 123:183] <- M * mu.j[5] M.plot[184:244, 62:122] <- M * mu.j[6] M.fill <- matrix(, ncol = 61, nrow = 244) M.plot <- cbind(M.fill, M.plot) posx <- c(0.5, -0.3, 0.3, -0.75, 0, -0.75) posy <- c(1, 0.6, 0.4, 0.25, 0.25, 0) labels <- c("1", "2", "3", "4", "5", "6") } if (length(mu.j) == 7) { M.plot <- matrix(0, nrow = 61 * 4, ncol = 61 * 5) M.plot[1:61, 123:183] <- M * mu.j[1] M.plot[62:122, 62:122] <- M * mu.j[2] M.plot[62:122, 184:244] <- M * mu.j[3] M.plot[123:183, 1:61] <- M * mu.j[4] M.plot[123:183, 245:305] <- M * mu.j[5] M.plot[184:244, 62:122] <- M * mu.j[6] M.plot[184:244, 184:244] <- M * mu.j[7] M.fill <- matrix(, ncol = 305, nrow = 61) M.plot <- rbind(M.fill, M.plot) posx <- c(0.5, -0.3, 0.5, -0.75, 0.75, -0.75, 0.2) posy <- c(0.6, 0.4, 0.25, 0.25, -0.2, -0.1, -0.65) labels <- c("1", "2", "3", "4", "5", "6", "7") } if (length(mu.j) == 8) { M.plot <- matrix(0, nrow = 61 * 4, ncol = 61 * 4) M.plot[1:61, 62:122] <- M * mu.j[1] M.plot[1:61, 123:183] <- M * mu.j[2] M.plot[62:122, 1:61] <- M * mu.j[3] M.plot[62:122, 184:244] <- M * mu.j[4] M.plot[123:183, 1:61] <- M * mu.j[5] M.plot[123:183, 184:244] <- M * mu.j[6] M.plot[184:244, 62:122] <- M * mu.j[7] M.plot[184:244, 123:183] <- M * mu.j[8] posx <- c(0.2, 0.6, -0.3, 0.7, -0.75, 0.75, -0.75, -0.1) posy <- c(0.5, 0.5, 0.4, 0, 0.25, -0.35, -0.1, -0.2) labels <- c("1", "2", "3", "4", "5", "6", "7", "8") } if (length(mu.j) == 9) { M.plot <- matrix(0, nrow = 61 * 5, ncol = 61 * 6) M.plot[1:61, 184:244] <- M * mu.j[1] M.plot[62:122, 62:122] <- M * mu.j[2] M.plot[62:122, 245:305] <- M * mu.j[3] M.plot[123:183, 1:61] <- M * mu.j[4] M.plot[123:183, 306:366] <- M * mu.j[5] M.plot[184:244, 62:122] <- M * mu.j[6] M.plot[184:244, 245:305] <- M * mu.j[7] M.plot[245:305, 123:183] <- M * mu.j[8] M.plot[245:305, 184:244] <- M * mu.j[9] M.fill <- matrix(, ncol = 366, nrow = 61) M.plot <- rbind(M.fill, M.plot) posx <- c(0.6, -0.3, 0.6, -0.75, 0.75, -0.75, 0.4, -0.75, -0.1) posy <- c(0.5, 0.4, 0.1, 0.25, -0.35, -0.1, -0.45, -0.2, -0.3) labels <- c("1", "2", "3", "4", "5", "6", "7", "8", "9") } if (length(mu.j) > 9) { cat("Warning! Only the first 9 axes are shown", "\n", "\n") M.plot <- matrix(0, nrow = 61 * 5, ncol = 61 * 6) M.plot[1:61, 184:244] <- M * mu.j[1] M.plot[62:122, 62:122] <- M * mu.j[2] M.plot[62:122, 245:305] <- M * mu.j[3] M.plot[123:183, 1:61] <- M * mu.j[4] M.plot[123:183, 306:366] <- M * mu.j[5] M.plot[184:244, 62:122] <- M * mu.j[6] M.plot[184:244, 245:305] <- M * mu.j[7] M.plot[245:305, 123:183] <- M * mu.j[8] M.plot[245:305, 184:244] <- M * mu.j[9] M.fill <- matrix(, ncol = 366, nrow = 61) M.plot <- rbind(M.fill, M.plot) posx <- c(0.6, -0.3, 0.6, -0.75, 0.75, -0.75, 0.4, -0.75, -0.1) posy <- c(0.5, 0.4, 0.1, 0.25, -0.35, -0.1, -0.45, -0.2, -0.3) labels <- c("1", "2", "3", "4", "5", "6", "7", "8", "9") } plots_ret <- list() if (PDF == TRUE) { pdf(paste(Groups[j], timestamp, "Functional Landscape.pdf")) persp(seq(0, 1, length.out = nrow(M.plot)), seq(0, 1, length.out = nrow(M.plot)), M.plot, box = FALSE, col = "#F0F0F0", border = "#969696", theta = control[1], r = control[2], phi = control[3]) dev.off() } else { persp(seq(0, 1, length.out = nrow(M.plot)), seq(0, 1, length.out = nrow(M.plot)), M.plot, box = FALSE, col = "#F0F0F0", border = "#969696", theta = control[1], r = control[2], phi = control[3]) plots_ret[[paste0(j,"-landscape")]] <- recordPlot() } #end if/else if PDF==TRUE if (LocPlot == TRUE) { drawScene(surfaceTriangles(1:nrow(M.plot), 1:ncol(M.plot), M.plot, color = "gray"), screen = list(z = 40, x = -60)) text(posx, posy, labels) plots_ret[[paste0(j,"Functional Landscape-NAMES")]] <- recordPlot() } } #end for j return(plots_ret) } #end FUNCTION multiDimBio/R/PPCA.R0000644000176200001440000000430712726020637013550 0ustar liggesusers#' A function to perform a probabilistic principle component analysis #' #' Performs a probabilistic principle component analysis using the function #' 'pca' in the package'pcaMethods' #' #' In PPCA an Expectation Maximization (EM) algorithm is used to fit a Gaussian #' latent variable model ( Tippping and Bishop (1999)). A latent variable model #' seeks to relate an observed vector of data to a lower dimensional vector of #' latent (or unobserved) variables, an approach similar to a factor analysis. #' Our implementation is a wrapper around the pcaMethods functions ppca and #' svdimpute (Stacklies et al. (2007)) and is included mainly for convience. #' The method used in pca was adapted from Roweis (1997) and a Matlab script #' developed by Jakob Verbeek. #' #' @param Data A (non-empty), numeric matrix of data values #' @param nPCs The number of resulting principle component axes. nPCs must be #' less than or equal to the number of columns in Data. #' @param CENTER A logical statement indicating whether data should be centered #' to mean 0, TRUE, or not, FALSE. #' @param SCALE A character string indicating which method should be used to #' scale the variances. The default setting is 'vector.' #' @return Returns an object of class 'pcaRes.' See documentation in the #' package code\link[pcaMethods:pcaMethods]{ pcaMethods} #' @seealso \code{\link[pcaMethods:pcaMethods]{pcaMethods}}, \code{\link{pca}} #' @references Roweis S (1997). EM algorithms for PCA and sensible PCA. Neural #' Inf. Proc. Syst., 10, 626 - 632. #' #' Stacklies W, Redestig H, Scholz M, Walther D, Selbig J (2007). pcaMethods - #' a Bioconductor package providing PCA methods for incomplete data. #' Bioinformatics, 23, 1164 - 1167. #' #' Tippping M, Bishop C (1999). Probabilistic Principle Componenet Analysis. #' Journal of the Royal Statistical Society. Series B (Statistical #' Methodology), 61(3), 611 - 622. #' @examples #' #' data(Nuclei) #' PPCA1<-PPCA(Nuclei, nPCs=2, CENTER=TRUE, SCALE='vector') #' Scores1<-PPCA1@scores #' #' @export PPCA PPCA <- function(Data, nPCs = 4, CENTER = TRUE, SCALE = "vector") { PC <- pca(Data, nPcs = nPCs, method = "ppca", center = CENTER, scale = SCALE) return(PC) } #end Function multiDimBio/R/completeData.R0000644000176200001440000001144412726020637015427 0ustar liggesusers#' Function to impute missing data. #' #' This function imputes missing data using a probabilistic principle component #' analysis framework and is a wrapper around functions implemented in the #' pcaMethods package (Stacklies et al. 2007), was proposed by Troyanskaya et #' al 2001 and is based on methods developed in Roweis 1997. #' #' #' @param data a (non-empty) numeric matrix of data values. #' @param n_pcs a (non-empty) numeric value indicating the desired number of #' principle component axes. #' @param cut.trait a number indicating the maximum proportion of missing #' traits before an individual is removed from data. A value of 1 will not #' remove any individuals and 0 will remove them all. #' @param cut.ind a number indicating the maximum proportion of individuals #' missing a trait score before that trait is removed from data. A value of 1 #' will not remove any traits and 0 will remove them all. #' @param show.test a logical statement indicating whether a diagnostic plot of #' the data imputation should be returned. #' @return Returns a list with two entries. \item{complete_dat}{ an object of #' class matrix with missing values imputed using a probabilistic principle #' component framework. } \item{plots}{ a list of plots stored as grid plots. } #' @seealso \code{\link[pcaMethods:pcaMethods]{pcaMethods}}, \code{\link{pca}} #' @references Roweis S (1997). EM algorithms for PCA and sensible PCA. Neural #' Inf. Proc. Syst., 10, 626 - 632. #' #' Stacklies W, Redestig H, Scholz M, Walther D, Selbig J (2007). pcaMethods - #' a Bioconductor package providing PCA methods for incomplete data. #' Bioinformatics, 23, 1164 - 1167. #' #' Troyanskaya O, Cantor M, Sherlock G, Brown P, Hastie T, Tibshirani R, #' Botstein D, Altman R (2001). Missing value estimation methods for DNA #' microarrays. Bioinformatics, 17(6), 520 - 5252. #' @examples #' #' data(Nuclei) #' npcs<-floor(ncol(Nuclei)/5) #' #' length(which(is.na(Nuclei))==TRUE) #' #' dat.comp<-completeData(data = Nuclei, n_pcs = npcs) #' #' length(which(is.na(dat.comp))==TRUE) #' #' @export completeData completeData <- function(data, n_pcs, cut.trait = 0.5, cut.ind = 0.5, show.test = TRUE) { # removing traits cut.trait <- round(nrow(data) * cut.trait) na.mat.tr <- is.na(data) na.sum.tr <- colSums(na.mat.tr) rm.na.tr <- which(na.sum.tr > cut.trait) if (length(rm.na.tr) > 0) { data <- data[ ,-rm.na.tr] cat("Traits removed: ", rm.na.tr, "\n", "\n") } # removing individuals cut.ind <- round(ncol(data) * cut.ind) na.mat.in <- is.na(data) na.sum.in <- rowSums(na.mat.in) rm.na.in <- which(na.sum.in > cut.ind) if (length(rm.na.in) > 0) { data <- data[-rm.na.in, ] cat("Individuals removed:", rm.na.in, "\n", "\n") } # ppca ppca <- pca(data, n_pcs = n_pcs, method = "ppca", center = TRUE, scale = "vector") # Impute missing data imp1 <- completeObs(ppca) plots_ret <- list() if (show.test == TRUE) { na.mat <- is.na(data) na.sum <- rowSums(na.mat) rm.NA <- which(na.sum > 0) if (length(rm.NA) > 0) { data.complete <- as.matrix(data[-rm.NA, ]) } #end if length(rm.NA) missing <- sum(na.sum)/(ncol(data) * nrow(data)) censor <- round((ncol(data.complete) * nrow(data.complete)) * missing) dataC <- matrix(data.complete, ncol = 1) make.NA <- sample(1:nrow(dataC), censor, replace = FALSE) dataC[make.NA, ] <- NA data.c.NA <- matrix(dataC, ncol = ncol(data.complete)) # ppca ppca.c.NA <- pca(data.c.NA, n_pcs = n_pcs, method = "ppca", center = TRUE, scale = "vector") # Impute missing data imp2 <- completeObs(ppca.c.NA) for (p in 1:ncol(imp2)) { layout(matrix(c(3, 1, 1, 3, 3, 1, 1, 3, 3, 2, 2, 3, 3, 2, 2, 3), ncol = 4, byrow = TRUE)) cols <- c("black", "red") ids <- rep(1, nrow(imp2)) ids[which(is.na(data.c.NA[, p]) == TRUE)] <- 2 plot(imp2[, p], data.complete[, p], main = colnames(data.complete)[p], xlab = "Imputed data", ylab = "Real data", pch = 16, col = cols[ids]) hist((imp2[, p] - data.complete[, p])/mean(data.complete[, p]), breaks = 10, xlab = "Relative Error", main = colnames(data.complete)[p], col = "grey") plots_ret[[p]] <- recordPlot() } #end for p } #end if show.test==TRUE return(list("complete_dat" = imp1, "plots" = plots_ret)) } #end function multiDimBio/R/FSelect.R0000644000176200001440000001323612726020637014353 0ustar liggesusers#' A Function to perform step-wise discriminant analysis using F statistics #' #' Select data using a F tests #' #' #' @param Data A (non-empty), numeric matrix of data values #' @param Group A (non-empty), vector indicating group membership. #' Length(unique(Group))==2 #' @param target The number of desired traits. Target cannot be greater than #' the number of columns in Data #' @param p.adj.method The method used to control for false discovery. The #' default setting is 'holm' #' @param Missing.Data The method used to handle missing data. The default, #' 'Complete' will use completeData to impute missing data, setting #' Missing.Data='Remove' will remove all individuals with missing data. #' FSelect cannot handle missing data. #' @return FSelect returns list containing at least the following components: #' #' \item{Selected}{ An ordered list indicating which columns were selected. } #' \item{F.Selected}{ An ordered list containing the F statistics for each #' column indicated in Selected. } \item{PrF}{ An ordered list containing the #' p values for each column indicated in Selected. } \item{PrNotes}{ A string #' indicating which method was used to control for multiple comparisons } #' \item{model}{ An lm object with the final model results. } #' @seealso \code{\link{completeData}} #' @references Costanza M, Afifi A (1979). Comparison of Stopping Rules in #' Forward Stepwise Discriminant Analysis. Journal of the American Statistical #' Association, pp. 777 - 78 #' #' Habbema J, Hermans J (1977). Selection of Variables in Discriminant Analysis #' by F - Statistics and Error Rate. Technometrics, 19(4), 487 - 493. #' #' Jennrich R (1977). Stepwise discriminant analysis, volume 3. New York Wiley #' Sons. #' @examples #' #' data(Nuclei) #' data(Groups) #' npcs<-floor(ncol(Nuclei)/5) #' #' dat.comp <- completeData(data = Nuclei, n_pcs = npcs) #' groups.use <- c(1,2) #' use.dat <- which(Groups==groups.use[1]|Groups==groups.use[2]) #' #' dat.use <- Nuclei[use.dat,] #' GR.use <- Groups[use.dat] #' #' #not run #' #FSelect(DAT.use,GR.use,3) #' #' @export FSelect FSelect <- function(Data, Group, target, p.adj.method = "holm", Missing.Data = "Complete") { Group <- as.factor(Group) Group <- as.numeric(Group) # Missing data if (Missing.Data == "Complete" & length(which(is.na(Data) == TRUE)) > 0) { cat("Missing data imputed using completeData to exlude missing data set Missing.Data=Remove", "\n", "\n") Data <- completeData(Data, n_pcs = floor(ncol(Data)/5), cut.trait = 1, cut.ind = 1)$complete_dat } if (Missing.Data == "Remove" & length(which(is.na(Data) == TRUE)) > 0) { cat("Individuals with missing data removed, to impute missing data set Missing.Data=Complete", "\n", "\n") rm <- na.omit(Data) Data <- Data[-attr(rm, "na.action"), ] Group <- Group[-attr(rm, "na.action")] } F.selected <- c() selected <- c() T.selected <- c() PrFs <- c() GRs <- unique(Group) if (length(GRs) > 2) { print("This isnt going to work, n.Groups needs to equal 2") } n1 <- length(which(Group == GRs[1])) n2 <- length(which(Group == GRs[2])) df1 <- n1 - 1 df2 <- n2 - 1 counter = 0 if (min(Data) < 1e-04) { TOL <- min(Data) } else { TOL <- 1e-04 } while (counter < target) { cols <- 1:ncol(Data) if (counter == 0) { Fs <- c() Ts <- c() for (c in cols) { m.i <- lda(Group ~ Data[, c], tol = TOL) scores <- predict(m.i) F.i <- partialF(m.i, Group, 0) # F.i<-V.w(scores$x[,1],Group)/V.b(scores$x[,1],Group) T.i <- sum(m.i$svd) Fs <- c(Fs, F.i) Ts <- c(Ts, T.i) } #end for c in 1:ncol(Data) select <- which(Fs == max(Fs)) PrF <- df(max(Fs), df1, df2) PrF <- p.adjust(PrF, method = p.adj.method, c) selected <- c(selected, cols[select]) F.selected <- c(F.selected, max(Fs)) T.selected <- c(T.selected, Ts[select]) PrFs <- c(PrFs, PrF) } else { c.in <- cols[-selected] Fs <- c() Ts <- c() for (c in c.in) { m.i <- lda(Group ~ Data[, c(selected, c)], tol = TOL) scores <- predict(m.i) F.i <- partialF(m.i, Group, T.selected[counter]) # F.i<-V.w(scores$x[,1],Group)/V.b(scores$x[,1],Group) T.i <- sum(m.i$svd) Fs <- c(Fs, F.i) Ts <- c(Ts, T.i) } #end for c in c.in select <- which(Fs == max(Fs)) if (max(Fs) < 1) { PrF <- 1 } else { PrF <- df(max(Fs), df1, df2) PrF <- p.adjust(PrF, method = p.adj.method, c) } #end if/else max(Fs) selected <- c(selected, c.in[select]) F.selected <- c(F.selected, max(Fs)) T.selected <- c(T.selected, Ts[select]) PrFs <- c(PrFs, PrF) } #end if/else counter counter <- counter + 1 } #end while loop # running the final model m.final <- lda(Group ~ Data[, selected], tol = TOL) notes <- paste("PrF has been", p.adj.method, "adjusted for", counter, "comparisons", sep = " ") results <- list("Selected" = selected, "F.Selected" = F.selected, "PrF" = PrFs, "PrNotes" = notes, "model" = m.final) return(results) } #end F.select multiDimBio/R/PermuteLDA.R0000644000176200001440000001450212726020637014765 0ustar liggesusers#' A function to determine whether two groups are in statistically different #' locations in multivariate space See Collyer and Adams 2007 #' #' The function calculates the multivariate distance between two groups across #' all traits and determines whether they differ signifcantly using a Monte #' Carlo randomization test. The Monte Carlo randomization creates a null #' distribution by randomizing the residual deviation from the group mean #' across all individuals. This method controls for heteroscedasticity and was #' designed by Collyer and Adams (2007) for use in analyzing data sets that #' have sparse groups sizes relative to the number of traits. #' #' Determining the statistical significance of a discriminate function analysis #' along with performing that analysis on sparse data sets, e.g. many traits #' observed on comparatively few individuals, is a challenge. Collyer and Adams #' (2007) developed a Monte Carlo based algorithm for addressing both of those #' issues. Briefly, the test uses the underlying Var/Cov structure of the data #' and randomizes the group membership to calculate a null distribution. This #' test simultaneously controls for heteroscedasticity, a common problem in #' sparse data sets and allows the approximation of a p-value for the test. For #' the original implementation and formulation of the method see Collyer and #' Adams (2007) or http://www.public.iastate. edu/~dcadams/software.html. #' Unlike the FSelect implementation, PermuteLDA will work properly with an #' arbitrary number of groups. The time required to run the algorithm is #' non-linear in the number of groups. #' #' @param Data A (non-empty), numeric matrix of data values #' @param Groups A (non-empty), vector indicating group membership. #' @param NPerm The number of permutations used to generate the null #' distribution. The default is 100. #' @param Missing.Data The method used to handle missing data. The default, #' 'Complete' will use CompleteData to impute missing data, setting #' Missing.Data='Remove' will remove all individuals with missing data. #' FSelect cannot handle missing data. #' @return Returns a data frame with four columns and the number of groups #' choose 2 rows. Each row is a pairwise comparison between groups. The #' column 'Pr' is the p value to reject the null hypothesis of no difference (a #' value in 'Pr' < 0.05 would result in rejecting the hypothesis that the two #' groups are not different. The column 'Distance' is the multivariate #' distance between the two groups. #' @seealso \code{\link{PermuteLDA}} #' @references Collyer M, Adams D (2007). Analysis of Two - State Multivariate #' Phenotypic Change in Ecological Studies. Ecology, 88(3), 683 - 692. #' #' For an implementation of the original method coded in R see #' http://www.public.iastate. edu/~dcadams/software.html. #' @examples #' #' data(Nuclei) #' data(Groups) #' PermuteLDA(Nuclei,Groups,50) #' #' @export PermuteLDA PermuteLDA <- function(Data, Groups, NPerm, Missing.Data = "Complete") { Groups <- as.factor(Groups) Groups <- as.numeric(Groups) # Missing data if (Missing.Data == "Complete" & length(which(is.na(Data) == TRUE)) > 0) { warning("Missing data imputed using completeData to exlude missing data set Missing.Data=Remove", "\n", "\n") Data <- completeData(Data, n_pcs = floor(ncol(Data)/5), cut.trait = 1, cut.ind = 1)$complete_dat } if (Missing.Data == "Remove" & length(which(is.na(Data) == TRUE)) > 0) { warning("Individuals with missing data removed, to impute missing data set Missing.Data=Complete", "\n", "\n") rm <- na.omit(Data) Data <- Data[-attr(rm, "na.action"), ] Groups <- Groups[-attr(rm, "na.action")] } # 1 comparison matrix ngroups <- length(unique(Groups)) comp <- makeCompMat(ngroups) # 2 comparisons results.i <- c() dists.i <- c() for (i in 1:nrow(comp)) { # selecting groups use.i <- which(Groups == comp[i, 1] | Groups == comp[i, 2]) dat.i <- as.matrix(Data[use.i, ]) group.i <- rep(1, nrow(dat.i)) group2 <- which(Groups[use.i] == comp[i, 2]) group.i[group2] <- -1 # estimating parameters m.i <- lm(dat.i ~ group.i) # least-square mean obs.i <- by(m.i$fitted.value, group.i, colMeans) obs.i <- rbind(obs.i$"1", obs.i$"-1") # difference between groups obs.diff.i <- obs.i[1, ] - obs.i[2, ] dist.obs <- sqrt(t(obs.diff.i) %*% obs.diff.i) # expected values exp.i <- matrix(ncol = ncol(dat.i), nrow = nrow(dat.i)) for (k in 1:ncol(exp.i)) { exp.i[which(group.i == 1), k] <- obs.i[1, k] exp.i[-which(group.i == 1), k] <- obs.i[2, k] } #end for k in 1:ncol # residauls res.i <- dat.i - exp.i # permutation dist.i <- dist.obs p.i <- 1 indiv.i <- 1:nrow(res.i) group1 <- c() group2 <- c() for (j in 1:NPerm) { # randoms sample and create data rand.j <- sample(indiv.i) dat.j <- dat.i[rand.j, ] # estimating parameters m.j <- lm(dat.j ~ group.i) # least-square mean obs.j <- by(m.j$fitted.value, group.i, colMeans) obs.j <- rbind(obs.j$"1", obs.j$"-1") # difference between groups obs.diff.j <- obs.j[1, ] - obs.j[2, ] dist.obs.j <- sqrt(t(obs.diff.j) %*% obs.diff.j) # record results dist.i <- c(dist.i, dist.obs.j) p.j <- ifelse(dist.obs.j >= dist.obs, 1, 0) p.i <- c(p.i, p.j) group1 <- c(group1, sum(obs.j[1, ])) group2 <- c(group2, sum(obs.j[2, ])) } #end for j in NPerm results.i <- c(results.i, sum(p.i)/length(p.i)) dists.i <- c(dists.i, dist.obs) } #end for i in 1:nrow(comp) resultsP <- cbind(comp, results.i, dists.i) resultsP <- as.data.frame(resultsP) colnames(resultsP) <- c("Group 1", "Group 2", "Pr", "Distance") return(resultsP) } #end function permute.lda multiDimBio/R/makeCompMat.R0000644000176200001440000000212412726020637015216 0ustar liggesusers#' A function to create a pairwise comparison matrix #' #' This function creates a pairwise comparison matrix for n groups. All #' possible pairwise combinations are created, with rows in the matrix equal to #' the desired comparison. #' #' #' @param ng A single number indicating the total number of unique groups #' @return Returns a matrix with two columns and ng choose 2 rows. #' @seealso \code{\link{PermuteLDA}} #' @examples #' #' makeCompMat(3) #' #' makeCompMat(4) #' #' data(Groups) #' NGroups<-length(unique(Groups)) #' #' makeCompMat(NGroups) #' #' @export makeCompMat makeCompMat <- function(ng) { comparisons <- c() for (i in 1:ng) { c.i <- numeric(0) for (j in i:ng) { c.j <- c(i, j) if ((i - j) == 0) { next } else { c.i <- c(c.i, c.j) } #end if/else i-j == 0 } #end for j in i:ng comparisons <- c(comparisons, c.i) } #end for i in 1:ng compare <- matrix(comparisons, ncol = 2, byrow = TRUE) return(compare) } #end function makeCompMat multiDimBio/R/Loadings.R0000644000176200001440000000562012726020637014564 0ustar liggesusers#' A function to visualize trait loadings onto discriminant function and #' principle component axes #' #' This function produces barplots representative of the contribution of a #' particular trait or variable to either a discriminant function or principle #' component axis. #' #' #' @param DATA A (non-empty) numeric matrix with trait values #' @param GROUPS A (non-empty)factor vector indicating the group membership of #' each row in DATA #' @param method An optional list indicating whether the results for a #' principle component analysis, 'PCA', or linear discriminant analysis, 'LDA' #' should be performed. #' @return Outputs a list with values and plots for each test listed in method. #' @seealso \code{\link{pca}}, \code{\link{lda}} #' @examples #' #' data(Nuclei) #' data(Groups) #' Loadings(Nuclei, Groups, method=c("PCA", "LDA")) #' #' @export Loadings Loadings <- function(DATA, GROUPS, method = c("PCA", "LDA")) { results <- list() plots_ret <- list() if (sum(method == "PCA") > 0) { nPCS = floor(ncol(DATA)/5) PPCA <- pca(DATA, nPcs = nPCS, method = "ppca", center = TRUE, scale = "vector") OUT <- PPCA@loadings rownames(OUT) <- 1:nrow(OUT) NAMES <- data.frame(1:nrow(OUT), rownames(PPCA@loadings)) colnames(NAMES) <- c("Number", "Trait") results[["Number_Trait_PCA"]] <- NAMES results[["Loadings"]] <- PPCA@loadings for (i in 1:nPCS) { title <- paste("PC", i, sep = "") barplot(abs(OUT[, i]), main = paste(title, "- Variance Explained = ", round(PPCA@R2[i], 3)), cex.names = 0.5) plots_ret[[paste(i, "PCA-Loadings.pdf", sep = "_")]] <- recordPlot() } #end for i } #end if PCA if (sum(method == "LDA") > 0) { if (min(DATA, na.rm = TRUE) < 1e-04) { TOL <- min(DATA, na.rm = TRUE) } else { TOL <- 1e-04 } LDA <- lda(GROUPS ~ DATA, tol = TOL) OUT <- LDA$scaling rownames(OUT) <- 1:nrow(OUT) NAMES <- data.frame(1:nrow(OUT), rownames(LDA$scaling)) colnames(NAMES) <- c("Number", "Trait") results[["Number_Trait_PCA"]] <- NAMES results[["Loadings"]] <- LDA$scaling for (j in 1:ncol(OUT)) { title <- paste("LD", j, sep = "") barplot(abs(OUT[, j]), main = title, cex.names = 0.5) plots_ret[[paste(j, "LDA-Loadings.pdf", sep = "_")]] <- recordPlot() } #end for j } #end if LDA return(list("results" = results, "plots" = plots_ret)) } #end FUNCTION multiDimBio/R/partialF.R0000644000176200001440000000307012726020637014563 0ustar liggesusers#' A function to compute partial F statistics #' #' This is an internal function used in FSelect. It can only be used for two #' groups. The partial F statistic is the additional contribution to the model #' from adding one more trait. #' #' #' @param m.lda An object of class 'lda' #' @param GROUP A factor vector indicating group membership #' @param T_pm1 The F statistic calculated for a discriminant analysis with #' only the most informative trait. #' @return Returns a partial F statistic #' @seealso \code{\link{FSelect}} #' @references Habbema J, Hermans J (1977). Selection of Variables in #' Discriminant Analysis by F-Statistics and Error Rate. Technometrics, 19(4), #' 487 - 493. #' @examples #' #' #Internal function used in FSelect #' #' data(Nuclei) #' data(Groups) #' #' NPC<-floor(ncol(Nuclei)/5) #' #' DAT.comp<-completeData(Nuclei, n_pcs = NPC) #' Groups.use<-c(1,2) #' use.DAT<-which(Groups==Groups.use[1]|Groups==Groups.use[2]) #' #' DAT.use<-Nuclei[use.DAT,] #' GR.use<-Groups[use.DAT] #' #' traitA<-2 #' #' mlda<-MASS::lda(GR.use~DAT.use[,traitA]) #' #' F1<-partialF(mlda,GR.use,0) #' #' traitB<-1 #' #' mlda2<-MASS::lda(GR.use~DAT.use[,c(traitA,traitB)]) #' #' partialF(mlda2,GR.use,F1) #' #' #' @export partialF partialF <- function(m.lda, GROUP, T_pm1) { GRS <- unique(GROUP) n1 <- length(which(GROUP == GRS[1])) n2 <- length(which(GROUP == GRS[2])) v <- n1 + n2 - 2 p <- ncol(m.lda$means) T_p <- sum(m.lda$svd) T_pm1 <- T_pm1 Fp <- (v - p + 1) * ((T_p - T_pm1)/(v + T_pm1)) return(Fp) } #end partialF multiDimBio/R/getP.R0000644000176200001440000000261212726020637013721 0ustar liggesusers#' An internal function for getting empirical p values #' #' Simulates p values. #' #' #' @param ndads a (non-empty) numeric value indicating the number of dads. #' @param mm a (non-empty) numeric value indicating the mean number of #' offspring per dad per bin (normal dist). #' @param vv a (non-empty) numeric value indicating the variance in offspring #' per dad per bin (normal dist). #' @param tau2 a (non-empty) numeric value indicating the dad effect #' (narrow-sense heritability ~ tau2/(tau2+(pi/sqrt(3))^2)). #' @param nperms a (non-empty) numeric value indicating the number of bootstrap #' permutations to use for caluclating a p value. #' @param nsims a (non-empty) numeric value indicating the number of #' simulations to run per parameter combination. #' @param nbins a (non-empty) numeric value indicating the number of bins, data #' are pooled before analysis. #' @return Returns a vector of simulated p values. The list contains: #' @examples #' #' ndads <- c(9,18) #' mm <- 4.629634 #' vv <- 6.31339 #' tau2 <- c(0,0.5) #' nperms <- 2 #' nsims <- 2 #' nbins <- 3 #' getP(ndads = ndads, mm = mm, vv = vv, tau2 = tau2, nperms = nperms, nsims = nsims, nbins = nbins) #' #' @export getP getP <- function(ndads, mm, vv, tau2, nperms, nsims, nbins) { ps <- c() for (i in 1:nsims) { p.i <- simPower(ndads, mm, vv, tau2, nperms, nbins) ps <- c(ps, p.i) } return(ps) } #end getP multiDimBio/R/PercentMax.R0000644000176200001440000000163112726020637015070 0ustar liggesusers#' A function to scale data to the percent of the maximum observed #' #' This function rescales the columns in a data matrix to the percent of the #' maximum observed value. The variance is not scaled and missing values are #' ignored in the calculation. #' #' #' @param DATA A (non-empty) matrix with data values. Columns should be #' different traits and rows unique observations of those traits #' @return Returns a matrix with the same dimensions as DATA. #' @seealso \code{\link{ZTrans}}, \code{\link{MeanCent}} #' @examples #' #' data(Nuclei) #' #' colMeans(Nuclei, na.rm=TRUE) #' #' Nuclei.PM<-PercentMax(Nuclei) #' #' colMeans(Nuclei.PM, na.rm=TRUE) #' #' @export PercentMax PercentMax <- function(DATA) { cmax <- apply(DATA, 2, max, na.rm = TRUE) cmat <- matrix(cmax, ncol = ncol(DATA), nrow = nrow(DATA), byrow = TRUE) PMX.DATA <- DATA/cmat return(PMX.DATA) } #end FUNCTION multiDimBio/R/simPower.R0000644000176200001440000000515512737462571014645 0ustar liggesusers#' An internal function of binomPower, which actually calculates the p value #' #' An internal function of binomPower, which actually calculates the p value. #' #' #' @param ndads a (non-empty) numeric value indicating the number of dads. #' @param mm a (non-empty) numeric value indicating the mean number of #' offspring per dad per bin (normal dist). #' @param vv a (non-empty) numeric value indicating the variance in offspring #' per dad per bin (normal dist). #' @param tau2 a (non-empty) numeric value indicating the dad effect #' (narrow-sense heritability ~ tau2/(tau2+(pi/sqrt(3))^2)). #' @param nperms a (non-empty) numeric value indicating the number of bootstrap #' permutations to use for caluclating a p value. #' @param nbins a (non-empty) numeric value indicating the number of bins, data #' are pooled before analysis. #' @return Returns a p value for a given set of conditions over a specificed #' number of bootstrap permutations. #' @examples #' #' #not run #' #' @export simPower simPower <- function(ndads, mm, vv, tau2, nperms, nbins) { if(mm >= vv){ stop("mm must be less than vv.") } mylogit = function(x) log(x/{ 1 - x }) ilogit = function(x) 1/{ 1 + exp(-x) } swimprob = ilogit(rnorm(ndads, 0, sqrt(tau2))) mytable = NULL for (i in 1:ndads) { bincounts = pmax(1, rnbinom(nbins, mu = mm, size = mm^2/{ vv - mm })) swim = rbinom(3, bincounts, swimprob[i]) set = bincounts - swim theserows = data.frame(Dad = i, Bin = 1:nbins, set = set, swim = swim) mytable = rbind(mytable, theserows) } ncases = nrow(mytable) empfreq = aggregate(swim ~ Dad, data = mytable, sum)[, 2]/aggregate(swim + set ~ Dad, data = mytable, sum)[, 2] # Compute the likelihood ratio statistic hm0 = glm(cbind(swim, set) ~ 1, data = mytable, family = binomial) hm1 = glmer(cbind(swim, set) ~ (1 | Dad), data = mytable, family = binomial, REML = FALSE) Dsim = as.numeric(deviance(hm0) - deviance(hm1)) perm1 <- c() for (i in 1:nperms) { neworder <- sample(1:ncases, ncases) ptable <- data.frame(Dad = mytable$Dad, Bin = mytable$Bin, swim = mytable$swim[neworder], set = mytable$set[neworder]) hm0 <- glm(cbind(swim, set) ~ 1, data = ptable, family = binomial) hm1 <- glmer(cbind(swim, set) ~ (1 | Dad), data = ptable, family = binomial) D <- as.numeric(deviance(hm0) - deviance(hm1)) perm1 <- c(perm1, D) } pval <- length(which(perm1 > Dsim))/length(perm1) return(pval) } multiDimBio/R/plotBinomPower.R0000644000176200001440000001006513024771310015774 0ustar liggesusers#' A function to plot the results of a binomPower run #' #' A function to plot the results of a binomPower run. #' #' #' @param datPlotBig a (non-empty) matrix of data values, with columns trueTau, #' ndads, trueTau2 #' @param params a (non-empty) matrix of parameter values, with columns mm and #' vv. #' @return Returns a list of two plots of the binomPower analysis results. #' @examples #' #' #not run #' #' @export plotBinomPower plotBinomPower <- function(datPlotBig, params) { if (length(unique(params$mm)) > 1) { stop("function does not work with more than one mean offspring number") } if (length(unique(params$vv)) > 1) { stop("function does not work with more than one var in offspring number") } # solving ggplot2 binding error ndads <- NULL # preping data setss datPlotBig$trueTau <- as.factor(datPlotBig$trueTau) datPlotBig$ndads <- round(as.numeric(as.character(datPlotBig$ndads))) datPlotBig$ndads <- round(datPlotBig$ndads, 1) datPlotBig$ndads <- as.factor(datPlotBig$ndads) # colors cols <- colorRampPalette(c("#FFF7FB", "#74A9CF", "#023858"), space = "Lab") pal <- cols(length(unique(datPlotBig$ndads))) ###### plotting# plots_ret <- list() # values h2 > 0 use <- which(datPlotBig$trueTau2 > 0) dat.plot <- datPlotBig[use, ] # transforming variances to heritability h2 <- 4 * (dat.plot$trueTau2/(dat.plot$trueTau2 + (pi/sqrt(3))^2)) auc <- dat.plot$trueP/(dat.plot$trueP + dat.plot$falseP) dat.plot <- data.frame(dat.plot, h2, auc) p <- ggplot(dat.plot, aes(h2, auc, colour = ndads, group = ndads)) p.save <- p + geom_line(size = 1.5) + scale_colour_manual(values = pal) + xlab("h2") + ylab("auc") + theme(legend.position = "right", legend.key = element_rect(fill = "gray"), legend.background = element_rect(fill = "#ffffffaa", colour = "black"), panel.background = element_rect(fill = "gray", colour = "black"), axis.text.y = element_text(colour = "black", size = 15), axis.text.x = element_text(colour = "black", size = 20), axis.title = element_text(colour = "black", size = 20), panel.grid.minor = element_line(colour = "#00000050", linetype = 3), panel.grid.major = element_line(colour = "#00000060", linetype = 3)) + scale_y_continuous(expand = c(0.005, 0.005)) + scale_x_continuous(expand = c(0, 0.003), limits = c(0, max(dat.plot$h2))) plots_ret[["powerCurveBinom"]] <- p.save # values h2 == 0 use <- which(datPlotBig$trueTau2 == 0) dat.plot <- datPlotBig[use, ] auc <- dat.plot$trueP/(dat.plot$trueP + dat.plot$falseP) rownames(dat.plot) <- NULL dat.plot <- data.frame(dat.plot, h2, auc) vals <- by(dat.plot[, "auc"], dat.plot[, "ndads"], mean, na.rm = TRUE) dat.plot <- data.frame(as.factor(as.numeric(names(vals))), unlist(vals)[1:length(vals)]) colnames(dat.plot) <- c("ndads", "auc") p1 <- ggplot(dat.plot, aes(x = ndads, y = auc, fill = ndads)) p1.save <- p1 + geom_bar(stat = "identity") + scale_fill_manual(values = pal) + xlab("number of dads") + ylab("auc (true h2 = 0)") + theme(legend.position = "right", legend.key = element_rect(fill = "gray"), legend.background = element_rect(fill = "#ffffffaa", colour = "black"), panel.background = element_rect(fill = "gray", colour = "black"), axis.text.y = element_text(colour = "black", size = 15), axis.text.x = element_text(colour = "black", size = 20), axis.title = element_text(colour = "black", size = 20), panel.grid.minor = element_line(colour = "#00000050", linetype = 3), panel.grid.major = element_line(colour = "#00000060", linetype = 3)) + scale_y_continuous(expand = c(0, 0), limits = c(0, 1)) + scale_x_discrete(expand = c(0.01, 0.01)) plots_ret[["powerTrue_h2_equal_0_Binom"]] <- p1.save return(plots_ret) } multiDimBio/MD50000644000176200001440000000537513532240373013014 0ustar liggesusers4093d955aae54a44dbf36176fb90fa4a *DESCRIPTION c12a487762c715d822d61d17d17cceb9 *NAMESPACE bb1eb2d42e8aa058c9efdaaa0e2adff6 *R/FSelect.R ab38d3fb814d3180d59619e16eae1d82 *R/IntPlot.R ca24bc0a84922527dd36cd5b222099f3 *R/LandscapePlot.R 343e4f32bc5018309f37d49b3ce18eca *R/Loadings.R a71fff6693c3f1c44d73ff650a81cf56 *R/MeanCent.R 56a5660e805df1f98808c8c13d4c09e7 *R/PPCA.R 77e59d482a9b9556a0d4c4d727b37f5b *R/PercentMax.R 3892916175fdf1da3d725afb03d6522a *R/PermuteLDA.R 7c534eab05179a8539790f5bcff4face *R/Power.R ebbe8b00393c7066ae6714e7fa107850 *R/ZTrans.R f387ef2226b64ec14dcc658460f5b4bf *R/binomPower.R a6fcccc84269f9dcef33f7f79eeb12b6 *R/boxWhisker.R 0be11654bcc8d86f690cc093e6b3ea5d *R/completeData.R daed90ee197ecbfd6754242f208dc796 *R/getP.R e8c7e535528639cabc5c2de27f729727 *R/h2Estimate.R d1ac5fcd383b87c816bac5fb5b49f7c2 *R/ldaPlot.R f1dfb3eab235e0bf62e2b054a16d2fa4 *R/makeCompMat.R 395f01ec86ff9e2c142af5545333a4e7 *R/multiDimBio-package.R b29d8dd6dfd04bf5d1c05fc1f007ac6a *R/partialF.R 212589c55caad356ad5a254b6adbb00d *R/plotBinomPower.R 7ba10e5d04e44a026afafebbc33bfcd9 *R/simPower.R 354b44ff476c9c469e4b7db3f945c0ba *data/CondA.RData 7bf61b2fce8bb11143dcf40409e0b187 *data/CondB.RData e622fa232553c6001d5fa59beef3ee7b *data/Dyad.RData 033286189fe8b1ba2a7bae73ee094b47 *data/Groups.RData 3c234a8f368eb7f18075de135f2689d4 *data/Nuclei.RData 8b39adf516449ba9960910d4d315439f *data/Scores.RData d42155009c05b882a695291415f1ed06 *inst/CITATION c59910f08a398b3ae514e04a7cf88d21 *man/CondA.Rd 8307e4d32d4ea4b53d349ac1fb5ddb8c *man/CondB.Rd 4a711c58eb547ee45796cbd7a9b66b5e *man/Dyad.Rd 2e85244fb49b52b33adff04155964b06 *man/FSelect.Rd deb84fe471957a769395f8901974a354 *man/Groups.Rd 3b530e9f54caf71c42417158b6458344 *man/IntPlot.Rd b9dcca4ce6ac859bf62babeaf6b970ee *man/LandscapePlot.Rd 2dc6dde39d873fcdb56fea7fbc6304a3 *man/Loadings.Rd 41468ba1b57fa84bf448b42b97780b31 *man/MeanCent.Rd 54bb4994e020929aa48270eb0d546462 *man/Nuclei.Rd 67a4afdd073b0cad534a39fe68051c1a *man/PPCA.Rd 6baa58888d1e20778bf7242d96a339a7 *man/PercentMax.Rd 00f0aff9a9a3d31fe739d833b484cf08 *man/PermuteLDA.Rd 157daaad432e6f056b61352245ba7e05 *man/Power.Rd 371d02f9b95dc505f32725bda90242e7 *man/Scores.Rd b751c91a79d9c06c3fbc9fa0ee50d7eb *man/binomPower.Rd c5ef2d6ebf481b7e53bda361fb5b5d37 *man/boxWhisker.Rd ebd662d6532371bdfe899b6052b28f31 *man/completeData.Rd c8acb06150d95b2bcaa062cdfa6bb898 *man/getP.Rd e25199fa7da60c848caf996ff83059a3 *man/h2Estimate.Rd c5e8d45fe24b134aec14d4ff7953276c *man/ldaPlot.Rd e4b064412aef6a740097c72669876e5c *man/makeCompMat.Rd 2620291dc576138813940932b2b99986 *man/multiDimBio-package.Rd 26406eaf4420b56e2e0be4afa8593f7f *man/partialF.Rd 8141c35a56f420580dac1c91ceb5ec24 *man/plotBinomPower.Rd db754fcbe048041ebbb9298b1b2a33f7 *man/simPower.Rd 241d656068ae9fc8833ea72001113596 *man/ztrans.rd multiDimBio/inst/0000755000176200001440000000000013407377315013457 5ustar liggesusersmultiDimBio/inst/CITATION0000644000176200001440000000160112726021550014600 0ustar liggesuserscitHeader("To cite", sQuote("multiDimBio"), "in publications, please use:") citEntry(entry = "unpublished", title = "multiDimBio: An R Package for the Design, Analysis, and Visualization of Systems Biology Experiments", author = personList( as.person("Samuel V. Scarpino"), as.person("Ross Gillette"), as.person("David Crews") ), note = "R Package", url = "https://cran.r-project.org/package=multiDimBio", textVersion = paste( "Scarpino SV, R Gillette, D Crews. (2013) multiDimBio: An R Package for the Design, Analysis, and Visualization of Systems Biology Experiments (R package), http://cran.r-project.org/web/packages/multiDimBio/index.html") ) citFooter("For references to additional papers describing the use of", sQuote("multiDimBio"), "see", sQuote("help(\"multiDimBio\")."))