ggm/0000755000176200001440000000000013622245342011027 5ustar liggesusersggm/NAMESPACE0000644000176200001440000000224313617231154012247 0ustar liggesusersexport(basiSet, bd, edgematrix, swp, adjMatrix, bfsearch, ch, checkIdent, cmpGraph, conComp, correlations, cycleMatrix, DAG, dSep, shipley.test, allEdges, findPath, fitDag, fitDagLatent, fitCovGraph, fitConGraph, fundCycles, In, icf,topOrder, inducedCovGraph, transClos, inducedConGraph, inducedRegGraph,inducedChainGraph, inducedDAG,isAcyclic, isGident, pa, parcor, pcor, pcor.test, rcorr, rsphere, topSort, triDec, UG, rnormDag, drawGraph, makeMG, icfmag, fitAncestralGraph, unmakeMG, essentialGraph, AG, grMAT, MAG, MarkEqRcg, MarkEqMag, Max, MRG, msep, MSG, RepMarBG, RepMarDAG, RepMarUG, RG, RR, SG, SPl, rem, isAG, isADMG, plotGraph, DG, binve, blkdiag, blodiag, diagv, fitmlogit, mat.mlogit, marg.param, null, powerset) import(igraph) importFrom("graphics", "arrows", "lines", "locator", "par", "plot", "points", "segments", "text") importFrom("stats", "binomial", "coef", "cor", "formula", "glm", "lm", "model.frame", "model.matrix", "model.response", "pchisq", "pt", "rnorm", "terms", "var") importFrom("methods", "is") ggm/data/0000755000176200001440000000000013525604422011740 5ustar liggesusersggm/data/surdata.rda0000644000176200001440000001214612265547115014104 0ustar liggesusers\y8kߦJuJpH- D#"c"$cQJ -Ca<{cMI}sNR^?Wq-_0ݿ屾4>{:JTߥkwO|tOO3_Z|TR:ӽNӵ_j?6;uVqgS*p>z{smTG|{w^G=ߖ?翕5?'~t9 s;J$}pl=Z2u8elزiϸbr*ۋbns@kN٘px+&FE:0x$rR.o=IQIȕH%^FꯞKhλbإ"WK݄R971̀*g.x>`pu6hdߔp s훍~+4ҵ *˭͖0t!l͚DZʦ> HI#cA" ,*5Z7V ;Rl)k'D '\^GX.t]QPueݐ;bm.K w= i,I|1#%޻8*= ݷUIL9vV Mg{*3e uЮrNS^Z:>/J^'ˎym^jJXt(֜UmJpj+wqAqb#m0$̝PqjV`E@ZX'M7?'b,d/3dXb{}SGG'8dͤS{Zzr %GŤ=ƷLj_ &S8kN1l֞RP՝`_ m^rfDV̈ a lDZ1 UL7cOg?o`c٪Г*S&02b.$ Fu햶_m?|6?Ǡ_~/ Be@> -@z]8V'}GM P]Wy )+_,ҖEq]89+`*em:JZJpdPE,l(7 T!WM8_HX#u7GFSN- LWUYU@sJNol>L]b?Ba`E0t](^h=gsY׾DKk0S޷M>p/9tuPEÎꃜt;r[qQY>P*whݯ{zX]?eoe-=B8J܂f:^{'hVxJLua+N`9xs%ג VjBo6VU Bҝe.V.0Q)$ǾC~6S7 %E|U%,Do}_Zr}%+81ɳ$q`܍e,umN@(n,_]^g{En@_3=jD>~:>qАi^\7;tp:˞yS`ZZ_k.P%,.yGi1YɕjLDw?×6vص*ڑ o.exBGO,OkQک n[9z.&Hl{S/~Jyӕ{a/üyM:ԟZ~QҢWqE 8?0#;O5r֥"䳠ڝ\X`_VKRd""1K5Ghʸf 8OzoȇOD&g,k?C~Yeq,|Z(L9% 7q%[s"К1qx. ܲ0UJ/ R+Wvڲپ:E O.RsE~2~-LƤDjԱPby%oS~rDMȯOlqppLK1I +rwC@5ekTQ{Nݚ*,_iu+R?&R蹤T  ,XSe )Kl6PXe33BYڂ+[גXjf(/gF^u0;G,MܱmR*,cCw"e'7׏K1d+{.kRH7Y]5L2ǜJR)Ҏ.lHU|- 6ҎDʧ@S09F,)oHg^8$$(?֖`,v2!#b{S?(4dBKifey7rsV{pcXHO?,溹17uQcmHǢ,C``W`0T2C9X͏3նsb2z/.wd,e&ǰp"CPZjw^$6'5Cx~o321AY(}/b)iMqhɵ5r=0D ezSiS *5>$Qy-eKv+ ío/ZGPoߵ:߿E[s[@tYr՟tlVWw+PX^Ji03,CtyG(oY60bհy=f*z:ϙ\x .cL%|6u :)J&n,e3΍=By)X:ǪA!@ wt6U7 u6 8tgH7]'T6!ܪ嘓 v^ OnzgCf+~6 ͷΔsۡG1Ȭ(d?ɊCe5ܰ>1 . qom9T%3@k̺40, wjR聁l>fdxi~BcE6k@'Shz}+EI`{PKK+9.Eb@px1t[r=:Br kUHKU#9Po<_Ngh<ֿZ3838383838=8ؿ}om O?9f-X|˶о:j8T +;l 0s`pP\ǜ xA0T 0Fc6Tggm/data/glucose.rda0000644000176200001440000000162412265547115014101 0ustar liggesusersVKOQ RR鴝>WC₝q(X ܸ0f­;MܰaoƥDpιspuuc14-r rq,hc8g>;Ҵ(WœVY|wG>1XY zF.~:_ ) #ӈQKAuмtks &u qrLuJx>)wSj!6ɖ&4"abbͷE{ߡ+-?~b]j;;Y7h'\[aKO\o6T]ye'q=X}+vVv<^}b/>pg2ū+ybRsݯ#aMNy-O)}f~&lOtpNآ= ~[:eKi6Gb_ KΣ4a3c|ͪy{~>fAl a$8$\'aK$\Vlvmtu>I!ོ췠87 ggm/data/marks.rda0000644000176200001440000000206112741670335013551 0ustar liggesusersuMoEIM ,PŢB(woCb'"ۗ "I8O/C}ْs{ϝ3YyRZKoz6(=Nszsg cd=i-~G0 iÔ0J9_] |m~Y^'q-?㜇_x?3c{؁'_O~:c>_ͺ~mue`kO2)8[?eC;0?֯(rqncݱ.ߡx |jq𻼎ۻ? N> D~wl=zуߛ~^qtz̟xo8uw'ڸO]tf~_G?)qQ:'a=]'>_'Vģ tg?E,<+YW}qKo6 /I>M| <0@> zO1?=~Pzr[ޑ1OtOUq'+~ܷz7[?}cyP: ?x7`ēz1B)gK{]O>xw:usN~!z.o y0 ~9G͹Eߤ ӧ #:7/n.ϧ_woeW_r;d^29.wm[2FIQhh4:X}HBRJZFVN-q(QG!B8 q(QG)R8Jq(QG%J8*qTQG%J8jqQG-Z8jq4hшG#F8q4hъG+V8ZqhъG'N8:qtщG?A:7nsnF Eggm/data/anger.rda0000644000176200001440000000031112265547115013524 0ustar liggesusers r0b```b`b&f H020piļ"<#4cYW,Lg`:cS~Qb`PE#L(6B&ZE7EEH5R_Fܻп&߁Ѝߜw9Lwy 0 0]p&.uԵ ņp)[Ay&)0JcC+yfq~vn~絳SwFa=iG =Eg߈V;f٬޻ 6`+8ZWh'h%H:S36l6jZ8{{) wPan,xށ9 / "5 /3j "EƳZ< Z@+0kV$]G8  th]}kjdQE.;Z/_׷>m& ֒YqSIq{)q:zS^ۺb_`C\εꞚT*0ekT㯷:o c҇%RSݖ|Ͽ~) 0, the order of the correlation matrix.} } \details{ The algorithm uses \code{\link{rsphere}} to generate \eqn{d} vectors on a sphere in \eqn{d}-space. If \eqn{Z} is a matrix with such vectors as rows, then the random correlation matrix is \eqn{ZZ'}. } \value{ a correlation matrix of order \code{d}. } \references{Marshall, G.\& Olkin, I. (1984).Generating correlation matrices. \emph{SIAM J. Sci. Stat. Comput.}, 5, 2, 470--475.} \author{Giovanni M. Marchetti} \seealso{\code{\link{rsphere}}} \examples{ ## A random correlation matrix of order 3 rcorr(3) ## A random correlation matrix of order 5 rcorr(5) } \keyword{distribution} \keyword{multivariate} ggm/man/glucose.Rd0000644000176200001440000000441612312776012013535 0ustar liggesusers\name{glucose} \alias{glucose} \docType{data} \title{Glucose control} \description{ Data on glucose control of diabetes patients. } \usage{data(glucose)} \format{ A data frame with 68 observations on the following 8 variables. \describe{ \item{Y}{a numeric vector, Glucose control (glycosylated haemoglobin), values up to about 7 or 8 indicate good glucose control.} \item{X}{a numeric vector, a score for knowledge about the illness.} \item{Z}{a numeric vector, a score for fatalistic externality (mere chance determines what occurs).} \item{U}{a numeric vector, a score for social externality (powerful others are responsible).} \item{V}{a numeric vector, a score for internality (the patient is him or herself responsible).} \item{W}{a numeric vector, duration of the illness in years.} \item{A}{a numeric vector, level of education, with levels \code{-1}: at least 13 years of formal schooling, \code{1}: less then 13 years.} \item{B}{a numeric vector, gender with levels \code{-1}: females, \code{1}: males.} } } \details{ Data on 68 patients with fewer than 25 years of diabetes. They were collected at the University of Mainz to identify psychological and socio-economic variables possibly important for glucose control, when patients choose the appropriate dose of treatment depending on the level of blood glucose measured several times per day. The variable of primary interest is \code{Y}, glucose control, measured by glycosylated haemoglobin. \code{X}, knowledge about the illness, is a response of secondary interest. Variables \code{Z}, \code{U} and \code{V} measure patients' type of attribution, called fatalistic externality, social externality and internality. These are intermediate variables. Background variables are \code{W}, the duration of the illness, \code{A} the duration of formal schooling and \code{B}, gender. The background variables \code{A} and \code{B} are binary variables with coding \code{-1}, \code{1}. } \source{ Cox & Wermuth (1996), p. 229. } \references{Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate dependencies}. London: Chapman \& Hall.} \examples{ data(glucose) ## See Cox & Wermuth (1996), Figure 6.3 p. 140 coplot(Y ~ W | A, data=glucose) } \keyword{datasets} ggm/man/rnormDag.Rd0000644000176200001440000000373512312776012013650 0ustar liggesusers\name{rnormDag} \alias{rnormDag} \title{Random sample from a decomposable Gaussian model} \description{ Generates a sample from a mean centered multivariate normal distribution whose covariance matrix has a given triangular decomposition. } \usage{ rnormDag(n, A, Delta) } \arguments{ \item{n}{an integer > 0, the sample size.} \item{A}{a square, upper triangular matrix with ones along the diagonal. It defines, together with \code{Delta}, the concentration matrix (and also the covariance matrix) of the multivariate normal. The order of \code{A} is the number of components of the normal.} \item{Delta}{a numeric vector of length equal to the number of columns of \code{A}.} } \details{ The value in position \eqn{(i,j)} of \code{A} (with \eqn{i < j}) is a regression coefficient (with sign changed) in the regression of variable \eqn{i} on variables \eqn{i+1, \dots, d}. The value in position \eqn{i} of \code{Delta} is the residual variance in the above regression. } \value{a matrix with \code{n} rows and \code{nrow(A)} columns, a sample from a multivariate normal distribution with mean zero and covariance matrix \code{S = solve(A) \%*\% diag(Delta) \%*\% t(solve(A))}. } \references{Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate dependencies}. London: Chapman \& Hall.} \author{Giovanni M. Marchetti} \seealso{\code{\link{triDec}}, \code{\link{fitDag}}} \examples{ ## Generate a sample of 100 observation from a multivariate normal ## The matrix of the path coefficients A <- matrix( c(1, -2, -3, 0, 0, 0, 0, 0, 1, 0, -4, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 1, 1, -5, 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 0, 0, 1, -4, 0, 0, 0, 0, 0, 0, 1), 7, 7, byrow=TRUE) D <- rep(1, 7) X <- rnormDag(100, A, D) ## The true covariance matrix solve(A) \%*\% diag(D) \%*\% t(solve(A)) ## Triangular decomposition of the sample covariance matrix triDec(cov(X))$A } \keyword{distribution} \keyword{multivariate} ggm/man/fundCycles.Rd0000644000176200001440000000242012312776012014164 0ustar liggesusers\name{fundCycles} \alias{fundCycles} \title{Fundamental cycles} \description{ Finds the list of fundamental cycles of a connected undirected graph. } \usage{ fundCycles(amat) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{amat}{a symmetric matrix with dimnames denoting the adjacency matrix of the undirected graph. The graph must be connected, otherwise the function returns an error message.} } \details{ All the cycles in an UG can be obtained from combination (ring sum) of the set of fundamental cycles. } \value{ a list of matrices with two columns. Every component of the list is associated to a cycle. The cycle is described by a \eqn{k \times 2} matrix whose rows are the edges of the cycle. If there is no cycle the function returns \code{NULL}. } \references{ Thulasiraman, K. \& Swamy, M.N.S. (1992). \emph{Graphs: theory and algorithms}. New York: Wiley. } \author{Giovanni M. Marchetti} \note{This function is used by \code{cycleMatrix} and \code{isGident}.} \seealso{\code{\link{UG}},\code{\link{findPath}}, \code{\link{cycleMatrix}}, \code{\link{isGident}},\code{\link{bfsearch}}} \examples{ ## Three fundamental cycles fundCycles(UG(~a*b*d + d*e + e*a*f)) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/parcor.Rd0000644000176200001440000000205412312776012013356 0ustar liggesusers\name{parcor} \alias{parcor} \title{Partial correlations} \description{ Finds the matrix of the partial correlations between pairs of variables given the rest. } \usage{ parcor(S) } \arguments{ \item{S}{a symmetric positive definite matrix, representing a covariance matrix.} } \details{ The algorithm computes \eqn{- \sigma^{rs}/(\sigma^{rr} \sigma^{ss})^{1/2}} where the \eqn{\sigma^{rs}} are concentrations, i.e. elements of the inverse covariance matrix. } \value{ A symmetric matrix with ones along the diagonal and in position \eqn{(r,s)} the partial correlation between variables \eqn{r} and \eqn{s} given all the remaining variables.} \references{Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate dependencies}. London: Chapman \& Hall.} \author{Giovanni M. Marchetti} \seealso{\code{\link{var}}, \code{\link{cor}}, \code{\link{correlations}}} \examples{ ### Partial correlations for the mathematics marks data data(marks) S <- var(marks) parcor(S) } \keyword{array} \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/MarkEqRcg.Rd0000644000176200001440000000316412312776012013707 0ustar liggesusers\name{MarkEqRcg} \alias{MarkEqRcg} \title{ Markov equivalence for regression chain graphs. } \description{ \code{MarkEqMag} determines whether two RCGs (or subclasses of RCGs) are Markov equivalent. } \usage{ MarkEqRcg(amat, bmat) } \arguments{ \item{amat}{ An adjacency matrix of an RCG or a graph that can be a \code{graphNEL} or an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type, node1label, node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). } \item{bmat}{ The same as \code{amat}. } } \details{ The function checks whether the two graphs have the same skeleton and unshielded colliders. } \value{ "Markov Equivalent" or "NOT Markov Equivalent". } \references{ Wermuth, N. and Sadeghi, K. (2011). Sequences of regressions and their independences. \emph{TEST}, To appear. \url{http://arxiv.org/abs/1103.2523}. } \author{ Kayvan Sadeghi } %\note{ %% ~~further notes~~ %} \seealso{ \code{\link{MarkEqMag}}, \code{\link{msep}} } \examples{ H1<-matrix(c(0,100,0,0,0,100,0,100,0,0,0,100,0,0,0,1,0,0,0,100,0,0,1,100,0),5,5) H2<-matrix(c(0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,100,0,0,1,100,0),5,5) H3<-matrix(c(0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0),5,5) #MarkEqRcg(H1,H2) #MarkEqRcg(H1,H3) #MarkEqRcg(H2,H3) } \keyword{graphs} \keyword{bidirected graph} \keyword{directed acyclic graph} \keyword{Markov equivalence} \keyword{regression chain graph} \keyword{undirected graph} \keyword{multivariate}ggm/man/transClos.Rd0000644000176200001440000000170012312776012014035 0ustar liggesusers\name{transClos} \alias{transClos} \title{Transitive closure of a graph} \description{ Computes the transitive closure of a graph (undirected or directed acyclic).} \usage{ transClos(amat) } \arguments{ \item{amat}{a Boolean matrix with dimnames representing the adjacency matrix of a graph.} } \details{ The transitive closure of a directed graph with adjacency matrix \eqn{A} is a graph with adjacency matrix \eqn{A^*} such that \eqn{A^*_{i,j} = 1} if there is a directed path from \eqn{i} to \eqn{j}. The transitive closure of an undirected graph is defined similarly (by substituting path to directed path). } \value{ \item{A}{The adjacency matrix of the transitive closure.} } \author{Giovanni M. Marchetti} \seealso{\code{\link{DAG}}, \code{\link{UG}}} \examples{ ## Closure of a DAG d <- DAG(y ~ x, x ~ z) transClos(d) ## Closure of an UG g <- UG(~ x*y*z+z*u+u*v) transClos(g) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/fitDag.Rd0000644000176200001440000000354512312776012013274 0ustar liggesusers\name{fitDag} \alias{fitDag} \title{Fitting of Gaussian DAG models} \description{ Fits linear recursive regressions with independent residuals specified by a DAG. } \usage{ fitDag(amat, S, n) } \arguments{ \item{amat}{a square matrix with dimnames representing the adjacency matrix of the DAG} \item{S}{a symmetric positive definite matrix, the sample covariance matrix} \item{n}{an integer > 0, the sample size} } \details{ \code{fitDag} checks if the order of the nodes in adjacency matrix is the same of \code{S} and if not it reorders the adjacency matrix to match the order of the variables in \code{S}. The nodes of the adjacency matrix may form a subset of the variables in \code{S}. } \value{ \item{Shat}{the fitted covariance matrix.} \item{Ahat}{a square matrix of the fitted regression coefficients. The entry \code{Ahat[i,j]} is minus the regression coefficient of variable \code{i} in the regression equation \code{j}. Thus there is a non zero partial regression coefficient \code{Ahat[i,j]} corresponding to each non zero value \code{amat[j,i]} in the adjacency matrix.} \item{Dhat}{a vector containing the partial variances of each variable given the parents.} \item{dev}{the `deviance' of the model.} \item{df}{the degrees of freedom.} } \references{Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate dependencies}. London: Chapman \& Hall.} \author{Giovanni M. Marchetti} \seealso{\code{\link{DAG}}, \code{\link{swp}}.} \examples{ dag <- DAG(y ~ x+u, x ~ z, z ~ u) "S" <- structure(c(2.93, -1.7, 0.76, -0.06, -1.7, 1.64, -0.78, 0.1, 0.76, -0.78, 1.66, -0.78, -0.06, 0.1, -0.78, 0.81), .Dim = c(4,4), .Dimnames = list(c("y", "x", "z", "u"), c("y", "x", "z", "u"))) fitDag(dag, S, 200) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/isADMG.Rd0000644000176200001440000000247412312776012013142 0ustar liggesusers\name{isADMG} \alias{isADMG} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Acyclic directed mixed graphs } \description{ Check if it is an adjacency matrix of an ADMG } \usage{ isADMG(amat) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{amat}{ %% ~~Describe \code{amat} here~~ } } \details{ Checks if the following conditions must hold: (i) no undirected edge meets an arrowhead; (ii) no directed cycles; } \value{ A logical value, \code{TRUE} if it is an ancestral graph and \code{FALSE} otherwise. } \references{Richardson, T. S. and Spirtes, P. (2002). Ancestral Graph Markov Models. \emph{Annals of Statistics}, 30(4), 962--1030. } \author{ Giovanni M. Marchetti, Mathias Drton } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{\code{\link{makeMG}}, \code{\link{isADMG}}} \examples{ ## Examples from Richardson and Spirtes (2002) a1 <- makeMG(dg=DAG(a~b, b~d, d~c), bg=UG(~a*c)) isADMG(a1) # Not an AG. (a2) p.969 a2 <- makeMG(dg=DAG(b ~ a, d~c), bg=UG(~a*c+c*b+b*d)) # Fig. 3 (b1) p.969 isADMG(a2) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{graphs} \keyword{ancestral graph} \keyword{mixed graph} \keyword{models} \keyword{multivariate} ggm/man/icf.Rd0000644000176200001440000000372312312776012012635 0ustar liggesusers\name{icf} \alias{icf} \alias{icfmag} \title{Iterative conditional fitting} \description{ Main algorithm for MLE fitting of Gaussian Covariance Graphs and Gaussian Ancestral models. } \usage{ icf(bi.graph, S, start = NULL, tol = 1e-06) icfmag(mag, S, tol = 1e-06) } \arguments{ \item{bi.graph}{a symmetric matrix with dimnames representing the adjacency matrix of an undirected graph.} \item{mag}{a square matrix representing the adjacency matrix of an ancestral graph (for example returned by \code{makeAG}).} \item{S}{a symmetric positive definite matrix, the sample covariance matrix. The order of the variables must be the same of the order of vertices in the adjacency matrix.} \item{start}{a symmetric matrix used as starting value of the algorithm. If \code{start=NULL} the starting value is a diagonal matrix.} \item{tol}{A small positive number indicating the tolerance used in convergence tests.} } \details{ These functions are not intended to be called directly by the user. } \value{ \item{Sigmahat}{the fitted covariance matrix.} \item{Bhat}{matrix of the fitted regression coefficients associated to the directed edges.} \item{Omegahat}{matrix of the partial covariances of the residuals between regression equations.} \item{iterations}{the number of iterations.} } \references{ Drton, M. \& Richardson, T. S. (2003). A new algorithm for maximum likelihood estimation in Gaussian graphical models for marginal independence. \emph{Proceedings of the Ninetheen Conference on Uncertainty in Artificial Intelligence}, 184--191. Drton, M. \& Richardson, T. S. (2004). Iterative Conditional Fitting for Gaussian Ancestral Graph Models. Proceedings of the 20th Conference on Uncertainty in Artificial Intelligence, Department of Statistics, 130--137. } \author{Mathias Drton} \seealso{\code{\link{fitCovGraph}}, \code{\link{fitAncestralGraph}}} \keyword{internal} ggm/man/ggm.Rd0000644000176200001440000000744413621545276012664 0ustar liggesusers\name{ggm} \alias{ggm} \title{ The package \code{ggm}: summary information } \description{ This package provides functions for defining, manipulating and fitting graphical Markov models with mixed graphs. It is intended as a contribution to the gR-project described by Lauritzen (2002). For a tutorial illustrating the new functions in the package `ggm` that deal with ancestral, summary and ribbonless graphs see Sadeghi and Marchetti (2012) in the references. } \section{Functions}{ The main functions can be classified as follows. \itemize{ \item Functions for defining graphs (undirected, directed acyclic, ancestral and summary graphs): \code{\link{UG}}, \code{\link{DAG}}, \code{\link{makeMG}}, \code{\link{grMAT}}; \item Functions for doing graph operations (parents, boundary, cliques, connected components, fundamental cycles, d-separation, m-separation): \code{\link{pa}}, \code{\link{bd}}, \code{\link{cliques}}, \code{\link{conComp}}, \code{\link{fundCycles}}; \item Functions for testing independence statements and generating maximal graphs from non-maximal graphs: \code{\link{dSep}}, \code{\link{msep}}, \code{\link{Max}}; \item Function for finding covariance and concentration graphs induced by marginalization and conditioning: \code{\link{inducedCovGraph}}, \code{\link{inducedConGraph}}; \item Functions for finding multivariate regression graphs and chain graphs induced by marginalization and conditioning: \code{\link{inducedRegGraph}}, \code{\link{inducedChainGraph}}, \code{\link{inducedDAG}}; \item Functions for finding stable mixed graphs (ancestral, summary and ribbonless) after marginalization and conditioning: \code{\link{AG}}, \code{\link{SG}}, \code{\link{RG}}; \item Functions for fitting by ML Gaussian DAGs, concentration graphs, covariance graphs and ancestral graphs: \code{\link{fitDag}}, \code{\link{fitConGraph}}, \code{\link{fitCovGraph}}, \code{\link{fitAncestralGraph}}; \item Functions for testing several conditional independences:\code{\link{shipley.test}}; \item Functions for checking global identification of DAG Gaussian models with one latent variable (Stanghellini-Vicard's condition for concentration graphs, new sufficient conditions for DAGs): \code{\link{isGident}}, \code{\link{checkIdent}}; \item Functions for fitting Gaussian DAG models with one latent variable: \code{\link{fitDagLatent}}; \item Functions for testing Markov equivalences and generating Markov equivalent graphs of specific types: \code{\link{MarkEqRcg}}, \code{\link{MarkEqMag}}, \code{\link{RepMarDAG}}, \code{\link{RepMarUG}}, \code{\link{RepMarBG}}. } } \section{Authors}{ Giovanni M. Marchetti, Dipartimento di Statistica, Informatica, Applicazioni 'G. Parenti'. University of Florence, Italy Mathias Drton, Department of Statistics, University of Washington, USA Kayvan Sadeghi, Department of Statistics, Carnegie Mellon University, USA } \section{Acknowledgements}{ Many thanks to Fulvia Pennoni for testing some of the functions, to Elena Stanghellini for discussion and examples and to Claus Dethlefsen and Jens Henrik Badsberg for suggestions and corrections. The function \code{fitConGraph} was corrected by Ilaria Carobbi. Helpful discussions with Steffen Lauritzen and Nanny Wermuth, are gratefully acknowledged. Thanks also to Michael Perlman, Thomas Richardson and David Edwards. Giovanni Marchetti has been supported by MIUR, Italy, under grant scheme PRIN 2002, and Mathias Drton has been supported by NSF grant DMS-9972008 and University of Washington RRF grant 65-3010. } \references{ Lauritzen, S. L. (2002). gRaphical Models in R. \emph{R News}, 3(2)39. Sadeghi, K. and Marchetti, G.M. (2012). Graphical Markov models with mixed graphs in R. \emph{The R Journal}, 4(2):65-73. \url{https://journal.r-project.org/archive/2012/RJ-2012-015/RJ-2012-015.pdf} } \keyword{multivariate} \keyword{models} \keyword{graphs} ggm/man/edgematrix.Rd0000644000176200001440000000276112312776012014226 0ustar liggesusers\name{edgematrix} \alias{edgematrix} \title{Edge matrix of a graph} \description{ Transforms the adjacency matrix of a graph into an ``edge matrix''. } \usage{ edgematrix(E, inv=FALSE) } \arguments{ \item{E}{a square matrix, representing the adjacency matrix of a graph.} \item{inv}{a logical value.} } \details{ In some matrix computations for graph objects the adjacency matrix of the graph is transformed into an ``edge matrix''. Briefly, if \eqn{E} is the adjacency matrix of the graph, the edge matrix is \eqn{A = sign(E+I)^T=[a_{ij}]}. Thus, \eqn{A} has ones along the diagonal and if the graph has no edge between nodes \eqn{i} and \eqn{j} the entries \eqn{a_{i,j}} and \eqn{a_{j,i}} are both zero. If there is an arrow from \eqn{j} to \eqn{i} \eqn{a_{i,j}=1} and \eqn{a_{j,i} = 0}. If there is an undirected edge, both \eqn{a_{i,j}=a_{j,i}=1}. } \value{ \item{A}{the edge matrix of the graph. If \code{TRUE} the nodes are sorted in inverted topological order and the edge matrix is upper triangular.} } \references{Wermuth, N. (2003). Analysing social science data with graphical Markov models. In: \emph{Highly Structured Stochastic Systems.} P. Green, N. Hjort \& T. Richardson (eds.), 47--52. Oxford: Oxford University Press.} \author{Giovanni M. Marchetti} \seealso{\code{\link{adjMatrix}}} \examples{ amat <- DAG(y ~ x+z, z~u+v) amat edgematrix(amat) edgematrix(amat, inv=TRUE) } \keyword{array} \keyword{algebra} \keyword{graphs} \keyword{multivariate} ggm/man/msep.Rd0000644000176200001440000000323312312776012013034 0ustar liggesusers\name{msep} \alias{msep} \title{The m-separation criterion } \description{ \code{msep} determines whether two set of nodes are m-separated by a third set of nodes. } \usage{ msep(a, alpha, beta, C = c()) } \arguments{ \item{a}{ An adjacency matrix, or a graph that can be a \code{graphNEL} or an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type, node1label, node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). } \item{alpha}{ A subset of the node set of \code{a}} \item{beta}{ Another disjoint subset of the node set of \code{a}} \item{C}{ A third disjoint subset of the node set of \code{a}} } %\details{ %% ~~ If necessary, more details than the description above ~~ %} \value{ A logical value. \code{TRUE} if \code{alpha} and \code{beta} are m-separated given \code{C}. \code{FALSE} otherwise. } \references{ Richardson, T.S. and Spirtes, P. (2002) Ancestral graph Markov models. \emph{Annals of Statistics}, 30(4), 962-1030. Sadeghi, K. and Lauritzen, S.L. (2011). Markov properties for loopless mixed graphs. \emph{Submitted}, 2011. URL \url{http://arxiv.org/abs/1109.5909}. } \author{ Kayvan Sadeghi } %\note{ %% ~~further notes~~ %} \seealso{ \code{\link{dSep}}, \code{\link{MarkEqMag}} } \examples{ H <-matrix(c(0,0,0,0, 1,0,0,1, 0,1,0,0, 0,0,0,0),4,4) msep(H,1,4, 2) msep(H,1,4, c()) } \keyword{graphs} \keyword{d-separation} \keyword{m-separation} \keyword{mixed graph}% __ONLY ONE__ keyword per line ggm/man/fitDagLatent.Rd0000644000176200001440000000733712312776012014447 0ustar liggesusers\name{fitDagLatent} \alias{fitDagLatent} \title{Fitting Gaussian DAG models with one latent variable} \description{ Fits by maximum likelihood a Gaussian DAG model where one of the nodes of the graph is latent and it is marginalised over. } \usage{ fitDagLatent(amat, Syy, n, latent, norm = 1, seed, maxit = 9000, tol = 1e-06, pri = FALSE) } \arguments{ \item{amat}{a square matrix with dimnames representing the adjacency matrix of the DAG.} \item{Syy}{a symmetric positive definite matrix, with dimnames, the sample covariance matrix of the observed variables. The set of the observed nodes of the graph must be a subset of the set of the names of the variables in \code{Syy}.} \item{n}{a positive integer, the sample size.} \item{latent}{the name of the latent variable.} \item{norm}{an integer, the kind of normalization of the latent variable. If \code{norm=1}, the latent is scaled to have unit variance. If \code{norm=2}, the latent is scaled to have unit partial variance given its parents.} \item{seed}{an integer, used by \code{set.seed} to specify a random starting point of the EM algorithm.} \item{maxit}{an integer denoting the maximum number of iterations allowed for the EM algorithm. If the convergence criterion is not satisfied within maxit iterations the algorithms stops and a warning message is returned.} \item{tol}{a small real value, denoting the tolerance used in testing convergence.} \item{pri}{logical, if \code{pri=TRUE} then the value of the deviance at each iteration is printed.} } \details{ For the EM algorithm used see Kiiveri (1987). } \value{ \item{Shat}{ the fitted covariance matrix of all the variables including the latent one. The latent variable is the last. If \code{norm=1} then the variance of the latent variable is constrained to 1. } \item{Ahat}{ a square matrix of the fitted regression coefficients. The entry \code{Ahat[i,j]} is minus the regression coefficient of variable \code{i} in the regression equation \code{j}. Thus there is a non zero partial regression coefficient \code{Ahat[i,j]} corresponding to each non zero value \code{amat[j,i]} in the adjacency matrix. } \item{Dhat}{ a vector containing the partial variances of each variable given the parents. If \code{norm=2} then the partial variance of the latent variable is constrained to 1. } \item{dev}{ the `deviance' of the model. } \item{df}{ the degrees of freedom of the model. } \item{it}{ the number of EM algorithm iterations at convergence. } } \references{ Kiiveri,H. T. (1987). An incomplete data approach to the analysis of covariance structures. \emph{Psychometrika}, 52, 4, 539--554. Joreskog, K.G. and Goldberger, A.S. (1975). Estimation of a model with multiple indicators and multiple causes of a single latent variable. \emph{Journal of the American Statistical Association}, 10, 631--639. } \author{Giovanni M. Marchetti} \seealso{\code{\link{fitDag}}, \code{\link{checkIdent}}} \examples{ ## data from Joreskog and Goldberger (1975) V <- matrix(c(1, 0.36, 0.21, 0.10, 0.156, 0.158, 0.36, 1, 0.265, 0.284, 0.192, 0.324, 0.210, 0.265, 1, 0.176, 0.136, 0.226, 0.1, 0.284, 0.176, 1, 0.304, 0.305, 0.156, 0.192, 0.136, 0.304, 1, 0.344, 0.158, 0.324, 0.226, 0.305, 0.344, 1), 6,6) nod <- c("y1", "y2", "y3", "x1", "x2", "x3") dimnames(V) <- list(nod,nod) dag <- DAG(y1 ~ z, y2 ~ z, y3 ~ z, z ~ x1 + x2 + x3, x1~x2+x3, x2~x3) fitDagLatent(dag, V, n=530, latent="z", seed=4564) fitDagLatent(dag, V, n=530, latent="z", norm=2, seed=145) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/isGident.Rd0000644000176200001440000000303212312776012013633 0ustar liggesusers\name{isGident} \alias{isGident} \title{G-identifiability of an UG} \description{ Tests if an undirected graph is G-identifiable. } \usage{ isGident(amat) } \arguments{ \item{amat}{a symmetric matrix with dimnames representing the adjacency matrix of an undirected graph} } \details{ An undirected graph is said G-identifiable if every connected component of the complementary graph contains an odd cycle (Stanghellini and Wermuth, 2005). See also Tarantola and Vicard (2002). } \value{ a logical value, \code{TRUE} if the graph is G-identifiable and \code{FALSE} if it is not. } \references{ Stanghellini, E. \& Wermuth, N. (2005). On the identification of path-analysis models with one hidden variable. \emph{Biometrika}, 92(2), 337-350. Stanghellini, E. (1997). Identification of a single-factor model using graphical Gaussian rules. \emph{Biometrika}, 84, 241--244. Tarantola, C. \& Vicard, P. (2002). Spanning trees and identifiability of a single-factor model. \emph{Statistical Methods \& Applications}, 11, 139--152. Vicard, P. (2000). On the identification of a single-factor model with correlated residuals. \emph{Biometrika}, 87, 199--205. } \author{Giovanni M. Marchetti} \seealso{\code{\link{UG}}, \code{\link{cmpGraph}}, \code{\link{cycleMatrix}}} \examples{ ## A not G-identifiable UG G1 <- UG(~ a*b + u*v) isGident(G1) ## G-identifiable UG G2 <- UG(~ a + b + u*v) isGident(G2) ## G-identifiable UG G3 <- cmpGraph(UG(~a*b*c+x*y*z)) isGident(G3) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/null.Rd0000644000176200001440000000131712312776012013043 0ustar liggesusers\name{null} \alias{null} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Null space of a matrix } \description{ Given a matrix \code{M} find a matrix \code{N} such that \eqn{N^T M} is zero. } \usage{ null(M) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{M}{ A matrix. } } \value{ The matrix \code{N} with the basis for the null space, or an empty vector if the matrix \code{M} is square and of maximal rank. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{Null}}, ~~~ } \examples{ null(c(1,1,1)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{matrix} ggm/man/marks.Rd0000644000176200001440000000167312312776012013213 0ustar liggesusers\name{marks} \alias{marks} \docType{data} \title{Mathematics marks} \description{ Examination marks of 88 students in five subjects. } \usage{data(marks)} \format{ A data frame with 88 observations on the following 5 variables. \describe{ \item{mechanics}{a numeric vector, mark in Mechanics} \item{vectors}{a numeric vector, mark in Vectors} \item{algebra}{a numeric vector, mark in Algebra} \item{analysis}{a numeric vector, mark in Analysis} \item{statistics}{a numeric vector, mark in Statistics } } } \details{ Mechanics and Vectors were closed book examinations. Algebra, Analysis and Statistics were open book examinations. } \source{ Mardia, K.V., Kent, J.T. and Bibby, (1979). \emph{Multivariate analysis}. London: Academic Press. } \references{ Whittaker, J. (1990). \emph{Graphical models in applied multivariate statistics}. Chichester: Wiley. } \examples{ data(marks) pairs(marks) } \keyword{datasets} ggm/man/cmpGraph.Rd0000644000176200001440000000154512312776012013635 0ustar liggesusers\name{cmpGraph} \alias{cmpGraph} \title{The complementary graph} \description{ Finds the complementary graph of an undirected graph. } \usage{ cmpGraph(amat) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{amat}{the adjacency matrix of an undirected graph} } \details{ The complementary graph of an UG is the graph that has the same set of nodes and an undirected edge connecting \eqn{i} and \eqn{j} whenever there is not an \eqn{(i,j)} edge in the original UG. } \value{ the edge matrix of the complementary graph. } \references{Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon Press.} \author{Giovanni M. Marchetti} \seealso{\code{\link{UG}}, \code{\link{DAG}}} \examples{ ## A chordless four-cycle four <- UG(~ a*b + b*d + d*e + e*a) four cmpGraph(four) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/UtilityFunctions.Rd0000644000176200001440000000106112312776012015421 0ustar liggesusers\name{Utility Functions} \alias{rem} \alias{SPl} \alias{RR} \alias{likGau} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Utility functions } \description{ Functions used internally. } %- maybe also 'usage' for other objects documented here. \author{ Kayvan Sadeghi, Giovanni M. Marchetti } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{unique},\code{setdiff}, \code{is.element} } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{utility} ggm/man/powerset.Rd0000644000176200001440000000177612312776012013752 0ustar liggesusers\name{powerset} \alias{powerset} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Power set } \description{ Finds the list of all subsets of a set. } \usage{ powerset(set, sort = TRUE, nonempty = TRUE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{set}{ A numeric or character vector. } \item{sort}{ Logical value. If \code{TRUE} the subsets are sorted according to dimension. Default is \code{TRUE}. } \item{nonempty}{ Logical value. If \code{TRUE} the empty set is omitted. Default is \code{TRUE}. } } \details{ If \code{sort == FALSE} the sets are in inverse lexicographical order. } \value{ A list of all subsets of \code{set}. } \author{ Giovanni M. Marchetti } %% ~Make other sections like Warning with \section{Warning }{....} ~ \examples{ powerset(c("A", "B", "C"), nonempty = FALSE) powerset(1:3, sort = FALSE, nonempty = TRUE) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{sets} ggm/man/allEdges.Rd0000644000176200001440000000143512312776012013612 0ustar liggesusers\name{allEdges} \alias{allEdges} \title{All edges of a graph} \description{ Finds the set of edges of a graph. That is the set of undirected edges if the graph is undirected and the set of arrows if the graph is directed. } \usage{ allEdges(amat) } \arguments{ \item{amat}{a square Boolean matrix, with dimnames, the adjacency matrix of a graph.} } \value{ a matrix with two columns. Each row of the matrix is a pair of indices indicating an edge of the graph. If the graph is undirected, then only one of the pairs \eqn{(i,j), (j,i)} is reported. } \author{Giovanni M. Marchetti} \seealso{\code{\link{cycleMatrix}}} \examples{ ## A UG graph allEdges(UG(~ y*v*k +v*k*d+y*d)) ## A DAG allEdges(DAG(u~h+o+p, h~o, o~p)) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/RepMarUG.Rd0000644000176200001440000000440612312776012013515 0ustar liggesusers\name{RepMarUG} \alias{RepMarUG} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Representational Markov equivalence to undirected graphs. } \description{ \code{RepMarUG} determines whether a given maximal ancestral graph can be Markov equivalent to an undirected graph, and if that is the case, it finds an undirected graph that is Markov equivalent to the given graph.} \usage{ RepMarUG(amat) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{amat}{ An adjacency matrix, or a graph that can be a \code{graphNEL} or an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type, node1label, node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). } } \details{ \code{RepMarBG} looks for presence of an unshielded collider V-configuration in graph.} \value{ A list with two components: \code{verify} and \code{amat}. \code{verify} is a logical value, \code{TRUE} if there is a representational Markov equivalence and \code{FALSE} otherwise. \code{amat} is either \code{NA} if \code{verify == FALSE} or the adjacency matrix of the generated graph, if \code{verify == TRUE}. In this case it consists of 4 different integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added to be associated with multiple edges of different types. The matrix is symmetric w.r.t full lines and bi-directed arrows. } \references{ Sadeghi, K. (2011). Markov equivalences for subclasses of loopless mixed graphs. \emph{Submitted}, 2011. } \author{ Kayvan Sadeghi } %\note{ %% ~~further notes~~ %} \seealso{ \code{\link{MarkEqMag}}, \code{\link{MarkEqRcg}}, \code{\link{RepMarBG}}, \code{\link{RepMarDAG}} } \examples{ H<-matrix(c(0,10,0,0,10,0,0,0,0,1,0,100,0,0,100,0),4,4) RepMarUG(H) } \keyword{graphs} \keyword{bidirected graph} \keyword{Markov equivalence} \keyword{maximal ancestral graph} \keyword{representational Markov equivalence} ggm/man/makeMG.Rd0000644000176200001440000000515212312776012013233 0ustar liggesusers\name{makeMG} \alias{makeMG} \title{Mixed Graphs} \description{ Defines a loopless mixed graph from the directed, undirected and undirected components. } \usage{ makeMG(dg = NULL, ug = NULL, bg = NULL) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{dg}{the adjacency matrix of a directed graph specifying the arrows of the mixed graph.} \item{ug}{the adjacency matrix of an undirected graph specifying the lines of the mixed graph.} \item{bg}{the adjacency matrix of an undirected graph specifying the bidirected edges of the mixed graph.} } \details{ A loopless mixed graph is a mixed graph with three types of edges: undirected, directed and bi-directed edges. Note that the three adjacency matrices must have labels and may be defined using the functions \code{DG}, \code{DAG} or \code{UG}. The adjacency matrices of the undirected graphs may be just symmetric Boolean matrices. } \value{ a square matrix obtained by combining the three graph components into an adjacency matrix of a mixed graph. The matrix consists of 4 different integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added to be associated with multiple edges of different types. The matrix is symmetric w.r.t full lines and bi-directed arrows. } \references{Richardson, T. S. and Spirtes, P. (2002). Ancestral Graph Markov Models. \emph{Annals of Statistics}, 30(4), 962--1030. } \author{Giovanni M. Marchetti, Mathias Drton} \seealso{\code{\link{UG}}, \code{\link{DAG}}} \examples{ ## Examples from Richardson and Spirtes (2002) a1 <- makeMG(dg=DAG(a~b, b~d, d~c), bg=UG(~a*c)) isAG(a1) # Not an AG. (a2) p.969 a2 <- makeMG(dg=DAG(b ~ a, d~c), bg=UG(~a*c+c*b+b*d)) # Fig. 3 (b1) p.969 isAG(a1) a3 <- makeMG(ug = UG(~ a*c), dg=DAG(b ~ a, d~c), bg=UG(~ b*d)) # Fig. 3 (b2) p.969 a5 <- makeMG(bg=UG(~alpha*beta+gamma*delta), dg=DAG(alpha~gamma, delta~beta)) # Fig. 6 p. 973 ## Another Example a4 <- makeMG(ug=UG(~y0*y1), dg=DAG(y4~y2, y2~y1), bg=UG(~y2*y3+y3*y4)) ## A mixed graphs with double edges. mg <- makeMG(dg = DG(Y ~ X, Z~W, W~Z, Q~X), ug = UG(~X*Q), bg = UG(~ Y*X+X*Q+Q*W + Y*Z) ) ## Chronic pain data: a regression graph chronic.pain <- makeMG(dg = DAG(Y ~ Za, Za ~ Zb + A, Xa ~ Xb, Xb ~ U+V, U ~ A + V, Zb ~ B, A ~ B), bg = UG(~Za*Xa + Zb*Xb)) } \keyword{graphs} \keyword{ancestral graph} \keyword{mixed graph} \keyword{models} \keyword{multivariate} ggm/man/cycleMatrix.Rd0000644000176200001440000000307712312776012014362 0ustar liggesusers\name{cycleMatrix} \alias{cycleMatrix} \title{Fundamental cycles} \description{ Finds the matrix of fundamental cycles of a connected undirected graph. } \usage{ cycleMatrix(amat) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{amat}{a symmetric matrix with dimnames denoting the adjacency matrix of the undirected graph. The graph must be connected, otherwise the function returns an error message.} } \details{ All the cycles in an UG can be obtained from combination (ring sum) of the set of fundamental cycles. The matrix of fundamental cycles is a Boolean matrix having as rows the fundamental cycles and as columns the edges of the graph. If an entry is one then the edge associated to that column belongs to the cycle associated to the row. } \value{ a Boolean matrix of the fundamental cycles of the undirected graph. If there is no cycle the function returns \code{NULL}.} \references{ Thulasiraman, K. \& Swamy, M.N.S. (1992). \emph{Graphs: theory and algorithms}. New York: Wiley. } \author{Giovanni M. Marchetti} \note{This function is used by \code{isGident}. The row sum of the matrix gives the length of the cycles.} \seealso{\code{\link{UG}}, \code{\link{findPath}}, \code{\link{fundCycles}}, \code{\link{isGident}}, \code{\link{bfsearch}}} \examples{ ## Three cycles cycleMatrix(UG(~a*b*d+d*e+e*a*f)) ## No cycle cycleMatrix(UG(~a*b)) ## two cycles: the first is even and the second is odd cm <- cycleMatrix(UG(~a*b+b*c+c*d+d*a+a*u*v)) apply(cm, 1, sum) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/triDec.Rd0000644000176200001440000000342512312776012013305 0ustar liggesusers\name{triDec} \alias{triDec} \title{Triangular decomposition of a covariance matrix} \description{ Decomposes a symmetric positive definite matrix with a variant of the Cholesky decomposition. } \usage{ triDec(Sigma) } \arguments{ \item{Sigma}{a symmetric positive definite matrix.} } \details{ Any symmetric positive definite matrix \eqn{\Sigma}{Sigma} can be decomposed as \eqn{\Sigma = B \Delta B^T}{Sigma = B \%*\% Delta \%*\% t(B)} where \eqn{B} is upper triangular with ones along the main diagonal and \eqn{\Delta}{Delta} is diagonal. If \eqn{\Sigma}{Sigma} is a covariance matrix, the concentration matrix is \eqn{\Sigma^{-1} = A^T \Delta^{-1} A} where \eqn{A = B^{-1}} is the matrix of the regression coefficients (with the sign changed) of a system of linear recursive regression equations with independent residuals. In the equations each variable \eqn{i} is regressed on the variables \eqn{i+1, \dots, d}. The elements on the diagonal of \eqn{\Delta} are the partial variances. } \value{ \item{A}{a square upper triangular matrix of the same order as \code{Sigma} with ones on the diagonal.} \item{B}{the inverse of \code{A}, another triangular matrix with unit diagonal.} \item{Delta}{a vector containing the diagonal values of \eqn{\Delta}.} } \references{Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate dependencies}. London: Chapman \& Hall.} \author{Giovanni M. Marchetti} \seealso{\code{\link{chol}}} \examples{ ## Triangular decomposition of a covariance matrix B <- matrix(c(1, -2, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1), 4, 4, byrow=TRUE) B D <- diag(c(3, 1, 2, 1)) S <- B \%*\% D \%*\% t(B) triDec(S) solve(B) } \keyword{array} \keyword{algebra} \keyword{models} \keyword{multivariate} ggm/man/plotGraph.Rd0000644000176200001440000001110312352023151014014 0ustar liggesusers\name{plotGraph} \alias{plotGraph} %- Also NEED an '\alias' for EACH other topic documented here. \title{Plot of a mixed graph} \description{ Plots a mixed graph from an adjacency matrix, a \code{graphNEL} object, an \code{\link{igraph}} object, or a descriptive vector. } \usage{ plotGraph(a, dashed = FALSE, tcltk = TRUE, layout = layout.auto, directed = FALSE, noframe = FALSE, nodesize = 15, vld = 0, vc = "gray", vfc = "black", colbid = "FireBrick3", coloth = "black", cex = 1.5, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{a}{ An adjacency matrix: a matrix that consists of 4 different integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers can be added to generate multiple edges of different types. The matrix must be symmetric w.r.t full lines and bi-directed arrows. Or a graph that can be a \code{graphNEL} or an \code{\link{igraph}} object.Or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type,node1label,node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). } \item{dashed}{A logical value. If \code{TRUE} the bi-directed edges are plotted as undirected dashed edges.} \item{tcltk}{A logical value. If \code{TRUE} the function opens a tcltk device to plot the graphs, allowing the interactive manimulation of the graph. If \code{FALSE}the function opens a standard device without interaction.} \item{layout}{The name of a function used to compute the (initial) layout of the graph. The default is \code{layout.auto}. This can be further adjusted if \code{tcltk} is \code{TRUE}.} \item{directed}{A logical value. If \code{FALSE} a symmetric adjacency matrix with entries 1 is interpreted as an undirected graph. If \code{FALSE} it is interpreted as a directed graph with double arrows. If \code{a} is not an adjacency matrix, it is ignored.} \item{noframe}{A logical value. If \code{TRUE}, then the nodes are not circled.} \item{nodesize}{An integer denoting the size of the nodes (default 15). It can be increased to accommodate larger labels.} \item{vld}{An integer defining the distance between a vertex and its label. Defaults to 0.} \item{vc}{Vertex color. Default is "gray".} \item{vfc}{Vertex frame color. Default is "black".} \item{colbid}{Color of the bi-directed edges. Default is "FireBrick3".} \item{coloth}{Color of all the other edges. Default is "black".} \item{cex}{An integer (defaults to 1) to adjust the scaling of the font of the labels.} \item{\dots}{Further arguments to be passed to \code{plot} or \code{tkplot}.} } \details{ \code{plotGraph} uses \code{\link{plot}} and \code{\link{tkplot}} in \pkg{\link{igraph}} package. } \value{ Plot of the associated graph and returns invisibly a list with two slots: \code{tkp.id}, \code{graph}, the input graph as an \code{igraph} object. The id can be used to get the layout of the adjusted graph. The bi-directed edges are plotted in red. } %\references{ %% ~put references to the literature/web site here ~ %} \author{ Kayvan Sadeghi, Giovanni M. Marchetti } %\note{ %% ~~further notes~~ %} \seealso{ \code{\link{grMAT}}, \code{\link{tkplot}}, \code{\link{drawGraph}}, \code{\link{plot.igraph}} } \examples{ exvec<-c("b",1,2,"b",1,14,"a",9,8,"l",9,11, "a",10,8,"a",11,2,"a",11,9,"a",11,10, "a",12,1,"b",12,14,"a",13,10,"a",13,12) plotGraph(exvec) ############################################ amat<-matrix(c(0,11,0,0,10,0,100,0,0,100,0,1,0,0,1,0),4,4) plotGraph(amat) plotGraph(makeMG(bg = UG(~a*b*c+ c*d), dg = DAG(a ~ x + z, b ~ z ))) plotGraph(makeMG(bg = UG(~a*b*c+ c*d), dg = DAG(a ~ x + z, b ~ z )), dashed = TRUE) # A graph with double and triple edges G <- structure(c(0, 101, 0, 0, 100, 0, 100, 100, 0, 100, 0, 100, 0, 111, 100, 0), .Dim = c(4L, 4L), .Dimnames = list(c("X", "Z", "Y", "W"), c("X", "Z", "Y", "W"))) plotGraph(G) # A regression chain graph with longer labels plotGraph(makeMG(bg = UG(~Love*Constraints+ Constraints*Reversal+ Abuse*Distress), dg = DAG(Love ~ Abuse + Distress, Constraints ~ Distress, Reversal ~ Distress, Abuse ~ Fstatus, Distress ~ Fstatus), ug = UG(~Fstatus*Schooling+ Schooling*Age)), dashed = TRUE, noframe = TRUE) # A graph with 4 edges between two nodes. G4 = matrix(0, 2, 2); G4[1,2] = 111; G4[2,1] = 111 plotGraph(G4) } \keyword{graphs} \keyword{adjacency matrix} \keyword{mixed graphs} \keyword{plot} ggm/man/swp.Rd0000644000176200001440000000265412312776012012707 0ustar liggesusers\name{swp} \alias{swp} \title{Sweep operator} \description{ Sweeps a covariance matrix with respect to a subset of indices. } \usage{ swp(V, b) } \arguments{ \item{V}{a symmetric positive definite matrix, the covariance matrix.} \item{b}{a subset of indices of the columns of \code{V}.} } \details{ The sweep operator has been introduced by Beaton (1964) as a tool for inverting symmetric matrices (see Dempster, 1969). } \value{ a square matrix \code{U} of the same order as \code{V}. If \code{a} is the complement of \code{b}, then \code{U[a,b]} is the matrix of regression coefficients of \code{a} given \code{b} and \code{U[a,a]} is the corresponding covariance matrix of the residuals. If \code{b} is empty the function returns \code{V}. If \code{b} is the vector \code{1:nrow(V)} (or its permutation) then the function returns the opposite of the inverse of \code{V}. } \references{ Beaton, A.E. (1964). \emph{The use of special matrix operators in statistical calculus}. Ed.D. thesis, Harvard University. Reprinted as Educational Testing Service Research Bulletin 64-51. Princeton. Dempster, A.P. (1969). \emph{Elements of continuous multivariate analysis}. Reading: Addison-Wesley. } \author{Giovanni M. Marchetti} \seealso{\code{\link{fitDag}}} \examples{ ## A very simple example V <- matrix(c(10, 1, 1, 2), 2, 2) swp(V, 2) } \keyword{array} \keyword{algebra} \keyword{models} \keyword{multivariate} ggm/man/diagv.Rd0000644000176200001440000000147012312776012013163 0ustar liggesusers\name{diagv} \alias{diagv} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Matrix product with a diagonal matrix } \description{ Computes faster the product of a diagonal matrix times a full matrix. } \usage{ diagv(v, M) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{v}{ A numeric vector specifying the elements on the diagonal of a matrix. } \item{M}{ A numeric matrix compatible with the product \eqn{D_v M}. } } \details{ Computes \eqn{N = D_v M} where \eqn{D_v} is diagonal avoiding the \code{diag} operator. } \value{ A matrix \code{N}. } \seealso{ \code{\link{diag}} } \examples{ v <- 1:1000 M <- matrix(runif(3000), 1000, 3) dim(diagv(v, M)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{matrix} ggm/man/shipley.test.Rd0000644000176200001440000000332212312776012014522 0ustar liggesusers\name{shipley.test} \alias{shipley.test} \title{Test of all independencies implied by a given DAG} \description{ Computes a simultaneous test of all independence relationships implied by a given Gaussian model defined according to a directed acyclic graph, based on the sample covariance matrix. } \usage{ shipley.test(amat, S, n) } \arguments{ \item{amat}{a square Boolean matrix, of the same dimension as \code{S}, representing the adjacency matrix of a DAG.} \item{S}{a symmetric positive definite matrix, the sample covariance matrix.} \item{n}{a positive integer, the sample size.} } \details{ The test statistic is \eqn{C = -2 \sum \ln p_j} where \eqn{p_j} are the p-values of tests of conditional independence in the basis set computed by \code{basiSet(A)}. The p-values are independent uniform variables on \eqn{(0,1)} and the statistic has exactly a chi square distribution on \eqn{2k} degrees of freedom where \eqn{k} is the number of elements of the basis set. Shipley (2002) calls this test Fisher's C test. } \value{ \item{ctest}{Test statistic \eqn{C}.} \item{df}{Degrees of freedom.} \item{pvalue}{The P-value of the test, assuming a two-sided alternative.} } \references{Shipley, B. (2000). A new inferential test for path models based on directed acyclic graphs. \emph{Structural Equation Modeling}, 7(2), 206--218.} \author{Giovanni M. Marchetti} \seealso{\code{\link{basiSet}}, \code{\link{pcor.test}}} \examples{ ## A decomposable model for the mathematics marks data data(marks) dag <- DAG(mechanics ~ vectors+algebra, vectors ~ algebra, statistics ~ algebra+analysis, analysis ~ algebra) shipley.test(dag, cov(marks), n=88) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/topSort.Rd0000644000176200001440000000300012312776012013532 0ustar liggesusers\name{topSort} \alias{topSort} \alias{topOrder} \title{Topological sort} \description{ \code{topOrder} returns the topological order of a directed acyclic graph (parents, before children). \code{topSort} permutates the adjacency matrix according to the topological order. } \usage{ topSort(amat) topOrder(amat) } \arguments{ \item{amat}{a square Boolean matrix with dimnames, representing the adjacency matrix of a directed acyclic graph.} } \details{ The topological order needs not to be unique. After the permutation the adjacency matrix of the graph is upper triangular. The function is a translation of the Matlab function \code{topological_sort} in Toolbox \pkg{BNT} written by Kevin P. Murphy. } \value{ \code{topOrder(amat)} returns a vector of integers representing the permutation of the nodes. \code{topSort(amat)} returns the adjacency matrix with rows and columns permutated. } \note{ The order of the nodes defined by \code{DAG} is that of their first appearance in the model formulae (from left to right). } \references{ Aho, A.V., Hopcrtoft, J.E. \& Ullman, J.D. (1983). \emph{Data structures and algorithms.} Reading: Addison-Wesley. Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon Press. } \author{Kevin P. Murphy, Giovanni M. Marchetti} \seealso{\code{\link{DAG}}, \code{\link{isAcyclic}}} \examples{ ## A simple example dag <- DAG(a ~ b, c ~ a + b, d ~ c + b) dag topOrder(dag) topSort(dag) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/RepMarBG.Rd0000644000176200001440000000422312312776012013467 0ustar liggesusers\name{RepMarBG} \alias{RepMarBG} \title{ Representational Markov equivalence to bidirected graphs. } \description{ \code{RepMarBG} determines whether a given maximal ancestral graph can be Markov equivalent to a bidirected graph, and if that is the case, it finds a bidirected graph that is Markov equivalent to the given graph. } \usage{ RepMarBG(amat) } \arguments{ \item{amat}{ An adjacency matrix, or a graph that can be a \code{graphNEL} or an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type, node1label, node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). } } \details{ \code{RepMarBG} looks for presence of an unshielded non-collider V-configuration in graph. } \value{ A list with two components: \code{verify} and \code{amat}. \code{verify} is a logical value, \code{TRUE} if there is a representational Markov equivalence and \code{FALSE} otherwise. \code{amat} is either \code{NA} if \code{verify == FALSE} or the adjacency matrix of the generated graph, if \code{verify == TRUE}. In this case it consists of 4 different integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added to be associated with multiple edges of different types. The matrix is symmetric w.r.t full lines and bi-directed arrows. } \references{ Sadeghi, K. (2011). Markov equivalences for subclasses of loopless mixed graphs. \emph{Submitted}, 2011. } \author{ Kayvan Sadeghi } %\note{ %% ~~further notes~~ %} \seealso{ \code{\link{MarkEqMag}}, \code{\link{MarkEqRcg}}, \code{\link{RepMarDAG}}, \code{\link{RepMarUG}} } \examples{ H<-matrix(c(0,10,0,0,10,0,0,0,0,1,0,100,0,0,100,0),4,4) RepMarBG(H) } \keyword{graphs} \keyword{bidirected graph} \keyword{Markov equivalence} \keyword{maximal ancestral graph} \keyword{representational Markov equivalence} ggm/man/fitCovGraph.Rd0000644000176200001440000000623112312776012014305 0ustar liggesusers\name{fitCovGraph} \alias{fitCovGraph} \title{Fitting of Gaussian covariance graph models} \description{ Fits a Gaussian covariance graph model by maximum likelihood. } \usage{ fitCovGraph(amat, S,n ,alg = "icf", dual.alg = 2, start.icf = NULL, tol = 1e-06) } \arguments{ \item{amat}{A symmetric Booloean matrix with dimnames representing the adjacency matrix of an UG.} \item{S}{A symmetric positive definite matrix with dimnames, the sample covariance matrix.} \item{n}{A positive integer, the sample size.} \item{alg}{A character string, the algorithm used. If \code{alg="icf"} (the default) the algorithm is based on iterative conditional fitting (see Drton and Richardson, 2003). In this case the ML estimates are returned. If \code{alg="dual"} the algorithm is based on the dual likelihood (see Kauermann, 1996). The fitted values are an approximation of the ML estimates. } \item{dual.alg}{And integer equal to 1 or 2. It is used if \code{alg="dual"}. In this case a concentration graph model is fitted to the inverse of the sample covariance matrix, and \code{dual.alg} is passed to \code{fitConGraph} to specify the algorithm used in \code{fitConGraph}. } \item{start.icf}{A symmetric matrix used as starting value of the algorithm. If \code{start=NULL} the starting value is a diagonal matrix with diagonal entries equal to sample variances.} \item{tol}{A small positive number indicating the tolerance used in convergence tests.} } \details{ A covariance graph is an undirected graph in which the variables associated to two non-adjacent nodes are marginally independent. The edges of these models are represented by bi-directed edges (Drton and Richardson, 2003) or by dashed lines (Cox and Wermuth, 1996). By default, this function gives the ML estimates in the covariance graph model, by iterative conditional fitting (Drton and Richardson, 2003). Otherwise, the estimates from a ``dual likelihood'' estimator can be obtained (Kauermann, 1996; Edwards, 2000, section 7.4). } \value{ \item{Shat}{the fitted covariance matrix.} \item{dev}{the `deviance' of the model.} \item{df}{the degrees of freedom.} \item{it}{the iterations.} } \references{ Cox, D. R. and Wermuth, N. (1996). \emph{Multivariate dependencies}. London: Chapman \& Hall. Drton, M. and Richardson, T. S. (2003). A new algorithm for maximum likelihood estimation in Gaussian graphical models for marginal independence. \emph{Proceedings of the Nineteenth Conference on Uncertainty in Artificial Intelligence}, 184--191. Kauermann, G. (1996). On a dualization of graphical Gaussian models. \emph{Scandinavian Journal of Statistics}. 23, 105--116. } \author{Mathias Drton} \seealso{\code{\link{fitConGraph}}, \code{\link{icf}}} \examples{ ## Correlations among four strategies to cope with stress for ## 72 students. Cox & Wermuth (1996), p. 73. data(stress) ## A chordless 4-cycle covariance graph G <- UG(~ Y*X + X*U + U*V + V*Y) fitCovGraph(G, S = stress, n=72) fitCovGraph(G, S = stress, n=72, alg="dual") } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/pcor.test.Rd0000644000176200001440000000150212312776012014006 0ustar liggesusers\name{pcor.test} \alias{pcor.test} \title{Test for zero partial association} \description{ Test for conditional independence between two variables, given the other ones, assuming a multivariate normal distribution. } \usage{ pcor.test(r, q, n) } \arguments{ \item{r}{a partial correlation coefficient, computed by \code{\link{pcor}}.} \item{q}{the number of variables in the conditioning set.} \item{n}{integer > 0, the sample size.} } \value{ \item{tval}{The Student's t-test statistic.} \item{df}{The degrees of freedom} \item{pvalue}{The P-value, assuming a two-sided alternative.} } \author{Giovanni M. Marchetti} \seealso{\code{\link{pcor}}, \code{\link{shipley.test}}} \examples{ ## Are 2,3 independent given 1? data(marks) pcor.test(pcor(c(2,3,1), var(marks)), 1, n=88) } \keyword{htest} \keyword{multivariate} ggm/man/essentialGraph.Rd0000644000176200001440000000214112312776012015036 0ustar liggesusers\name{essentialGraph} \alias{essentialGraph} \title{Essential graph} \description{ Find the essential graph from a given directed acyclic graph. } \usage{ essentialGraph(dagx) } \arguments{ \item{dagx}{a square binary matrix, the adjacency matrix of a directed acyclic graph. The names of rows and of the columns are the nodes of the DAG.} } \details{Converts a DAG into the Essential Graph. Is implemented by the algorithm by D.M.Chickering (1995). } \value{ returns the adjacency matrix of the essential graph. } \references{ Chickering, D.M. (1995). A transformational characterization of equivalent Bayesian network structures. \emph{Proceedings of Eleventh Conference on Uncertainty in Artificial Intelligence}, Montreal, QU, 87-98. Morgan Kaufmann. \url{http://research.microsoft.com/~dmax/publications/uai95.pdf} } \author{Giovanni M. Marchetti, translating a MATLAB function by Tomas Kocka, AAU} \seealso{\code{\link{DAG}}, \code{\link{InducedGraphs}}} \examples{ dag = DAG(U ~ Y+Z, Y~X, Z~X) essentialGraph(dag) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/surdata.Rd0000644000176200001440000000107012312776012013530 0ustar liggesusers\name{surdata} \alias{surdata} \docType{data} \title{ A simulated data set } \description{ Simulated data following a seemingly unrelated regression model. } \usage{data(surdata)} \format{ A data frame with 600 observations on the following 4 variables. \describe{ \item{\code{A}}{a numeric response vector} \item{\code{B}}{a numeric response vector} \item{\code{X}}{a numeric vector} \item{\code{Z}}{a numeric vector with codes \code{1} and \code{-1} for a binary variables.} } } \examples{ data(surdata) pairs(surdata) } \keyword{datasets} ggm/man/RepMarDAG.Rd0000644000176200001440000000454012312776012013574 0ustar liggesusers\name{RepMarDAG} \alias{RepMarDAG} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Representational Markov equivalence to directed acyclic graphs. } \description{ \code{RepMarDAG} determines whether a given maximal ancestral graph can be Markov equivalent to a directed acyclic graph, and if that is the case, it finds a directed acyclic graph that is Markov equivalent to the given graph.} \usage{ RepMarDAG(amat) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{amat}{ An adjacency matrix, or a graph that can be a \code{graphNEL} or an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type, node1label, node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). } } \details{ \code{RepMarDAG} first looks whether the subgraph induced by full lines is chordal and whether there is a minimal collider path or cycle of length 4 in graph.} \value{ A list with two components: \code{verify} and \code{amat}. \code{verify} is a logical value, \code{TRUE} if there is a representational Markov equivalence and \code{FALSE} otherwise. \code{amat} is either \code{NA} if \code{verify == FALSE} or the adjacency matrix of the generated graph, if \code{verify == TRUE}. In this case it consists of 4 different integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added to be associated with multiple edges of different types. The matrix is symmetric w.r.t full lines and bi-directed arrows. } \references{ Sadeghi, K. (2011). Markov equivalences for subclasses of loopless mixed graphs. \emph{Submitted}, 2011. } \author{ Kayvan Sadeghi } %\note{ %% ~~further notes~~ %} \seealso{ \code{\link{MarkEqMag}}, \code{\link{MarkEqRcg}}, \code{\link{RepMarBG}}, \code{\link{RepMarUG}}} \examples{ H<-matrix(c(0,10,0,0,10,0,0,0,0,1,0,100,0,0,100,0),4,4) RepMarBG(H) } \keyword{graphs} \keyword{bidirected graph} \keyword{Markov equivalence} \keyword{maximal ancestral graph} \keyword{representational Markov equivalence} ggm/man/MRG.Rd0000644000176200001440000000774712336136204012532 0ustar liggesusers\name{MRG} \alias{MRG} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Maximal ribbonless graph } \description{ \code{MRG} generates and plots maximal ribbonless graphs (a modification of MC graph to use m-separation) after marginalisation and conditioning.} \usage{ MRG(amat,M=c(),C=c(),showmat=TRUE,plot=FALSE, plotfun = plotGraph, \dots) } \arguments{ \item{amat}{ An adjacency matrix, or a graph that can be a \code{graphNEL} or an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type, node1label, node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). } \item{M}{ A subset of the node set of \code{a} that is going to be marginalized over} \item{C}{ Another disjoint subset of the node set of \code{a} that is going to be conditioned on. } % \item{byrownames}{ %A logical value. \code{TRUE} (by default) if \code{M} and \code{C} are %the subsets of the labels of the node set of the %graph or the row-name of the matrix of the first argument. \code{FALSE} %if \code{M} and \code{C} are the subsets of the set \eqn{\{1, \dots, n\}}, where \eqn{n} is the %number of the nodes. %} \item{showmat}{ A logical value. \code{TRUE} (by default) to print the generated matrix. } \item{plot}{ A logical value, \code{FALSE} (by default). \code{TRUE} to plot the generated graph. } \item{plotfun}{ Function to plot the graph when \code{plot == TRUE}. Can be \code{plotGraph} (the default) or \code{drawGraph}. } \item{\dots}{Further arguments passed to \code{plotfun}.} } \details{ This function uses the functions \code{\link{RG}} and \code{\link{Max}}. } \value{ A matrix that consists 4 different integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added to be associated with multiple edges of different types. The matrix is symmetric w.r.t full lines and bi-directed arrows. } \references{ Koster, J.T.A. (2002). Marginalizing and conditioning in graphical models. \emph{Bernoulli}, 8(6), 817-840. Richardson, T.S. and Spirtes, P. (2002). Ancestral graph Markov models. \emph{Annals of Statistics}, 30(4), 962-1030. Sadeghi, K. (2011). Stable classes of graphs containing directed acyclic graphs. \emph{Submitted}. Sadeghi, K. and Lauritzen, S.L. (2011). Markov properties for loopless mixed graphs. \emph{Submitted}. URL \url{http://arxiv.org/abs/1109.5909}. } \author{ Kayvan Sadeghi } %\note{ %% ~~further notes~~ %} \seealso{ \code{\link{MAG}}, \code{\link{Max}}, \code{\link{MSG}}, \code{\link{RG}} } \examples{ ex <- matrix(c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ##The adjacency matrix of a DAG 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, 0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0),16,16, byrow = TRUE) M <- c(3,5,6,15,16) C <- c(4,7) MRG(ex, M, C, plot = TRUE) ################################################### H <- matrix(c( 0, 100, 1, 0, 100, 0, 100, 0, 0, 100, 0, 100, 0, 1, 100, 0), 4,4) Max(H) } \keyword{graphs} \keyword{directed acyclic graph} \keyword{marginalisation and conditioning} \keyword{maximality of graphs} \keyword{MC graph} \keyword{ribbonless graph}ggm/man/In.Rd0000644000176200001440000000175212312776012012442 0ustar liggesusers\name{In} \alias{In} \title{Indicator matrix} \description{ Finds the indicator matrix of the zeros of a matrix. } \usage{ In(A) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{A}{a matrix.} } \details{ The indicator matrix is a matrix of zeros and ones which has a zero element iff the corresponding element of \code{A} is (exactly) zero. } \value{a matrix of the same dimensions as \code{A}. } \references{ Wermuth, N. \& Cox, D.R. (2004). Joint response graphs and separation induced by triangular systems. \emph{J.R. Statist. Soc. B}, 66, Part 3, 687-717. } \author{Giovanni M. Marchetti} \seealso{\code{\link{DAG}}, \code{\link{inducedCovGraph}}, \code{\link{inducedConGraph}}} \examples{ ## A simple way to find the overall induced concentration graph ## The DAG on p. 198 of Cox & Wermuth (1996) amat <- DAG(y1 ~ y2 + y3, y3 ~ y5, y4 ~ y5) A <- edgematrix(amat) In(crossprod(A)) } \keyword{array} \keyword{algebra} \keyword{graphs} \keyword{multivariate} ggm/man/MarkEqMag.Rd0000644000176200001440000000301112312776012013667 0ustar liggesusers\name{MarkEqMag} \alias{MarkEqMag} \title{ Markov equivalence of maximal ancestral graphs } \description{ \code{MarkEqMag} determines whether two MAGs are Markov equivalent. } \usage{ MarkEqMag(amat, bmat) } \arguments{ \item{amat}{ An adjacency matrix of a MAG, or a graph that can be a \code{graphNEL} or an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type, node1label, node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). } \item{bmat}{ The same as \code{amat}. } } \details{ The function checks whether the two graphs have the same skeleton and colliders with order. } \value{ "Markov Equivalent" or "NOT Markov Equivalent". } \references{ Ali, R.A., Richardson, T.S. and Spirtes, P. (2009) Markov equivalence for ancestral graphs. \emph{Annals of Statistics}, 37(5B),2808-2837. } \author{ Kayvan Sadeghi } %\note{ %% ~~further notes~~ %} \seealso{ \code{\link{MarkEqRcg}}, \code{\link{msep}} } \examples{ H1<-matrix( c(0,100, 0, 0, 100, 0,100, 0, 0,100, 0,100, 0, 1,100, 0), 4, 4) H2<-matrix(c(0,0,0,0,1,0,100,0,0,100,0,100,0,1,100,0),4,4) H3<-matrix(c(0,0,0,0,1,0,0,0,0,1,0,100,0,1,100,0),4,4) MarkEqMag(H1,H2) MarkEqMag(H1,H3) MarkEqMag(H2,H3) } \keyword{graphs} \keyword{Markov equivalence} \keyword{maximal ancestral graphs} \keyword{multivariate}ggm/man/stress.Rd0000644000176200001440000000132512312776012013413 0ustar liggesusers\name{stress} \alias{stress} \docType{data} \title{ Stress } \description{ Stress data } \usage{data(stress)} \format{ A \eqn{4 \times 4} covariance matrix for the following variables. \describe{ \item{Y}{} \item{V}{} \item{X}{} \item{U}{} } } \details{ See Cox and Wermuth (1996). } \references{ Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate dependencies}. London: Chapman \& Hall. Slangen K., Kleemann P.P and Krohne H.W. (1993). Coping with surgical stress. In: Krohne H. W. (ed.). \emph{Attention and avoidance: Strategies in coping with aversiveness}. New York, Heidelberg: Springer, 321-346. } \examples{ data(stress) G = UG(~ Y*X + X*V + V*U + U*Y) fitConGraph(G, stress, 100) } \keyword{datasets} ggm/man/DG.Rd0000644000176200001440000000227512312776012012367 0ustar liggesusers\name{DG} \alias{DG} \title{Directed graphs} \description{ Defines the adjacency of a directed graph. } \usage{ DG(\dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{\dots}{a sequence of model formulae} } \details{ The directed graph is defined by a sequence of models formulae. For each formula the response defines a node of the graph and its parents. The graph contains no loops. } \value{the adjacency matrix of the directed graph, i.e., a square Boolean matrix of order equal to the number of nodes of the graph and a one in position \eqn{(i,j)} if there is an arrow from \eqn{i} to \eqn{j} and zero otherwise. The dimnames of the adjacency matrix are the labels for the nodes of the graph. } \references{ Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon Press. } \author{G. M. Marchetti} \seealso{\code{\link{DAG}}, \code{\link{UG}}} \examples{ ## A DAG DG(y ~ x, x ~ z, z ~ u) ## A cyclic directed graph DG(y ~ x, x ~ z, z ~ y) ## A graph with two arrows between two nodes DG(y ~ x, x ~ y) ## There can be isolated nodes DG(y ~ x, x ~ x) } \keyword{graphs} \keyword{directed graph} \keyword{models} \keyword{multivariate} ggm/man/blodiag.Rd0000644000176200001440000000160512312776012013472 0ustar liggesusers\name{blodiag} \alias{blodiag} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Block diagonal matrix } \description{ Split a vector x into a block diagonal matrix. } \usage{ blodiag(x, blo) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ A vector of length \code{n}. } \item{blo}{ A vector of positive integers such that \code{sum(blo) == n}. } } \value{ A block-diagonal matrix with as many row as elements of \code{blo} and \code{n} columns. The vector \code{x} is split into \code{length(blo)} sub-vectors and these are the blocks of the resulting matrix. } \author{ Giovanni M. Marchetti } \seealso{ \code{\link{blkdiag}}, \code{\link{diag}} } \examples{ blodiag(1:10, blo = c(2, 3, 5)) blodiag(1:10, blo = c(3,4,0,1)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{matrix} ggm/man/unmakeMG.Rd0000644000176200001440000000266612312776012013605 0ustar liggesusers\name{unmakeMG} \alias{unmakeMG} \title{Loopless mixed graphs components} \description{ Splits the adjacency matrix of a loopless mixed graph into three components: directed, undirected and bi-directed. } \usage{ unmakeMG(amat) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{amat}{a square matrix, with dimnames, representing a loopless mixed graph. The matrix consists of 4 different integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added to be associated with multiple edges of different types. The matrix is symmetric w.r.t full lines and bi-directed arrows. } } \value{ It is the inverse of \code{makeAG}. It returns the following components. \item{dg}{the adjacency matrix of the directed edges.} \item{ug}{the adjacency matrix of the undirected edges.} \item{bg}{the adjacency matrix of the bi-directed edges.} } \details{The matrices \code{ug}, and \code{bg} are just symmetric Boolean matrices.} \author{Mathias Drton, Giovanni M. Marchetti} \seealso{\code{\link{makeMG}}} \examples{ ag <- makeMG(ug=UG(~y0*y1), dg=DAG(y4~y2, y2~y1), bg=UG(~y2*y3+y3*y4)) isAG(ag) unmakeMG(ag) } \keyword{graphs} \keyword{ancestral graph} \keyword{mixed graph} \keyword{models} \keyword{multivariate} ggm/man/fitConGraph.Rd0000644000176200001440000000503112463742360014300 0ustar liggesusers\name{fitConGraph} \alias{fitConGraph} \title{Fitting a Gaussian concentration graph model} \description{ Fits a concentration graph (a covariance selection model). } \usage{ fitConGraph(amat, S, n, cli = NULL, alg = 3, pri = FALSE, tol = 1e-06) } \arguments{ \item{amat}{a square Boolean matrix representing the adjacency matrix of an UG} \item{S}{the sample covariance matrix} \item{n}{an integer denoting the sample size} \item{cli}{a list containing the cliques of the graph. The components of the list are character vectors containing the names of the nodes in the cliques. The names must match the names of the vertices. The knowledge of the cliques is not needed. If the cliques are not specified the function uses the algorithm by Hastie et al. (2009, p. 446).} \item{alg}{The algorithm used.} \item{pri}{If TRUE is verbose} \item{tol}{a small positive number indicating the tolerance used in convergence tests.} } \details{ The algorithms for fitting concentration graph models by maximum likelihood are discussed in Speed and Kiiveri (1986). If the cliques are known the function uses the iterative proportional fitting algorithm described by Whittaker (1990, p. 184). If the cliques are not specified the function uses the algorithm by Hastie et al. (2009, p. 631ff). } \value{ \item{Shat}{the fitted covariance matrix.} \item{dev}{the `deviance' of the model.} \item{df}{the degrees of freedom.} \item{it}{the iterations.} } \references{ Cox, D. R. and Wermuth, N. (1996). \emph{Multivariate dependencies}. London: Chapman \& Hall. Hastie, T., Tibshirani, R. and Friedman, J. (2009). \emph{The elements of statistical learning.} Springer Verlag: New York. Speed, T.P. and Kiiveri, H (1986). Gaussian Markov distributions over finite graphs. \emph{Annals of Statistics}, 14, 138--150. Whittaker, J. (1990). \emph{Graphical models in applied multivariate statistics}. Chichester: Wiley. } \author{Giovanni M. Marchetti} \seealso{\code{\link{UG}}, \code{\link{fitDag}}, \code{\link{marks}}} \examples{ ## A model for the mathematics marks (Whittaker, 1990) data(marks) ## A butterfly concentration graph G <- UG(~ mechanics*vectors*algebra + algebra*analysis*statistics) fitConGraph(G, cov(marks), nrow(marks)) ## Using the cliques cl = list(c("mechanics", "vectors", "algebra"), c("algebra", "analysis" , "statistics")) fitConGraph(G, S = cov(marks), n = nrow(marks), cli = cl) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/UG.Rd0000644000176200001440000000345512312776012012411 0ustar liggesusers\name{UG} \alias{UG} \title{Defining an undirected graph (UG)} \description{ A simple way to define an undirected graph by means of a single model formula. } \usage{ UG(f) } \arguments{ \item{f}{a single model formula without response} } \details{ The undirected graph \eqn{G = (V, E)} is defined by a set of nodes \eqn{V} and a set of pairs \eqn{E}. The set of pairs is defined by the set of interactions in the formula. Interactions define complete subgraphs (not necessarily maximal) of the UG. The best way is to specify interactions that match the cliques of the undirected graph. This is the standard way to define graphical models for contingency tables. Remember that some hierarchical models are not graphical, but they imply the same graph. The function returns the edge matrix of the graph, i.e. a square Boolean matrix of order equal to the number of nodes of the graph and a one in position \eqn{(i,j)} if there is an arrow from \eqn{j} to \eqn{i} and zero otherwise. By default this matrix has ones along the main diagonal. For UGs this matrix is symmetric. The dimnames of the edge matrix are the nodes of the UG. } \value{ a Boolean matrix with dimnames, the adjacency matrix of the undirected graph. } \references{Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon Press.} \author{Giovanni M. Marchetti} \seealso{\code{\link{fitConGraph}}, \code{\link{fitCovGraph}}, \code{\link{DAG}}} \examples{ ## X independent of Y given Z UG(~ X*Z + Y*Z) # The saturated model UG(~ X*Y*Z) ## The model without three-way interactions has the same graph UG(~ X*Y + Y*Z + Z*X) UG(~ (X + Y + Z)^2) ## Butterfly model defined from the cliques UG(~ mec*vec*alg + alg*ana*sta) ## Some isolated nodes UG(~x*y*z + a + b) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/checkIdent.Rd0000644000176200001440000000450312312776012014132 0ustar liggesusers\name{checkIdent} \alias{checkIdent} \title{Identifiability of a model with one latent variable} \description{ Checks four sufficient conditions for identifiability of a Gaussian DAG model with one latent variable. } \usage{ checkIdent(amat, latent) } \arguments{ \item{amat}{a square matrix with dimnames, representing the adjacency matrix of a DAG.} \item{latent}{an integer representing the latent variables among the nodes, or the name of the node.} } \details{ Stanghellini and Wermuth (2005) give some sufficient conditions for checking if a Gaussian model that factorizes according to a DAG is identified when there is one hidden node over which we marginalize. Specifically, the function checks the conditions of Theorem 1, (i) and (ii) and of Theorem 2 (i) and (ii). } \value{ a vector of length four, indicating if the model is identified according to the conditions of theorems 1 and 2 in Stanghellini \& Wermuth (2005). The answer is \code{TRUE} if the condition holds and thus the model is globally identified or \code{FALSE} if the condition fails, and thus we do not know if the model is identifiable. } \references{ Stanghellini, E. \& Wermuth, N. (2005). On the identification of path-analysis models with one hidden variable. \emph{Biometrika}, 92(2), 337-350. } \author{Giovanni M. Marchetti} \seealso{\code{\link{isGident}}, \code{\link{InducedGraphs}}} \examples{ ## See DAG in Figure 4 (a) in Stanghellini & Wermuth (2005) d <- DAG(y1 ~ y3, y2 ~ y3 + y5, y3 ~ y4 + y5, y4 ~ y6) checkIdent(d, "y3") # Identifiable checkIdent(d, "y4") # Not identifiable? ## See DAG in Figure 5 (a) in Stanghellini & Wermuth (2005) d <- DAG(y1 ~ y5+y4, y2 ~ y5+y4, y3 ~ y5+y4) checkIdent(d, "y4") # Identifiable checkIdent(d, "y5") # Identifiable ## A simple function to check identifiability for each node is.ident <- function(amat){ ### Check suff. conditions on each node of a DAG. p <- nrow(amat) ## Degrees of freedom df <- p*(p+1)/2 - p - sum(amat==1) - p + 1 if(df <= 0) warning(paste("The degrees of freedom are ", df)) a <- rownames(amat) for(i in a) { b <- checkIdent(amat, latent=i) if(TRUE \%in\% b) cat("Node", i, names(b)[!is.na(b)], "\n") else cat("Unknown.\n") } } } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/DAG.Rd0000644000176200001440000000447612312776012012475 0ustar liggesusers\name{DAG} \alias{DAG} \title{Directed acyclic graphs (DAGs)} \description{ A simple way to define a DAG by means of regression model formulae. } \usage{ DAG(\dots, order = FALSE) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{\dots}{a sequence of model formulae} \item{order}{logical, defaulting to \code{FALSE}. If \code{TRUE} the nodes of the DAG are permuted according to the topological order. If \code{FALSE} the nodes are in the order they first appear in the model formulae (from left to right).} } \details{ The DAG is defined by a sequence of recursive regression models. Each regression is defined by a model formula. For each formula the response defines a node of the graph and the explanatory variables the parents of that node. If the regressions are not recursive the function returns an error message. Some authors prefer the terminology acyclic directed graphs (ADG). } \value{the adjacency matrix of the DAG, i.e. a square Boolean matrix of order equal to the number of nodes of the graph and a one in position \eqn{(i,j)} if there is an arrow from \eqn{i} to \eqn{j} and zero otherwise. The rownames of the adjacency matrix are the nodes of the DAG. If \code{order = TRUE} the adjacency matrix is permuted to have parents before children. This can always be done (in more than one way) for DAGs. The resulting adjacency matrix is upper triangular. } \references{ Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon Press. } \author{G. M. Marchetti} \note{The model formulae may contain interactions, but they are ignored in the graph.} \seealso{\code{\link{UG}}, \code{\link{topSort}}, \code{\link{edgematrix}}, \code{\link{fitDag}}} \examples{ ## A Markov chain DAG(y ~ x, x ~ z, z ~ u) ## Another DAG DAG(y ~ x + z + u, x ~ u, z ~ u) ## A DAG with an isolated node DAG(v ~ v, y ~ x + z, z ~ w + u) ## There can be repetitions DAG(y ~ x + u + v, y ~ z, u ~ v + z) ## Interactions are ignored DAG(y ~ x*z + z*v, x ~ z) ## A cyclic graph returns an error! \dontrun{DAG(y ~ x, x ~ z, z ~ y)} ## The order can be changed DAG(y ~ z, y ~ x + u + v, u ~ v + z) ## If you want to order the nodes (topological sort of the DAG) DAG(y ~ z, y ~ x + u + v, u ~ v + z, order=TRUE) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/basiSet.Rd0000644000176200001440000000257212312776012013467 0ustar liggesusers\name{basiSet} \alias{basiSet} \title{Basis set of a DAG} \description{ Finds a basis set for the conditional independencies implied by a directed acyclic graph, that is a minimal set of independencies that imply all the other ones. } \usage{ basiSet(amat) } \arguments{ \item{amat}{a square matrix with dimnames representing the adjacency matrix of a DAG.} } \details{ Given a DAG and a pair of non adjacent nodes \eqn{(i,j)} such that \eqn{j} has higher causal order than \eqn{i}, the set of independency statements \eqn{i} independent of \eqn{j} given the union of the parents of both \eqn{i} and \eqn{j} is a basis set (see Shipley, 2000). This basis set has the property to lead to independent test statistics. } \value{ a list of vectors representing several conditional independence statements. Each vector contains the names of two non adjacent nodes followed by the names of nodes in the conditioning set (which may be empty). } \references{Shipley, B. (2000). A new inferential test for path models based on directed acyclic graphs. \emph{Structural Equation Modeling}, 7(2), 206--218.} \author{Giovanni M. Marchetti} \seealso{\code{\link{shipley.test}}, \code{\link{dSep}}, \code{\link{DAG}}} \examples{ ## See Shipley (2000), Figure 2, p. 213 A <- DAG(x5~ x3+x4, x3~ x2, x4~x2, x2~ x1) basiSet(A) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/mat.mlogit.Rd0000644000176200001440000000223412312776012014143 0ustar liggesusers\name{mat.mlogit} \alias{mat.mlogit} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Multivariate logistic parametrization } \description{ Find matrices \code{C} and \code{M} of e binary multivariate logistic parameterization. } \usage{ mat.mlogit(d, P = powerset(1:d)) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{d}{ A positive integer, the number of binary responses. } \item{P}{ A list of vectors of integers specifying margins. For instance \code{list(1, 2, c(1,2))}. Default: the power set of \code{1:d}. } } \details{ The power set is in the order of dimensions of the sets. } \value{ \item{C}{A contrast matrix.} \item{L}{A marginalization matrix.} } \references{ Glonek, G. J. N. and McCullagh, P. (1995). Multivariate logistic models. Journal of the Royal Statistical Society, Ser. B 57, 533-546. } \author{ Giovanni M. Marchetti } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{binomial}, \link{marg.param}} } \examples{ mat.mlogit(2) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{logistic model} ggm/man/InducedGraphs.Rd0000644000176200001440000001525412312776012014616 0ustar liggesusers\name{InducedGraphs} \alias{inducedCovGraph} \alias{inducedConGraph} \alias{inducedRegGraph} \alias{inducedChainGraph} \alias{inducedDAG} \alias{InducedGraphs} \title{Graphs induced by marginalization or conditioning} \description{ Functions to find induced graphs after conditioning on a set of variables and marginalizing over another set. } \usage{ inducedCovGraph(amat, sel = rownames(amat), cond = NULL) inducedConGraph(amat, sel = rownames(amat), cond = NULL) inducedRegGraph(amat, sel = rownames(amat), cond = NULL) inducedChainGraph(amat, cc=rownames(amat), cond = NULL, type="LWF") inducedDAG(amat, order, cond = NULL) } \arguments{ \item{amat}{a square Boolean matrix, the adjacency matrix of a directed acyclic graph. The names of rows and of the columns are the nodes of the DAG.} \item{sel}{a character vector representing a subset of selected variables. The elements of the vector must be a subset of the names of the nodes i.e. of \code{rownames(A)}. By default \code{sel} is the set of the nodes of the DAG.} \item{cond}{a character vector representing the variables on which you want to condition. \code{cond} must be disjoint from \code{sel} and their union must be a subset of the set of nodes. The set difference between the set of nodes and the union of \code{sel} and \code{cond} are the variables over which we marginalize. \code{cond} may be the null vector (the default), meaning that you want to condition on the empty set.} \item{cc}{a list of character vectors specifying the chain components for the chain graph.} \item{type}{a string indicating the interpretation of the chain graph. It can be either "LWF" (Lauritzen, Wermuth, Frydenberg interpretation), "AMP" (Andersson, Madigan, Perlman interpretation) or "MRG" (Multivariate regression graph interpretation).} \item{order}{a character vector indicating the ordering of the vertices of a DAG (left to right, past to future).} } \details{Given a directed acyclic graph representing a set of conditional independencies it is possible to obtain other graphs of conditional independence implied after marginalizingover and conditionig on sets of nodes. Such graphs are the covariance graph, the concentration graph, the multivariate regression graph and the chain graph with different interpretations (see Cox \& Wermuth, 1996, 2004). } \value{ \code{inducedCovGraph} returns the adjacency matrix of the covariance graph of the variables in set \code{sel} given the variables in set \code{cond}, implied by the original directed acyclic graph with adjacency matrix \code{amat}. \code{inducedConGraph} returns the adjacency matrix of the concentration graph of the variables in set \code{sel} given the variables in set \code{cond}, implied by the original directed acyclic graph with adjacency matrix \code{amat}. \code{inducedRegGraph} returns the adjacency matrix of the multivariate regression graph of the variables in set \code{sel} given the variables in set \code{cond}, implied by the original directed acyclic graph with adjacency matrix \code{amat}. \code{inducedChainGraph} returns the adjacency matrix of the chain graph for the variables in chain components \code{cc}, given the variables in set \code{cond}, with interpretation specified by string \code{type}, implied by the original directed acyclic graph with adjacency matrix \code{amat}. \code{inducedDAG} returns the adjacency matrix of the DAG with the ordering \code{order}, implied by the original directed acyclic graph with adjacency matrix \code{amat}. } \note{ If \code{sel} is \code{NULL} the functions return the null matrix. If \code{cond} is \code{NULL}, the conditioning set is empty and the functions \code{inducedConGraph} and \code{inducedCovGraph} return the overall induced covariance or concentration matrices of the selected variables. If you do not specify \code{sel} you cannot specify a non \code{NULL} value of \code{cond}. } \references{ Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate dependencies}. London: Chapman \& Hall. Wermuth, N. \& Cox, D.R. (2004). Joint response graphs and separation induced by triangular systems. \emph{J.R. Statist. Soc. B}, 66, Part 3, 687-717. } \author{Giovanni M. Marchetti} \seealso{\code{\link{DAG}}, \code{\link{UG}},\code{\link{isAcyclic}}} \examples{ ## Define a DAG dag <- DAG(a ~ x, c ~ b+d, d~ x) dag ## Induced covariance graph of a, b, d given the empty set. inducedCovGraph(dag, sel=c("a", "b", "d"), cond=NULL) ## Induced concentration graph of a, b, c given x inducedConGraph(dag, sel=c("a", "b", "c"), cond="x") ## Overall covariance graph inducedCovGraph(dag) ## Overall concentration graph inducedConGraph(dag) ## Induced covariance graph of x, b, d given c, x. inducedCovGraph(dag, sel=c("a", "b", "d"), cond=c("c", "x")) ## Induced concentration graph of a, x, c given d, b. inducedConGraph(dag, sel=c("a", "x", "c"), cond=c("d", "b")) ## The DAG on p. 198 of Cox & Wermuth (1996) dag <- DAG(y1~ y2 + y3, y3 ~ y5, y4 ~ y5) ## Cf. figure 8.7 p. 203 in Cox & Wermuth (1996) inducedCovGraph(dag, sel=c("y2", "y3", "y4", "y5"), cond="y1") inducedCovGraph(dag, sel=c("y1", "y2", "y4", "y5"), cond="y3") inducedCovGraph(dag, sel=c("y1", "y2", "y3", "y4"), cond="y5") ## Cf. figure 8.8 p. 203 in Cox & Wermuth (1996) inducedConGraph(dag, sel=c("y2", "y3", "y4", "y5"), cond="y1") inducedConGraph(dag, sel=c("y1", "y2", "y4", "y5"), cond="y3") inducedConGraph(dag, sel=c("y1", "y2", "y3", "y4"), cond="y5") ## Cf. figure 8.9 p. 204 in Cox & Wermuth (1996) inducedCovGraph(dag, sel=c("y2", "y3", "y4", "y5"), cond=NULL) inducedCovGraph(dag, sel=c("y1", "y2", "y4", "y5"), cond=NULL) inducedCovGraph(dag, sel=c("y1", "y2", "y3", "y4"), cond=NULL) ## Cf. figure 8.10 p. 204 in Cox & Wermuth (1996) inducedConGraph(dag, sel=c("y2", "y3", "y4", "y5"), cond=NULL) inducedConGraph(dag, sel=c("y1", "y2", "y4", "y5"), cond=NULL) inducedConGraph(dag, sel=c("y1", "y2", "y3", "y4"), cond=NULL) ## An induced regression graph dag2 = DAG(Y ~ X+U, W ~ Z+U) inducedRegGraph(dag2, sel="W", cond=c("Y", "X", "Z")) ## An induced DAG inducedDAG(dag2, order=c("X","Y","Z","W")) ## An induced multivariate regression graph inducedRegGraph(dag2, sel=c("Y", "W"), cond=c("X", "Z")) ## An induced chain graph with LWF interpretation dag3 = DAG(X~W, W~Y, U~Y+Z) cc = list(c("W", "U"), c("X", "Y", "Z")) inducedChainGraph(dag3, cc=cc, type="LWF") ## ... with AMP interpretation inducedChainGraph(dag3, cc=cc, type="AMP") ## ... with multivariate regression interpretation cc= list(c("U"), c("Z", "Y"), c("X", "W")) inducedChainGraph(dag3, cc=cc, type="MRG") } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/SG.Rd0000644000176200001440000000605012336141515012401 0ustar liggesusers\name{SG} \alias{SG} %- Also NEED an '\alias' for EACH other topic documented here. \title{ summary graph } \description{ \code{SG} generates and plots summary graphs after marginalization and conditioning.} \usage{ SG(amat,M=c(),C=c(),showmat=TRUE,plot=FALSE, plotfun = plotGraph, \dots) } \arguments{ \item{amat}{ An adjacency matrix, or a graph that can be a \code{graphNEL} or an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type, node1label, node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). } \item{M}{ A subset of the node set of \code{a} that is going to be marginalised over} \item{C}{ Another disjoint subset of the node set of \code{a} that is going to be conditioned on. } \item{showmat}{ A logical value. \code{TRUE} (by default) to print the generated matrix. } \item{plot}{ A logical value, \code{FALSE} (by default). \code{TRUE} to plot the generated graph. } \item{plotfun}{ Function to plot the graph when \code{plot == TRUE}. Can be \code{plotGraph} (the default) or \code{drawGraph}. } \item{\dots}{Further arguments passed to \code{plotfun}.} } \value{ A matrix that consists 4 different integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added to be associated with multiple edges of different types. The matrix is symmetric w.r.t full lines and bi-directed arrows. } \references{ Sadeghi, K. (2011). Stable classes of graphs containing directed acyclic graphs. \emph{Submitted}. Wermuth, N. (2011). Probability distributions with summary graph structure. \emph{Bernoulli}, 17(3),845-879. } \author{ Kayvan Sadeghi } %\note{ %% ~~further notes~~ %} \seealso{ \code{\link{AG}}, \code{\link{MSG}}, \code{\link{RG}}} \examples{ ex <- matrix(c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ##The adjacency matrix of a DAG 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, 0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0),16,16, byrow = TRUE) M <- c(3,5,6,15,16) C <- c(4,7) SG(ex, M, C, plot = TRUE) SG(ex, M, C, plot = TRUE, plotfun = drawGraph, adjust = FALSE) } \keyword{graphs} \keyword{directed acyclic graph} \keyword{marginalization and conditioning} \keyword{summary graph} ggm/man/pcor.Rd0000644000176200001440000000225212312776012013033 0ustar liggesusers\name{pcor} \alias{pcor} \title{Partial correlation} \description{ Computes the partial correlation between two variables given a set of other variables. } \usage{ pcor(u, S) } \arguments{ \item{u}{a vector of integers of length > 1. The first two integers are the indices of variables the correlation of which must be computed. The rest of the vector is the conditioning set. } \item{S}{a symmetric positive definite matrix, a sample covariance matrix.} } \value{ a scalar, the partial correlation matrix between variables \code{u[1]} and \code{u[2]} given \code{u[-c(1,2)]}. } \author{Giovanni M. Marchetti} \seealso{\code{\link{cor}}, \code{\link{parcor}}, \code{\link{correlations}}} \examples{ data(marks) ## The correlation between vectors and algebra given analysis and statistics pcor(c("vectors", "algebra", "analysis", "statistics"), var(marks)) ## The same pcor(c(2,3,4,5), var(marks)) ## The correlation between vectors and algebra given statistics pcor(c("vectors", "algebra", "statistics"), var(marks)) ## The marginal correlation between analysis and statistics pcor(c("analysis","statistics"), var(marks)) } \keyword{models} \keyword{multivariate} ggm/man/bfsearch.Rd0000644000176200001440000000272312312776012013650 0ustar liggesusers\name{bfsearch} \alias{bfsearch} %- Also NEED an '\alias' for EACH other topic documented here. \title{Breadth first search} \description{Breadth-first search of a connected undirected graph. } \usage{ bfsearch(amat, v = 1) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{amat}{a symmetric matrix with dimnames specifying the adjacency matrix of the undirected graph} \item{v}{an integer, indicating the starting node of the search. Defaults to the first node. } } \details{ Breadth-first search is a systematic method for exploring a graph. The algorithm is taken from Aho, Hopcroft \& Ullman (1983). } \value{ \item{tree}{the edge matrix of the resulting spanning tree} \item{branches}{a matrix with two columns, giving the indices of the branches of the spanning tree} \item{chords}{a matrix with two columns, giving the indices of the chords of the spanning tree} } \references{ Aho, A.V., Hopcrtoft, J.E. \& Ullman, J.D. (1983). \emph{Data structures and algorithms.} Reading: Addison-Wesley. Thulasiraman, K. \& Swamy, M.N.S. (1992). \emph{Graphs: theory and algorithms}. New York: Wiley. } \author{Giovanni M. Marchetti} \seealso{\code{\link{UG}}, \code{\link{findPath}}, \code{\link{cycleMatrix}}} \examples{ ## Finding a spanning tree of the butterfly graph bfsearch(UG(~ a*b*o + o*u*j)) ## Starting from another node bfsearch(UG(~ a*b*o + o*u*j), v=3) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/MAG.Rd0000644000176200001440000000727012336136156012506 0ustar liggesusers\name{MAG} \alias{MAG} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Maximal ancestral graph } \description{ \code{MAG} generates and plots maximal ancestral graphs after marginalisation and conditioning.} \usage{ MAG(amat,M=c(),C=c(),showmat=TRUE,plot=FALSE, plotfun = plotGraph, \dots) } \arguments{ \item{amat}{ An adjacency matrix, or a graph that can be a \code{graphNEL} or an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type, node1label, node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). } \item{M}{ A subset of the node set of \code{a} that is going to be marginalized over} \item{C}{ Another disjoint subset of the node set of \code{a} that is going to be conditioned on. } %\item{byrownames}{ %A logical value. \code{TRUE} (by default) if \code{M} and \code{C} are %the subsets of the labels of the node set of the %graph or the row-name of the matrix of the first argument. \code{FALSE} %if \code{M} and \code{C} are the subsets of the set \eqn{\{1,\dots,n\}}, where \eqn{n} is the %number of the nodes.} \item{showmat}{ A logical value. \code{TRUE} (by default) to print the generated matrix. } \item{plot}{ A logical value, \code{FALSE} (by default). \code{TRUE} to plot the generated graph. } \item{plotfun}{ Function to plot the graph when \code{plot == TRUE}. Can be \code{plotGraph} (the default) or \code{drawGraph}. } \item{\dots}{Further arguments passed to \code{plotfun}.} } \details{ This function uses the functions \code{\link{AG}} and \code{\link{Max}}. } \value{ A matrix that consists 4 different integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added to be associated with multiple edges of different types. The matrix is symmetric w.r.t full lines and bi-directed arrows. } \references{ Richardson, T. S. and Spirtes, P. (2002). Ancestral graph Markov models. \emph{Annals of Statistics}, 30(4), 962-1030. Sadeghi, K. (2011). Stable classes of graphs containing directed acyclic graphs. \emph{Submitted}. Sadeghi, K. and Lauritzen, S.L. (2011). Markov properties for loopless mixed graphs. \emph{Submitted}. URL \url{http://arxiv.org/abs/1109.5909}.} \author{ Kayvan Sadeghi } %\note{ %% ~~further notes~~ %} \seealso{ \code{\link{AG}}, \code{\link{Max}}, \code{\link{MRG}}, \code{\link{MSG}} } \examples{ ex<-matrix(c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ##The adjacency matrix of a DAG 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, 0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0), 16, 16, byrow = TRUE) M <- c(3,5,6,15,16) C <- c(4,7) MAG(ex, M, C, plot=TRUE) ################################################### H <- matrix(c(0,100,1,0,100,0,100,0,0,100,0,100,0,1,100,0),4,4) Max(H) } \keyword{ancestral graph} \keyword{directed acyclic graph} \keyword{marginalization and conditioning} \keyword{maximality of graphs} ggm/man/derived.Rd0000644000176200001440000000313412312776012013512 0ustar liggesusers\name{derived} \alias{derived} \docType{data} \title{Data on blood pressure body mass and age } \description{ Raw data on blood pressure, body mass and age on 44 female patients, and covariance matrix for derived variables. } \usage{data(derived)} \format{A list containing a dataframe \code{raw} with 44 lines and 5 columns and a symmetric 4x4 covariance matrix \code{S}. The following is the description of the variables in the dataframe \code{raw} \describe{ \item{\code{Sys}}{Systolic blood pressure, in mm Hg} \item{\code{Dia}}{Diastolic blood pressure, in mm Hg} \item{\code{Age}}{Age of the patient, in years} \item{\code{Hei}}{Height, in cm} \item{\code{Wei}}{Weight, in kg} } The following is the description of the variables for the covariance matrix \code{S}. \describe{ \item{\code{Y}}{Derived variable \code{Y=log(Sys/Dia)}} \item{\code{X}}{Derived variables \code{X=log(Dia)}} \item{\code{Z}}{Body mass index \code{Z=Wei/(Hei/100)^2}} \item{\code{W}}{Age} } } \references{ Wermuth N. and Cox D.R. (1995). Derived variables calculated from similar joint responses: some characteristics and examples. \emph{Computational Statistics and Data Analysis}, 19, 223-234. } \examples{ # A DAG model with a latent variable U G = DAG(Y ~ Z + U, X ~ U + W, Z ~ W) data(derived) # The model fitted using the derived variables out = fitDagLatent(G, derived$S, n = 44, latent = "U") # An ancestral graph model marginalizing over U H = AG(G, M = "U") # The ancestral graph model fitted obtaining the # same result out2 = fitAncestralGraph(H, derived$S, n = 44) } \keyword{datasets} ggm/man/findPath.Rd0000644000176200001440000000226713617225610013635 0ustar liggesusers\name{findPath} \alias{findPath} \title{Finding paths} \description{ Finds one path between two nodes of a graph. } \usage{ findPath(amat, st, en, path = c()) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{amat}{a square Boolean matrix with dimnames, the adjacency matrix of a graph.} \item{st}{an integer, the starting node.} \item{en}{an integer, the ending node.} \item{path}{a vector of integers, used in recursive calls. At the beginning is \code{NULL}. It should not be modified by the user.} } \value{ a vector of integers, the sequence of nodes of a path, starting from \code{st} to \code{en}. In some graphs (spanning trees) there is only one path between two nodes. } \note{ This function is not intended to be directly called by the user. } \references{Python Softftware Foundation (2003). Python Patterns --- Implementing Graphs. \url{http://www.python.org/doc/essays/graphs/}.} \author{Giovanni M. Marchetti, translating the original \pkg{Python} code (see references).} \seealso{\code{\link{fundCycles}}} \examples{ ## A (single) path on a spanning tree findPath(bfsearch(UG(~ a*b*c + b*d + d*e+ e*c))$tree, st=1, en=5) } \keyword{graphs} ggm/man/rsphere.Rd0000644000176200001440000000146412312776012013544 0ustar liggesusers\name{rsphere} \alias{rsphere} \title{Random vectors on a sphere} \description{ Generates a sample of points uniformly distributed on the surface of a sphere in d-space. } \usage{ rsphere(n, d) } \arguments{ \item{n}{an integer, the sample size.} \item{d}{an integer, the dimension of the space. For example, a circle is defined in 2D-space, a sphere in 3D-space.} } \details{The algorithm is based on normalizing to length 1 each d-vector of a sample from a multivariate normal \eqn{N(0, I)}. } \value{a matrix of \code{n} rows and \code{d} columns. } \author{Giovanni M. Marchetti} \seealso{\code{\link{rnorm}}, \code{\link{rcorr}}} \examples{ ## 100 points on circle z <- rsphere(100,2) plot(z) ## 100 points on a sphere z <- rsphere(100, 3) pairs(z) } \keyword{distribution} \keyword{multivariate} ggm/man/drawGraph.Rd0000644000176200001440000001243112352743055014014 0ustar liggesusers\name{drawGraph} \alias{drawGraph} \title{Drawing a graph with a simple point and click interface.} \description{Draw a graph from its adjacency matrix representation.} \usage{ drawGraph(amat, coor = NULL, adjust = FALSE, alpha = 1.5, beta = 3, lwd = 1, ecol = "blue", bda = 0.1, layout = layout.auto) } \arguments{ \item{amat}{the adjacency matrix representation of the graph. This can be an undirected graph, a directed acyclic graph or a mixed graph with at most a summary graph structure. See also \code{\link{plotGraph}}}. \item{coor}{an optional matrix of dimensions \code{p} x 2 where \eqn{p} is the number of vertices of the graph. If \code{coor=NULL} then the function chooses a default position for the nodes.} \item{adjust}{a logical value, defaults to \code{FALSE}. If \code{TRUE} the graph is plotted and the system waits until the mouse button is pressed (same behaviour of \code{locator} function.} \item{alpha}{a positive value between controlling the distance from the end of the edges to the nodes of the graph.} \item{beta}{a positive value controlling the distance of the labels of the variables from the nodes.} \item{lwd}{line width of the edges (default: 1).} \item{ecol}{color of the edges (default: "blue").} \item{bda}{bidirected edge arrow length (default: 0.1).} \item{layout}{The name of a function used to compute the (initial) layout of the graph. The default is \code{layout.auto}. This can be further adjusted if \code{adjust} is \code{TRUE}.} } \details{ The function is a very simple tool useful for displaying small graphs, with a rudimentary interface for moving nodes and edges of a given graph and adjusting the final plot. For better displays use \pkg{dynamicGraph} or \pkg{Rgraphviz} package in Bioconductor project. } \value{ The function plots the graph with a initial positioning of the nodes, as specified by \code{coor} and remains in a waiting state. The position of each node can be shifted by pointing and clicking (with the first mouse button) close to the node. When the mouse button is pressed the node which is closer to the selected point is moved to that position. Thus, one must be careful to click closer to the selected node than to any other node. The nodes can be moved to any position by repeating the previous operation. The adjustment process is terminated by pressing any mouse button other than the first. At the end of the process, the function returns invisibly the coordinates of the nodes. The coordinates may be used later to redisplay the graph. } \references{ \pkg{dynamicGraph}, \pkg{Rgraphwiz}, \url{http://www.bioconductor.org}. GraphViz, Graph Visualization Project. AT\&T Research. \url{http://www.graphviz.org}. } \author{Giovanni M. Marchetti} \seealso{\code{\link{UG}}, \code{\link{DAG}}, \code{\link{makeMG}}, \code{\link{plotGraph}}} \examples{ ## A directed acyclic graph d <- DAG(y1 ~ y2+y6, y2 ~ y3, y3 ~ y5+y6, y4 ~ y5+y6) \dontrun{drawGraph(d)} ## An undirected graph g <- UG(~giova*anto*armo + anto*arj*sara) \dontrun{drawGraph(d)} ## An ancestral graph ag <- makeMG(ug=UG(~y0*y1), dg=DAG(y4~y2, y2~y1), bg=UG(~y2*y3+y3*y4)) drawGraph(ag, adjust = FALSE) drawGraph(ag, adjust = FALSE) ## A more complex example with coordinates: the UNIX evolution xy <- structure(c(5, 15, 23, 25, 26, 17, 8, 6, 6, 7, 39, 33, 23, 49, 19, 34, 13, 29, 50, 68, 70, 86, 89, 64, 81, 45, 64, 49, 64, 87, 65, 65, 44, 37, 64, 68, 73, 85, 83, 95, 84, 0, 7, 15, 27, 44, 37, 36, 20, 51, 65, 44, 64, 59, 73, 69, 78, 81, 90, 97, 89, 72, 85, 74, 62, 68, 59, 52, 48, 43, 50, 34, 21, 18, 5, 1, 10, 2, 11, 2, 1, 44), .Dim = c(41, 2), .Dimnames = list(NULL, c("x", "y"))) Unix <- DAG( SystemV.3 ~ SystemV.2, SystemV.2 ~ SystemV.0, SystemV.0 ~ TS4.0, TS4.0 ~ Unix.TS3.0 + Unix.TS.PP + CB.Unix.3, PDP11.SysV ~ CB.Unix.3, CB.Unix.3 ~ CB.Unix.2, CB.Unix.2 ~ CB.Unix.1, Unix.TS.PP ~ CB.Unix.3, Unix.TS3.0 ~ Unix.TS1.0 + PWB2.0 + USG3.0 + Interdata, USG3.0 ~ USG2.0, PWB2.0 ~ Interdata + PWB1.2, USG2.0 ~ USG1.0, CB.Unix.1 ~ USG1.0, PWB1.2 ~ PWB1.0, USG1.0 ~ PWB1.0, PWB1.0 ~ FifthEd, SixthEd ~ FifthEd, LSX ~ SixthEd, MiniUnix ~ SixthEd, Interdata ~ SixthEd, Wollongong ~ SixthEd, SeventhEd ~ Interdata, BSD1 ~ SixthEd, Xenix ~ SeventhEd, V32 ~ SeventhEd, Uniplus ~ SeventhEd, BSD3 ~ V32, BSD2 ~ BSD1, BSD4 ~ BSD3, BSD4.1 ~ BSD4, EigthEd ~ SeventhEd + BSD4.1, NinethEd ~ EigthEd, Ultrix32 ~ BSD4.2, BSD4.2 ~ BSD4.1, BSD4.3 ~ BSD4.2, BSD2.8 ~ BSD4.1 + BSD2, BSD2.9 ~ BSD2.8, Ultrix11 ~ BSD2.8 + V7M + SeventhEd, V7M ~ SeventhEd ) drawGraph(Unix, coor=xy, adjust=FALSE) # dev.print(file="unix.fig", device=xfig) # Edit the graph with Xfig } \keyword{graphs} \keyword{hplot} \keyword{iplot} ggm/man/isAG.Rd0000644000176200001440000000260412312776012012714 0ustar liggesusers\name{isAG} \alias{isAG} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Ancestral graph } \description{ Check if it is an adjacency matrix of an ancestral graph } \usage{ isAG(amat) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{amat}{ %% ~~Describe \code{amat} here~~ } } \details{ Checks if the following conditions must hold: (i) no undirected edge meets an arrowhead; (ii) no directed cycles; (iii) spouses cannot be ancestors. For details see Richardson and Spirtes (2002). } \value{ A logical value, \code{TRUE} if it is an ancestral graph and \code{FALSE} otherwise. } \references{Richardson, T. S. and Spirtes, P. (2002). Ancestral Graph Markov Models. \emph{Annals of Statistics}, 30(4), 962--1030. } \author{ Giovanni M. Marchetti, Mathias Drton } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{\code{\link{makeMG}}, \code{\link{isADMG}}} \examples{ ## Examples from Richardson and Spirtes (2002) a1 <- makeMG(dg=DAG(a~b, b~d, d~c), bg=UG(~a*c)) isAG(a1) # Not an AG. (a2) p.969 a2 <- makeMG(dg=DAG(b ~ a, d~c), bg=UG(~a*c+c*b+b*d)) # Fig. 3 (b1) p.969 isAG(a2) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{graphs} \keyword{ancestral graph} \keyword{mixed graph} \keyword{models} \keyword{multivariate}ggm/man/fitAncestralGraph.Rd0000644000176200001440000001047612312776012015500 0ustar liggesusers\name{fitAncestralGraph} \alias{fitAncestralGraph} \title{Fitting of Gaussian Ancestral Graph Models} \description{ Iterative conditional fitting of Gaussian Ancestral Graph Models. } \usage{ fitAncestralGraph(amat, S, n, tol = 1e-06) } \arguments{ \item{amat}{a square matrix, representing the adjacency matrix of an ancestral graph.} \item{S}{a symmetric positive definite matrix with row and col names, the sample covariance matrix.} \item{n}{the sample size, a positive integer.} \item{tol}{a small positive number indicating the tolerance used in convergence checks.} } \details{ In the Gaussian case, the models can be parameterized using precision parameters, regression coefficients, and error covariances (compare Richardson and Spirtes, 2002, Section 8). This function finds the MLE \eqn{\hat \Lambda}{L} of the precision parameters by fitting a concentration graph model. The MLE \eqn{\hat B}{B} of the regression coefficients and the MLE \eqn{\hat\Omega}{O} of the error covariances are obtained by iterative conditional fitting (Drton and Richardson, 2003, 2004). The three sets of parameters are combined to the MLE \eqn{\hat\Sigma}{S} of the covariance matrix by matrix multiplication: \deqn{\hat\Sigma = \hat B^{-1}(\hat \Lambda+\hat\Omega)\hat B^{-T}.}{S = B^(-1) (L+O) B^(-t).} Note that in Richardson and Spirtes (2002), the matrices \eqn{\Lambda}{L} and \eqn{\Omega}{O} are defined as submatrices. } \value{ \item{Shat}{the fitted covariance matrix.} \item{Lhat}{matrix of the fitted precisions associated with undirected edges and vertices that do not have an arrowhead pointing at them.} \item{Bhat}{matrix of the fitted regression coefficients associated to the directed edges. Precisely said \code{Bhat} contains ones on the diagonal and the off-diagonal entry \eqn{(i,j)}{(i,j)} equals the \emph{negated} MLE of the regression coefficient for variable \eqn{j}{j} in the regression of variable \eqn{i}{i} on its parents. Note that this \eqn{(i,j)}{(i,j)} entry in \code{Bhat} corresponds to a directed edge \eqn{j \to i}{j -> i}, and thus to a one as \eqn{(j,i)}{(j,i)} entry in the adjacency matrix.} \item{Ohat}{matrix of the error covariances and variances of the residuals between regression equations associated with bi-directed edges and vertices with an arrowhead pointing at them.} \item{dev}{the `deviance' of the model.} \item{df}{the degrees of freedom.} \item{it}{the iterations.} } \references{ Drton, M. and Richardson, T. S. (2003). A new algorithm for maximum likelihood estimation in Gaussian graphical models for marginal independence. \emph{Proceedings of the Nineteenth Conference on Uncertainty in Artificial Intelligence}, 184-191. Drton, M. and Richardson, T. S. (2004). Iterative Conditional Fitting for Gaussian Ancestral Graph Models. Proceedings of the 20th Conference on Uncertainty in Artificial Intelligence, Department of Statistics, 130-137. Richardson, T. S. and Spirtes, P. (2002). Ancestral Graph Markov Models. \emph{Annals of Statistics}. 30(4), 962-1030. } \author{Mathias Drton} \seealso{\code{\link{fitCovGraph}}, \code{\link{icf}}, \code{\link{makeMG}}, \code{\link{fitDag}} } \examples{ ## A covariance matrix "S" <- structure(c(2.93, -1.7, 0.76, -0.06, -1.7, 1.64, -0.78, 0.1, 0.76, -0.78, 1.66, -0.78, -0.06, 0.1, -0.78, 0.81), .Dim = c(4,4), .Dimnames = list(c("y", "x", "z", "u"), c("y", "x", "z", "u"))) ## The following should give the same fit. ## Fit an ancestral graph y -> x <-> z <- u fitAncestralGraph(ag1 <- makeMG(dg=DAG(x~y,z~u), bg = UG(~x*z)), S, n=100) ## Fit an ancestral graph y <-> x <-> z <-> u fitAncestralGraph(ag2 <- makeMG(bg= UG(~y*x+x*z+z*u)), S, n=100) ## Fit the same graph with fitCovGraph fitCovGraph(ag2, S, n=100) ## Another example for the mathematics marks data data(marks) S <- var(marks) mag1 <- makeMG(bg=UG(~mechanics*vectors*algebra+algebra*analysis*statistics)) fitAncestralGraph(mag1, S, n=88) mag2 <- makeMG(ug=UG(~mechanics*vectors+analysis*statistics), dg=DAG(algebra~mechanics+vectors+analysis+statistics)) fitAncestralGraph(mag2, S, n=88) # Same fit as above } \keyword{graphs} \keyword{models} \keyword{ancestral graph} \keyword{multivariate} ggm/man/marg.param.Rd0000644000176200001440000000347312312776012014123 0ustar liggesusers\name{marg.param} \alias{marg.param} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Link function of marginal log-linear parameterization } \description{ Provides the contrast and marginalization matrices for the marginal parametrization of a probability vector. } \usage{ marg.param(lev, type) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{lev}{ Integer vector containing the number of levels of each variable. } \item{type}{ A character vector with elements \code{"l"}, \code{"g"}, \code{"c"}, or \code{"r"} indicating the type of logit. The meaning is as follows: \code{"g"} for global, \code{"c"} for continuation, \code{"r"} for reverse continuation and \code{"l"} for local. } } \details{ See Bartolucci, Colombi and Forcina (2007). } \value{ \item{C}{Matrix of constrasts (the first \code{sum(lev)-length(r)} elements are referred to univariate logits)} \item{M}{Marginalization matrix with elements 0 and 1.} \item{G}{Corresponding design matrix for the corresponding log-linear model.} } \references{ Bartolucci, F., Colombi, R. and Forcina, A. (2007). An extended class of marginal link functions for modelling contingency tables by equality and inequality constraints. Statist. Sinica 17, 691-711. } \author{ Francesco Bartolucci, Antonio Forcina, Giovanni M. Marchetti } \note{ Assumes that the vector of probabilities is in inv lex order. The interactions are returned in order of dimension, like e.g., 1, 2, 3, 12, 13, 23, 123. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{mat.mlogit}} } \examples{ marg.param(c(3,3), c("l", "g")) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{logistic models} \keyword{ordinal models}% __ONLY ONE__ keyword per line ggm/man/SimpleGraphOperations.Rd0000644000176200001440000000300012312776012016337 0ustar liggesusers\name{Simple Graph Operations} \alias{bd} \alias{ch} \alias{pa} \title{Simple graph operations} \description{ Finds the boundary, children, parents of a subset of nodes of a graph. } \usage{ bd(nn, amat) ch(nn, amat) pa(nn, amat) } \arguments{ \item{nn}{a vector of nodes. It may either a numeric vector, or a character vector. If it is character vector must be a subset of the \code{rownames} of the edge matrix.} \item{amat}{a square matrix with dimnames specifying the adjacency matrix of the graph} } \details{For definitions of the operators see Lauritzen (1996). } \value{ The operators return a character vector specifying the boundary or the children or the parents of nodes \code{nn} in the graph. This is a numeric or a character vector depending on the mode of \code{nn}. } \references{Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon Press.} \author{Giovanni M. Marchetti} \seealso{\code{\link{UG}}, \code{\link{DAG}}} \examples{ ## find boundary of a subset of nodes of a DAG G <- DAG(y ~ x+b+a, b~a, x~a) bd("b", G) bd(c("b", "x"), G) bd("x", G) bd(c("x","b"), G) ## find boundary of a subset of nodes of an UG G <- UG(~ y*x*z + z*h*v) bd("z", G) bd(c("y", "x"), G) bd("v", G) bd(c("x","v"), G) ## children of a subset of nodes of a DAG G <- DAG(y ~ x+b+a, b~a, x~a) ch("b", G) ch(c("b", "x"), G) ch("x", G) ch(c("a","x"), G) ## parents of a subset of nodes of a DAG pa("b", G) pa(c("b", "x"), G) pa("x", G) pa(c("x","b"), G) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/adjMatrix.Rd0000644000176200001440000000126212312776012014013 0ustar liggesusers\name{adjMatrix} \alias{adjMatrix} \title{Adjacency matrix of a graph} \description{ Transforms the ``edge matrix'' of a graph into the adjacency matrix. } \usage{ adjMatrix(A) } \arguments{ \item{A}{a square matrix representing the edge matrix of a graph.} } \details{ Given the edge matrix \eqn{A} of a graph, this can be transformed into an adjacency matrix \eqn{E} with the formula \eqn{E = (A-I)^T}. } \value{ \item{E}{the adjacency matrix of the graph.} } \author{Giovanni M. Marchetti} \seealso{\code{\link{edgematrix}}} \examples{ amat <- DAG(y ~ x+z, z~u+v) E <- edgematrix(amat) adjMatrix(E) } \keyword{array} \keyword{algebra} \keyword{graphs} \keyword{multivariate} ggm/man/blkdiag.Rd0000644000176200001440000000131012312776012013457 0ustar liggesusers\name{blkdiag} \alias{blkdiag} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Block diagonal matrix } \description{ Block diagonal concatenation of input arguments. } \usage{ blkdiag(...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{\dots}{ Variable number of matrices \code{M1, M2, ...}. } } \value{ A block diagonal matrix \code{diag(M1, M2, ...)}. } \author{ Giovanni M. Marchetti } \seealso{ \code{\link{diag}} } \examples{ X <- c(1,1,2,2); Z <- c(10, 20, 30, 40); A <- factor(c(1,2,2,2)) blkdiag(model.matrix(~X+Z), model.matrix(~A)) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{matrix} ggm/man/MSG.Rd0000644000176200001440000000746412336136237012535 0ustar liggesusers\name{MSG} \alias{MSG} \title{ Maximal summary graph } \description{ \code{MAG} generates and plots maximal summary graphs after marginalization and conditioning.} \usage{ MSG(amat,M=c(),C=c(),showmat=TRUE,plot=FALSE, plotfun = plotGraph, \dots) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{amat}{ An adjacency matrix of a MAG, or a graph that can be a \code{graphNEL} or an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type, node1label, node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). } \item{M}{ A subset of the node set of \code{a} that is going to be marginalized over} \item{C}{ Another disjoint subset of the node set of \code{a} that is going to be conditioned on. } % \item{byrownames}{ %A logical value. \code{TRUE} (by default) if \code{M} and \code{C} are %the subsets of the labels of the node set of the %graph or the row-name of the matrix of the first argument. \code{FALSE} %if \code{M} and \code{C} are the subsets of the set \eqn{\{1, \dots, n\}}, where \eqn{n} is the %number of the nodes. %} \item{showmat}{ A logical value. \code{TRUE} (by default) to print the generated matrix. } \item{plot}{ A logical value, \code{FALSE} (by default). \code{TRUE} to plot the generated graph. } \item{plotfun}{ Function to plot the graph when \code{plot == TRUE}. Can be \code{plotGraph} (the default) or \code{drawGraph}. } \item{\dots}{Further arguments passed to \code{plotfun}.} } \details{ This function uses the functions \code{\link{SG}} and \code{\link{Max}}. } \value{ A matrix that consists 4 different integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added to be associated with multiple edges of different types. The matrix is symmetric w.r.t full lines and bi-directed arrows. } \references{ Richardson, T.S. and Spirtes, P. (2002). Ancestral graph Markov models. \emph{Annals of Statistics}, 30(4), 962-1030. Sadeghi, K. (2011). Stable classes of graphs containing directed acyclic graphs. \emph{Submitted}. Sadeghi, K. and Lauritzen, S.L. (2011). Markov properties for loopless mixed graphs. \emph{Submitted}. URL \url{http://arxiv.org/abs/1109.5909}. Wermuth, N. (2011). Probability distributions with summary graph structure. \emph{Bernoulli}, 17(3), 845-879.} \author{ Kayvan Sadeghi } %\note{ %% ~~further notes~~ %} \seealso{ \code{\link{MAG}}, \code{\link{Max}}, \code{\link{MRG}}, \code{\link{SG}}} \examples{ ex<-matrix(c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ##The adjacency matrix of a DAG 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, 0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0), 16, 16, byrow=TRUE) M <- c(3,5,6,15,16) C <- c(4,7) MSG(ex,M,C,plot=TRUE) ################################################### H<-matrix(c(0,100,1,0,100,0,100,0,0,100,0,100,0,1,100,0),4,4) Max(H) } \keyword{graphs} \keyword{directed acyclic graph} \keyword{marginalisation and conditioning} \keyword{maximality of graphs} \keyword{summary graph} ggm/man/RG.Rd0000644000176200001440000000605512336136265012413 0ustar liggesusers\name{RG} \alias{RG} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Ribbonless graph } \description{ \code{RG} generates and plots ribbonless graphs (a modification of MC graph to use m-separation) after marginalization and conditioning.} \usage{ RG(amat,M=c(),C=c(),showmat=TRUE,plot=FALSE, plotfun = plotGraph, \dots) } \arguments{ \item{amat}{ An adjacency matrix, or a graph that can be a \code{graphNEL} or an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type, node1label, node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). } \item{M}{ A subset of the node set of \code{a} that is going to be marginalized over} \item{C}{ Another disjoint subset of the node set of \code{a} that is going to be conditioned on. } \item{showmat}{ A logical value. \code{TRUE} (by default) to print the generated matrix. } \item{plot}{ A logical value, \code{FALSE} (by default). \code{TRUE} to plot the generated graph. } \item{plotfun}{ Function to plot the graph when \code{plot == TRUE}. Can be \code{plotGraph} (the default) or \code{drawGraph}. } \item{\dots}{Further arguments passed to \code{plotfun}.} } \value{ A matrix that consists 4 different integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added to be associated with multiple edges of different types. The matrix is symmetric w.r.t full lines and bi-directed arrows. } \references{ Koster, J.T.A. (2002). Marginalizing and conditioning in graphical models. \emph{Bernoulli}, 8(6), 817-840. Sadeghi, K. (2011). Stable classes of graphs containing directed acyclic graphs. \emph{Submitted}. } \author{ Kayvan Sadeghi } %\note{ %% ~~further notes~~ %} \seealso{ \code{\link{AG}},, \code{\link{MRG}}, \code{\link{SG}} } \examples{ ex <- matrix(c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ##The adjacency matrix of a DAG 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, 0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0),16,16, byrow = TRUE) M<-c(3,5,6,15,16) C<-c(4,7) RG(ex,M,C,plot=TRUE) } \keyword{graphs} \keyword{directed acyclic graph} \keyword{marginalisation and conditioning} \keyword{MC graph} \keyword{ribbonless graph} ggm/man/grMAT.Rd0000644000176200001440000000270713621530754013054 0ustar liggesusers\name{grMAT} \alias{grMAT} \title{Graph to adjacency matrix } \description{ \code{grMAT} generates the associated adjacency matrix to a given graph. } \usage{ grMAT(agr) } \arguments{ \item{agr}{ A graph that can be a \code{graphNEL} or an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type, node1label, node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). } } %\details{ %% ~~ If necessary, more details than the description above ~~ %} \value{ A matrix that consists 4 different integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added to be associated with multiple edges of different types. The matrix is symmetric w.r.t full lines and bi-directed arrows. } %\references{ %% ~put references to the literature/web site here ~ %} \author{ Kayvan Sadeghi } %\note{ %% ~~further notes~~ %} %\seealso{ %\code{\link{adjGR}} %} \examples{ ## Generating the adjacency matrix from a vector exvec <-c ('b',1,2,'b',1,14,'a',9,8,'l',9,11,'a',10,8, 'a',11,2,'a',11,10,'a',12,1,'b',12,14,'a',13,10,'a',13,12) grMAT(exvec) } \keyword{graphs} \keyword{adjacency matrix} \keyword{mixed graph} \keyword{vector} ggm/man/binve.Rd0000644000176200001440000000312212312776012013170 0ustar liggesusers\name{binve} \alias{binve} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Inverts a marginal log-linear parametrization } \description{ Inverts a marginal log-linear parametrization. } \usage{ binve(eta, C, M, G, maxit = 500, print = FALSE, tol = 1e-10) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{eta}{ a vector of dimension \code{t-1} where \code{t} is the number of cells of a contingency table. } \item{C}{ A contrast matrix. } \item{M}{ A marginalization matrix. } \item{G}{ G is the model matrix of the loglinear parameterization with no constant term. } \item{maxit}{ an integer, specifying the maximum number of iterations. Default 500. } \item{print}{ a logical value: if \code{TRUE}, prints the criterion after each cycle. } \item{tol}{ A small value specifying the tolerance for the convergence criterion. Default: \code{1e-10}. } } \details{ A marginal log-linear link is defined by \eqn{\eta = C (M \log p)}. See Bartolucci et al. (2007). } \value{ A vector of probabilities \code{p}. } \references{ Bartolucci, F., Colombi, R. and Forcina, A. (2007). An extended class of marginal link functions for modelling contingency tables by equality and inequality constraints. Statist. Sinica 17, 691-711. } \author{ Antonio Forcina, Giovanni M. Marchetti } \note{ From a Matlab function by A. Forcina, University of Perugia, Italy. } \seealso{ \code{\link{mat.mlogit}} } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{marginal log-linear models} \keyword{discrete data} ggm/man/correlations.Rd0000644000176200001440000000173012312776012014574 0ustar liggesusers\name{correlations} \alias{correlations} \title{Marginal and partial correlations} \description{ Computes a correlation matrix with ones along the diagonal, marginal correlations in the lower triangle and partial correlations given all remaining variables in the upper triangle. } \usage{ correlations(x) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{a square symmetric matrix, a covariance matrix, or a data.frame for n observations and p variables.} } \value{ a square correlation matrix with marginal correlations (lower triangle) and partial correlations (upper triangle). } \references{Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate dependencies}. London: Chapman \& Hall.} \author{Giovanni M. Marchetti} \seealso{\code{\link{parcor}}, \code{\link{cor}}} \examples{ ## See Table 6.1 in Cox & Wermuth (1996) data(glucose) correlations(glucose) } \keyword{array} \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/dSep.Rd0000644000176200001440000000436512312776012012772 0ustar liggesusers\name{dSep} \alias{dSep} \title{d-separation} \description{ Determines if in a directed acyclic graph two set of nodes a d-separated by a third set of nodes. } \usage{ dSep(amat, first, second, cond) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{amat}{a Boolean matrix with dimnames, representing the adjacency matrix of a directed acyclic graph. The function does not check if this is the case. See the function \code{isAcyclic}. } \item{first}{a vector representing a subset of nodes of the DAG. The vector should be a character vector of the names of the variables matching the names of the nodes in \code{rownames(A)}. It can be also a numeric vector of indices. } \item{second}{a vector representing another subset of nodes of the DAG. The set \code{second} must be disjoint from \code{first}. The mode of \code{second} must match the mode of \code{first}.} \item{cond}{a vector representing a conditioning subset of nodes. The set \code{cond} must be disjoint from the other two sets and must share the same mode. } } \details{ d-separation is a fundamental concept introduced by Pearl (1988). } \value{ a logical value. \code{TRUE} if \code{first} and \code{second} are d-separated by \code{cond}. } \references{ Pearl, J. (1988). \emph{Probabilistic reasoning in intelligent systems.} San Mateo: Morgan Kaufmann. Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon Press. } \author{Giovanni M. Marchetti} \seealso{\code{\link{DAG}}, \code{\link{shipley.test}}, \code{\link{inducedCovGraph}}} \examples{ ## Conditioning on a transition node dSep(DAG(y ~ x, x ~ z), first="y", second="z", cond = "x") ## Conditioning on a collision node (collider) dSep(DAG(y ~ x, y ~ z), first="x", second="z", cond = "y") ## Conditioning on a source node dSep(DAG(y ~ x, z ~ x), first="y", second="z", cond = "x") ## Marginal independence dSep(DAG(y ~ x, y ~ z), first="x", second="z", cond = NULL) ## The DAG defined on p.~47 of Lauritzen (1996) dag <- DAG(g ~ x, h ~ x+f, f ~ b, x ~ l+d, d ~ c, c ~ a, l ~ y, y ~ b) dSep(dag, first="a", second="b", cond=c("x", "y")) dSep(dag, first="a", second=c("b", "d"), cond=c("x", "y")) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/AG.Rd0000644000176200001440000000600412336136655012367 0ustar liggesusers\name{AG} \alias{AG} \title{ Ancestral graph } \description{ \code{AG} generates and plots ancestral graphs after marginalization and conditioning. } \usage{ AG(amat,M=c(),C=c(),showmat=TRUE,plot=FALSE, plotfun = plotGraph, \dots) } \arguments{ \item{amat}{ An adjacency matrix, or a graph that can be of class \code{graphNEL-class} or an \code{\link{igraph}} object, or a vector of length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a sequence of triples (type, node1label, node2label). The type of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines).} \item{M}{ A subset of the node set of \code{a} that is going to be marginalized over} \item{C}{ Another disjoint subset of the node set of \code{a} that is going to be conditioned on. } \item{showmat}{ A logical value. \code{TRUE} (by default) to print the generated matrix. } \item{plot}{ A logical value, \code{FALSE} (by default). \code{TRUE} to plot the generated graph. } \item{plotfun}{ Function to plot the graph when \code{plot == TRUE}. Can be \code{plotGraph} (the default) or \code{drawGraph}. } \item{\dots}{Further arguments passed to \code{plotfun}.} } \value{ A matrix that is the adjacency matrix of the generated graph. It consists of 4 different integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added to be associated with multiple edges of different types. The matrix is symmetric w.r.t full lines and bi-directed arrows. } \references{ Richardson, T.S. and Spirtes, P. (2002). Ancestral graph Markov models. \emph{Annals of Statistics}, 30(4), 962-1030. Sadeghi, K. (2011). Stable classes of graphs containing directed acyclic graphs. \emph{Submitted}. } \author{ Kayvan Sadeghi } %\note{ %% ~~further notes~~ %} \seealso{ \code{\link{MAG}}, \code{\link{RG}}, \code{\link{SG}} } \examples{ ##The adjacency matrix of a DAG ex<-matrix(c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, 0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0),16,16,byrow=TRUE) M <- c(3,5,6,15,16) C <- c(4,7) AG(ex, M, C, plot = TRUE) } \keyword{graphs} \keyword{ancestral graph} \keyword{directed acyclic graph} \keyword{marginalization and conditioning} ggm/man/conComp.Rd0000644000176200001440000000150412312776012013465 0ustar liggesusers\name{conComp} \alias{conComp} \title{Connectivity components} \description{ Finds the connectivity components of a graph. } \usage{ conComp(amat, method) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{amat}{a square matrix with dimnames, the adjacency matrix of an UG.} \item{method}{an integer 1 or 2 to choose the method used to find the components. Method 2 is more efficient for large graphs.} } \value{ an integer vector representing a partition of the set of nodes. } \references{Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon Press.} \author{Giovanni M. Marchetti} \seealso{\code{\link{UG}}} \examples{ ## three connected components conComp(UG(~a*c+c*d+e+g*o*u)) ## a connected graph conComp(UG(~ a*b+b*c+c*d+d*a)) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/isAcyclic.Rd0000644000176200001440000000234712312776012014000 0ustar liggesusers\name{isAcyclic} \alias{isAcyclic} %- Also NEED an '\alias' for EACH other topic documented here. \title{Graph queries} \description{ Checks if a given graph is acyclic. } \usage{ isAcyclic(amat, method = 2) } \arguments{ \item{amat}{a square Boolean matrix with dimnames, the adjacency matrix of a graph.} \item{method}{an integer 1 or 2 specifying the method used. If \code{method=1} the function calls the function \code{clusters} in package \code{igraph} to find the strong components: two nodes v and w are in the same strong component iff there are directed paths from v to w and from w to v. If \code{method=2} the function uses the \code{ggm} function \code{transClos}. Method 1 is faster.} } \value{ a logical value, \code{TRUE} if the graph is acyclic and \code{FALSE} otherwise. } \references{ Aho, A.V., Hopcroft, J.E. \& Ullman, J.D. (1983). \emph{Data structures and algorithms.} Reading: Addison-Wesley. } \author{David Edwards, Giovanni M. Marchetti} \examples{ ## A cyclic graph d <- matrix(0,3,3) rownames(d) <- colnames(d) <- c("x", "y", "z") d["x","y"] <- d["y", "z"] <- d["z", "x"] <- 1 ## Test if the graph is acyclic isAcyclic(d) isAcyclic(d, method = 1) } \keyword{graphs} \keyword{models} \keyword{multivariate} ggm/man/fitmlogit.Rd0000644000176200001440000000361212457671134014100 0ustar liggesusers\name{fitmlogit} \alias{fitmlogit} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Multivariate logistic models } \description{ Fits a logistic regression model to multivariate binary responses. } \usage{ fitmlogit(..., C = c(), D = c(), data, mit = 100, ep = 1e-80, acc = 1e-04) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{\dots}{ Model formulae of marginal logistic models for each response and for each association parameters (log-odds ratios). } \item{C}{ Matrix of equality constraints. } \item{D}{ Matrix of inequality cosntraints. } \item{data}{ A data frame containing the responses and the explanatory variables. } \item{mit}{ A positive integer: maximum number of iterations. Default: \code{100}. } \item{ep}{ A tolerance used in the algorithm: default \code{1e-80}. } \item{acc}{ A tolerance used in the algorithm: default \code{1e-4}. } } \details{ See Evans and Forcina (2011). } \value{ \item{LL}{The maximized log-likelihood.} \item{be}{The vector of the Maximum likelihood estimates of the parameters.} \item{S}{The estimated asymptotic covariance matrix.} \item{P}{The estimated cell probabilities for each individual.} } \references{ Evans, R.J. and Forcina, A. (2013). Two algorithms for fitting constrained marginal models. \emph{Computational Statistics and Data Analysis}, 66, 1-7. } \author{ Antonio Forcina, Giovanni M. Marchetti } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{glm}} } \examples{ data(surdata) out1 <- fitmlogit(A ~X, B ~ Z, cbind(A, B) ~ X*Z, data = surdata) out1$beta out2 <- fitmlogit(A ~X, B ~ Z, cbind(A, B) ~ 1, data = surdata) out2$beta } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. \keyword{multivariate} \keyword{logistic model}% __ONLY ONE__ keyword per line ggm/man/anger.Rd0000644000176200001440000000152712312776012013170 0ustar liggesusers\name{anger} \alias{anger} \docType{data} \title{ Anger data } \description{ Anger data } \usage{data(anger)} \format{ A covariance matrix for 4 variables measured on 684 female students. \describe{ \item{X}{anxiety state} \item{Y}{anger state} \item{Z}{anxiety trait} \item{U}{anger trait} } } \details{ Trait variables are viewed as stable personality characteristics, and state variables denote behaviour in specific situations. See Cox and Wermuth (1996). } \references{ Cox, D. R. and Wermuth, N. (1996). \emph{Multivariate dependencies}. London: Chapman and Hall. Cox, D.R. and Wermuth, N. (1990). \emph{An approximation to maximum likelihood estimates in reduced models}. 77(4), 747-761. } \examples{ # Fit a chordless 4-cycle model data(anger) G = UG(~ Y*X + X*Z + Z*U + U*Y) fitConGraph(G,anger, 684) } \keyword{datasets} ggm/DESCRIPTION0000644000176200001440000000075413622245342012543 0ustar liggesusersPackage: ggm Title: Graphical Markov Models with Mixed Graphs Version: 2.5 Date: 2020-02-014 Author: Giovanni M. Marchetti, Mathias Drton, Kayvan Sadeghi Maintainer: Giovanni M. Marchetti Depends: R (>= 3.6) Imports: igraph, methods, graph Description: Tools for marginalization, conditioning and fitting by maximum likelihood. License: GPL-2 NeedsCompilation: no Repository: CRAN Packaged: 2020-02-14 16:50:01 UTC; gmm Date/Publication: 2020-02-16 14:00:02 UTC ggm/NEWS0000644000176200001440000000251213621530036011522 0ustar liggesusersggm === - February 2020 Corrections required by CRAN - December 2019 Corrections in namespace - July 2016 Corrected data set marks (mechanics[57] is 35 not 36) - December 2014 Changed the Title in DESCRIPTION - June 2014 drawGraph uses layout, default adjust = FALSE. Now is possible to plot expressions in drawGraph Further options for edges. - May 2014 Included further argument plotfun in functions SG, RG, MSG, MRG, MAG, AG. - March 2014 Changed function isAcyclic eliminating dependence on RBGL Removed dependence on RBGL corrected the docs Fixed function fitConGraph - January 2014 fixed function icfmag corrected email addresses - October 2012 changed plotGraph - September 2012 corrected isAG, added example in makeMG added functionality do plotGraph - September 2012 added a new igraph output to plotGraph - August 2012 removed function `vertices` changed plotGraph - May 2012 corrected isAcyclic Improved isAcyclic. Thanks to David Edwards Changed conComp Removed cliques, thanks to Castelo. Removed edgematrix Modified fitConGraph with algorithm from Tibshirani Hastie Friedman. Modified function DAG New function fitDAG Modified icfmag (call to fitConGraph) Removed makeAG and unmakeAG new functions makeMG, unmakeMG, isAG and isADMG ggm/R/0000755000176200001440000000000013621530124011222 5ustar liggesusersggm/R/functions.R0000644000176200001440000045544213617225621013403 0ustar liggesusers`fitDAG` <- function (..., data) { ### Fits linear recursive regressions with independent residuals. # ..., a list of models # data, a data frame. mo = list(...) p = length(mo) D = DAG(...) nam = rownames(D) data = data[, nam] ## Existing responses resp = c() for( i in 1:p) { te = as.character(mo[[i]]) resp = c(resp, te[2]) } newresp = setdiff(nam, resp) for(k in 1:length(newresp)){ newmo = formula(paste(newresp[k], "~ 1")) mo = c(mo, newmo) } to = topOrder(D) o = match(nam[to], c(resp, newresp)) mo = mo[o] data = data[,to] beta = vector(p, mode = "list") delta = rep(0,p) n = nrow(data) lik = 0 df = 0 nomi = colnames(data) for(i in 1:length(mo)) { moi = mo[[i]] te = as.character(moi) if(te[2] == te[3]){ lik = lik + n * log(2*pi * var(data[, te[2]])) + (n-1) df = df + (n-1) next } else { m = lm(moi, data = data[1:i]) mq = summary(m) beta[[i]] = mq$coefficients[,1] delta[[i]] = (mq$sigma)^2 # Yh[, te[2]] = fitted(m) d = n - length(beta[i]) lik = lik + n * log(2*pi * delta[i]) + d df = df + d nm = paste(nomi[1:i], collapse = ",") cat(paste("\nModel:", te[2], te[1], te[3], " Margin: ",nm ,"\n")) print(mq$coefficients, digits = 4) } } # Shat <- cov(Yh) # Khat <- solve(Shat) # H <- S %*% Khat # trace <- function(A) sum(diag(A)) # dev <- (trace(H) - log(det(H)) - p) * n # df <- p*(p+1)/2 - sum(amat==1) - p list( bhat = beta, dhat=delta, dev=lik, df=df, lik = 2*lik) } #' Adjacency matrix of a graph #' #' Transforms the ``edge matrix'' of a graph into the adjacency matrix. #' #' Given the edge matrix \eqn{A} of a graph, this can be transformed into an #' adjacency matrix \eqn{E} with the formula \eqn{E = (A-I)^T}. #' #' @param A a square matrix representing the edge matrix of a graph. #' @return \item{E}{the adjacency matrix of the graph.} #' @author Giovanni M. Marchetti #' @seealso \code{\link{edgematrix}} #' @keywords array algebra graphs multivariate #' @examples #' #' amat <- DAG(y ~ x+z, z~u+v) #' E <- edgematrix(amat) #' adjMatrix(E) #' "adjMatrix" <- function (A) { ### From the edge matrix to the adjacency matrix E <- t(A) diag(E) <- 0 E } #' All edges of a graph #' #' Finds the set of edges of a graph. That is the set of undirected edges if #' the graph is undirected and the set of arrows if the graph is directed. #' #' #' @param amat a square Boolean matrix, with dimnames, the adjacency matrix of #' a graph. #' @return a matrix with two columns. Each row of the matrix is a pair of #' indices indicating an edge of the graph. If the graph is undirected, then #' only one of the pairs \eqn{(i,j), (j,i)} is reported. #' @author Giovanni M. Marchetti #' @seealso \code{\link{cycleMatrix}} #' @keywords graphs models multivariate #' @examples #' #' ## A UG graph #' allEdges(UG(~ y*v*k +v*k*d+y*d)) #' #' ## A DAG #' allEdges(DAG(u~h+o+p, h~o, o~p)) #' "allEdges" <- function(amat){ ### Finds all the edges of a graph with edge matrix amat. nn <- 1:nrow(amat) E <- c() if(all(amat == t(amat))) { amat[lower.tri(amat)] <- 0 } for(i in nn) { e <- nn[amat[i,] == 1] if(length(e) == 0) next li <- cbind(i, e) dimnames(li) <- list(rep("", length(e)), rep("", 2)) E <- rbind(E, li) } E } #' Basis set of a DAG #' #' Finds a basis set for the conditional independencies implied by a directed #' acyclic graph, that is a minimal set of independencies that imply all the #' other ones. #' #' Given a DAG and a pair of non adjacent nodes \eqn{(i,j)} such that \eqn{j} #' has higher causal order than \eqn{i}, the set of independency statements #' \eqn{i} independent of \eqn{j} given the union of the parents of both #' \eqn{i} and \eqn{j} is a basis set (see Shipley, 2000). This basis set has #' the property to lead to independent test statistics. #' #' @param amat a square matrix with dimnames representing the adjacency matrix #' of a DAG. #' @return a list of vectors representing several conditional independence #' statements. Each vector contains the names of two non adjacent nodes #' followed by the names of nodes in the conditioning set (which may be empty). #' @author Giovanni M. Marchetti #' @seealso \code{\link{shipley.test}}, \code{\link{dSep}}, \code{\link{DAG}} #' @references Shipley, B. (2000). A new inferential test for path models based #' on directed acyclic graphs. \emph{Structural Equation Modeling}, 7(2), #' 206--218. #' @keywords graphs models multivariate #' @examples #' #' ## See Shipley (2000), Figure 2, p. 213 #' A <- DAG(x5~ x3+x4, x3~ x2, x4~x2, x2~ x1) #' basiSet(A) #' "basiSet" <- function(amat){ ### Basis set of a DAG with adjacency matrix amat. amat <- topSort(amat) nod <- rownames(amat) dv <- length(nod) ind <- NULL ## NOTE. This is correct if the adj mat is upper triangular. for(r in 1:dv){ for(s in r:dv) { if((amat[r,s] != 0) | (s==r)) next else{ ed <- nod[c(r,s)] pa.r <- nod[amat[,r] == 1] pa.s <- nod[amat[,s] == 1] dsep <- union(pa.r, pa.s) dsep <- setdiff(dsep, ed) b <- list(c(ed, dsep)) ind <- c(ind, b) } } } ## ind <- lapply(ind, function(x) nn[x]) ind } "bd" <- function (nn, amat) { ### Boundary of the nodes nn for a graph with adjacency matrix amat. nod <- rownames(amat) if(is.null(nod)) stop("The edge matrix must have dimnames!") if(!all(is.element(nn, nod))) stop("Some of the nodes are not among the vertices.") b <- vector(length(nn), mode="list") diag(amat) <- 0 # As you do not want the node itself in the list k <- length(nn) for(i in 1:k) { b[[i]] <- c( nod[amat[nn[i], ]==1 ],nod[amat[,nn[i]]==1 ] ) } b <- unique(unlist(b)) setdiff(b, nn) } #' Breadth first search #' #' Breadth-first search of a connected undirected graph. #' #' Breadth-first search is a systematic method for exploring a graph. The #' algorithm is taken from Aho, Hopcroft \& Ullman (1983). #' #' @param amat a symmetric matrix with dimnames specifying the adjacency matrix #' of the undirected graph #' @param v an integer, indicating the starting node of the search. Defaults to #' the first node. #' @return \item{tree}{the edge matrix of the resulting spanning tree} #' \item{branches}{a matrix with two columns, giving the indices of the #' branches of the spanning tree} \item{chords}{a matrix with two columns, #' giving the indices of the chords of the spanning tree} #' @author Giovanni M. Marchetti #' @seealso \code{\link{UG}}, \code{\link{findPath}}, \code{\link{cycleMatrix}} #' @references Aho, A.V., Hopcrtoft, J.E. \& Ullman, J.D. (1983). \emph{Data #' structures and algorithms.} Reading: Addison-Wesley. #' #' Thulasiraman, K. \& Swamy, M.N.S. (1992). \emph{Graphs: theory and #' algorithms}. New York: Wiley. #' @keywords graphs models multivariate #' @examples #' #' ## Finding a spanning tree of the butterfly graph #' bfsearch(UG(~ a*b*o + o*u*j)) #' ## Starting from another node #' bfsearch(UG(~ a*b*o + o*u*j), v=3) #' "bfsearch" <- function(amat, v=1) { ### Breadth-first search of a connected UG with adjacency matrix amat. n <- nrow(amat) indices <- 1:n if(n==1) return(NULL) visited <- rep(0, n) Q <- c() tree <- matrix(0, n,n) dimnames(tree) <- dimnames(amat) E <- c() visited[v] <- 1 Q <- c(v, Q) while(!all(visited==1)){ x <- Q[1] Q <- Q[-1] ## b <- bd(x, amat) b <- indices[amat[x,]==1] # Boundary of x. Assumes that amat is symmetric for(y in b){ if(visited[y] == 0){ visited[y] <- 1 Q <- c(Q, y) tree[x,y]<- 1 ; tree[y,x] <- 1 E <- rbind(E, c(x,y)) } } } cross <- amat - tree V <- allEdges(cross) dimnames(E) <- list(rep("", nrow(E)), rep("", 2)) list(tree = tree, branches = E,chords = V ) } "ch" <- function (nn, amat) { ### List of the children of nodes nn for a given with adjacency matrix amat. nod <- rownames(amat) if(is.null(nod)) stop("The adjacency matrix must have dimnames!") if(!all(is.element(nn, nod))) stop("Some of the nodes are not among the vertices.") k <- length(nn) p <- vector(k, mode="list") A <- 0 + ((amat != t(amat)) & (amat == 1)) # Select the directed edges for(i in 1:k) { p[[i]] <- nod[A[nn[i],]==1 ] } setdiff(unique(unlist(p)), nn) } #' Identifiability of a model with one latent variable #' #' Checks four sufficient conditions for identifiability of a Gaussian DAG #' model with one latent variable. #' #' Stanghellini and Wermuth (2005) give some sufficient conditions for checking #' if a Gaussian model that factorizes according to a DAG is identified when #' there is one hidden node over which we marginalize. Specifically, the #' function checks the conditions of Theorem 1, (i) and (ii) and of Theorem 2 #' (i) and (ii). #' #' @param amat a square matrix with dimnames, representing the adjacency matrix #' of a DAG. #' @param latent an integer representing the latent variables among the nodes, #' or the name of the node. #' @return a vector of length four, indicating if the model is identified #' according to the conditions of theorems 1 and 2 in Stanghellini \& Wermuth #' (2005). The answer is \code{TRUE} if the condition holds and thus the model #' is globally identified or \code{FALSE} if the condition fails, and thus we #' do not know if the model is identifiable. #' @author Giovanni M. Marchetti #' @seealso \code{\link{isGident}}, \code{\link{InducedGraphs}} #' @references Stanghellini, E. \& Wermuth, N. (2005). On the identification of #' path-analysis models with one hidden variable. \emph{Biometrika}, 92(2), #' 337-350. #' @keywords graphs models multivariate #' @examples #' #' ## See DAG in Figure 4 (a) in Stanghellini & Wermuth (2005) #' d <- DAG(y1 ~ y3, y2 ~ y3 + y5, y3 ~ y4 + y5, y4 ~ y6) #' checkIdent(d, "y3") # Identifiable #' checkIdent(d, "y4") # Not identifiable? #' #' ## See DAG in Figure 5 (a) in Stanghellini & Wermuth (2005) #' d <- DAG(y1 ~ y5+y4, y2 ~ y5+y4, y3 ~ y5+y4) #' checkIdent(d, "y4") # Identifiable #' checkIdent(d, "y5") # Identifiable #' #' ## A simple function to check identifiability for each node #' #' is.ident <- function(amat){ #' ### Check suff. conditions on each node of a DAG. #' p <- nrow(amat) #' ## Degrees of freedom #' df <- p*(p+1)/2 - p - sum(amat==1) - p + 1 #' if(df <= 0) #' warning(paste("The degrees of freedom are ", df)) #' a <- rownames(amat) #' for(i in a) { #' b <- checkIdent(amat, latent=i) #' if(TRUE %in% b) #' cat("Node", i, names(b)[!is.na(b)], "\n") #' else #' cat("Unknown.\n") #' } #' } #' `checkIdent` <- function(amat, latent) { ### Checks SW sufficient conditions for identifiability of a DAG ### with adjacency matrix edge amat and one latent variable. "allSubsets" <- function (n) { ## Returns all subsets of n p <- length(n) H <- data.matrix(expand.grid(rep(list(1:2), p))) - 1 H <- split(H==1, row(H)) lapply(H, function(i) n[i]) } nod <- rownames(amat) if(is.null(nod)) stop("The adjacency matrix must have dimnames.") gcov <- inducedCovGraph(amat, sel=nod, cond=NULL); gcov <- sign(gcov) L <- latent if(length(L) > 1) stop("I have an answer only for one latent variable.") O <- setdiff(nod, L) m <- bd(L, gcov) ## Theorem 1 if(length(m) > 2){ G <- inducedCovGraph(amat, sel=O, cond=L); G <- sign(G) cond.i <- isGident(G[m,m,drop=FALSE]) } else cond.i <- FALSE gcon <- inducedConGraph(amat, sel=nod, cond=NULL) ; gcon <- sign(gcon) cc <- bd(L, gcon) if(length(cc) > 2) { cond.ii <- isGident(gcon[cc, cc, drop=FALSE]) } else cond.ii <- FALSE ## Theorem 2 (revised) a <- union(pa(L, amat), ch(L, amat)) if(length(a) > 2){ Oa <- setdiff(O, a) # O \ a S.a <- inducedCovGraph(amat, sel=union(Oa, L), cond=a); S.a <- sign(S.a) first <- S.a[Oa, L] first <- Oa[first==0] # variables that satisfy condition (i) of thm. 2. if(length(first)==0){ cond.iii <- FALSE } else { cond.iii <- FALSE H <- allSubsets(first) # in all subsets of these variables for(h in H){ # look for a G-identifiable cov. or con. graph isgid <- isGident(sign(inducedCovGraph(amat, sel=a, cond=union(L, h)))) if(isgid){ cond.iii <- TRUE break } else{ isgid <- isGident(sign(inducedConGraph(amat, sel=a, cond=union(L, h)))) if(isgid){ cond.iii <- TRUE break } } } } second <- setdiff(O,m) # variables that satisfy condition (ii) of thm. 2. if(length(second)==0){ cond.iv <- FALSE } else { cond.iv <- FALSE H <- allSubsets(second) # in all subsets of these variables for(h in H){ # look for a G-identifiable cov. or con. graph isgid <- isGident(sign(inducedCovGraph(amat, sel=a, cond=union(L, h)))) if(isgid){ cond.iv <- TRUE break } else{ isgid <- isGident(sign(inducedConGraph(amat, sel=a, cond=union(L, h)))) if(isgid){ cond.iv <- TRUE break } } } } } else{ cond.iii <- FALSE cond.iv <- FALSE } c(T1.i = cond.i, T1.ii = cond.ii, T2.i = cond.iii, T2.ii = cond.iv) } #' The complementary graph #' #' Finds the complementary graph of an undirected graph. #' #' The complementary graph of an UG is the graph that has the same set of nodes #' and an undirected edge connecting \eqn{i} and \eqn{j} whenever there is not #' an \eqn{(i,j)} edge in the original UG. #' #' @param amat the adjacency matrix of an undirected graph #' @return the edge matrix of the complementary graph. #' @author Giovanni M. Marchetti #' @seealso \code{\link{UG}}, \code{\link{DAG}} #' @references Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon #' Press. #' @keywords graphs models multivariate #' @examples #' #' ## A chordless four-cycle #' four <- UG(~ a*b + b*d + d*e + e*a) #' four #' cmpGraph(four) #' "cmpGraph" <- function(amat){ ### Adjacency matrix of the complementary graph g <- 1*!amat diag(g) <- 0 g } #' Connectivity components #' #' Finds the connectivity components of a graph. #' #' #' @param amat a square matrix with dimnames, the adjacency matrix of an UG. #' @param method an integer 1 or 2 to choose the method used to find the #' components. Method 2 is more efficient for large graphs. #' @return an integer vector representing a partition of the set of nodes. #' @author Giovanni M. Marchetti #' @seealso \code{\link{UG}} #' @references Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon #' Press. #' @keywords graphs models multivariate #' @examples #' #' ## three connected components #' conComp(UG(~a*c+c*d+e+g*o*u)) #' ## a connected graph #' conComp(UG(~ a*b+b*c+c*d+d*a)) #' `conComp` <- function (amat, method = 1) ### Finds the connected components of an UG graph from its adjacency matrix amat. { if (!all(amat == t(amat))) stop("Not an undirected graph.") if(method == 2){ u <- clusters(graph.adjacency(amat, mode="undirected"))$membership + 1 return(u) } else if (method == 1){ A <- transClos(amat) diag(A) <- 1 n <- nrow(A) A <- sign(A + t(A)) u <- A %*% 2^((n - 1):0) return(match(u, unique(u))) } else{ stop("Wrong method.")} } #' Marginal and partial correlations #' #' Computes a correlation matrix with ones along the diagonal, marginal #' correlations in the lower triangle and partial correlations given all #' remaining variables in the upper triangle. #' #' #' @param x a square symmetric matrix, a covariance matrix, or a data.frame for #' n observations and p variables. #' @return a square correlation matrix with marginal correlations (lower #' triangle) and partial correlations (upper triangle). #' @author Giovanni M. Marchetti #' @seealso \code{\link{parcor}}, \code{\link{cor}} #' @references Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate #' dependencies}. London: Chapman \& Hall. #' @keywords array graphs models multivariate #' @examples #' #' ## See Table 6.1 in Cox & Wermuth (1996) #' data(glucose) #' correlations(glucose) #' "correlations" <- function (x) { ### Marginal correlations (lower half) and ### partial correlations given all remaining variables (upper half). if(is.data.frame(x)) r <- cor(x) else { # Recomputes the corr matrix Dg <- 1/sqrt(diag(x)) r <- x * outer(Dg, Dg) } rp <- parcor(r) r[upper.tri(r)] <- rp[upper.tri(rp)] r } #' Fundamental cycles #' #' Finds the matrix of fundamental cycles of a connected undirected graph. #' #' All the cycles in an UG can be obtained from combination (ring sum) of the #' set of fundamental cycles. The matrix of fundamental cycles is a Boolean #' matrix having as rows the fundamental cycles and as columns the edges of the #' graph. If an entry is one then the edge associated to that column belongs to #' the cycle associated to the row. #' #' @param amat a symmetric matrix with dimnames denoting the adjacency matrix #' of the undirected graph. The graph must be connected, otherwise the function #' returns an error message. #' @return a Boolean matrix of the fundamental cycles of the undirected graph. #' If there is no cycle the function returns \code{NULL}. #' @note This function is used by \code{isGident}. The row sum of the matrix #' gives the length of the cycles. #' @author Giovanni M. Marchetti #' @seealso \code{\link{UG}}, \code{\link{findPath}}, \code{\link{fundCycles}}, #' \code{\link{isGident}}, \code{\link{bfsearch}} #' @references Thulasiraman, K. \& Swamy, M.N.S. (1992). \emph{Graphs: theory #' and algorithms}. New York: Wiley. #' @keywords graphs models multivariate #' @examples #' #' ## Three cycles #' cycleMatrix(UG(~a*b*d+d*e+e*a*f)) #' ## No cycle #' cycleMatrix(UG(~a*b)) #' ## two cycles: the first is even and the second is odd #' cm <- cycleMatrix(UG(~a*b+b*c+c*d+d*a+a*u*v)) #' apply(cm, 1, sum) #' "cycleMatrix" <- function(amat){ ### Fundamental Cycle matrix of the UG amat. fc <- fundCycles(amat) # Edges of the fundamental cycles E <- allEdges(amat) # All the edges of the graph n <- nrow(E) # Number of edges k <- length(fc) # Number of FC if(k == 0) return(NULL) cmat <- matrix(0, k, n) for(cy in 1:k) { M <- fc[[cy]] # Edges in cycle cy for(j in 1:nrow(M)) { e <- sort(M[j,]) for(i in 1:n){ cmat[cy, i] <- cmat[cy, i] | all(E[i,] == e) } } } dimnames(cmat) <- list(1:k, paste(E[,1], E[,2])) cmat } #' Directed acyclic graphs (DAGs) #' #' A simple way to define a DAG by means of regression model formulae. #' #' The DAG is defined by a sequence of recursive regression models. Each #' regression is defined by a model formula. For each formula the response #' defines a node of the graph and the explanatory variables the parents of #' that node. If the regressions are not recursive the function returns an #' error message. #' #' Some authors prefer the terminology acyclic directed graphs (ADG). #' #' @param \dots a sequence of model formulae #' @param order logical, defaulting to \code{FALSE}. If \code{TRUE} the nodes #' of the DAG are permuted according to the topological order. If \code{FALSE} #' the nodes are in the order they first appear in the model formulae (from #' left to right). #' @return the adjacency matrix of the DAG, i.e. a square Boolean matrix of #' order equal to the number of nodes of the graph and a one in position #' \eqn{(i,j)} if there is an arrow from \eqn{i} to \eqn{j} and zero otherwise. #' The rownames of the adjacency matrix are the nodes of the DAG. #' #' If \code{order = TRUE} the adjacency matrix is permuted to have parents #' before children. This can always be done (in more than one way) for DAGs. #' The resulting adjacency matrix is upper triangular. #' @note The model formulae may contain interactions, but they are ignored in #' the graph. #' @author G. M. Marchetti #' @seealso \code{\link{UG}}, \code{\link{topSort}}, \code{\link{edgematrix}}, #' \code{\link{fitDag}} #' @references Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon #' Press. #' @keywords graphs models multivariate #' @examples #' #' ## A Markov chain #' DAG(y ~ x, x ~ z, z ~ u) #' #' ## Another DAG #' DAG(y ~ x + z + u, x ~ u, z ~ u) #' #' ## A DAG with an isolated node #' DAG(v ~ v, y ~ x + z, z ~ w + u) #' #' ## There can be repetitions #' DAG(y ~ x + u + v, y ~ z, u ~ v + z) #' #' ## Interactions are ignored #' DAG(y ~ x*z + z*v, x ~ z) #' #' ## A cyclic graph returns an error! #' \dontrun{DAG(y ~ x, x ~ z, z ~ y)} #' #' ## The order can be changed #' DAG(y ~ z, y ~ x + u + v, u ~ v + z) #' #' ## If you want to order the nodes (topological sort of the DAG) #' DAG(y ~ z, y ~ x + u + v, u ~ v + z, order=TRUE) #' "DAG" <- function (...,order=FALSE) { ### Defines a DAG from a set of equations (defined with model formulae). f <- list(...) nb <- length(f) # nb is the number of model formulae (of blocks) nod <- c() # Counts the number of nodes for (k in 1:nb) { tt <- terms(f[[k]], specials="I") vars <- dimnames(attr(tt, "factors"))[[1]] skip <- attr(tt, "specials")$I if(!is.null(skip)) vars <- vars[-skip] nod <- c(nod, vars) } N <- unique(nod) # set of nodes dN <- length(N) # number of nodes amat <- matrix(0,dN,dN) for (k in 1:nb) { tt <- terms(f[[k]], specials = "I") vars <- dimnames(attr(tt, "factors"))[[1]] if (attr(tt, "response") == 1) { j <- match(vars[1], N) i <- match(vars[-1], N) amat[i, j] <- 1 } else if (attr(tt, "response") == 0) stop("Some equations have no response") } if(!isAcyclic(amat)) warning("The graph contains directed cycles!") dimnames(amat) <- list(N, N) if(order){ amat <- topSort(amat) } amat } #' Drawing a graph with a simple point and click interface. #' #' Draw a graph from its adjacency matrix representation. #' #' The function is a very simple tool useful for displaying small graphs, with #' a rudimentary interface for moving nodes and edges of a given graph and #' adjusting the final plot. For better displays use \pkg{dynamicGraph} or #' \pkg{Rgraphviz} package in Bioconductor project. #' #' @param amat the adjacency matrix representation of the graph. This can be an #' undirected graph, a directed acyclic graph or a mixed graph with at most a #' summary graph structure. See also \code{\link{plotGraph}} #' @param coor an optional matrix of dimensions \code{p} x 2 where \eqn{p} is #' the number of vertices of the graph. If \code{coor=NULL} then the function #' chooses a default position for the nodes. #' @param adjust a logical value, defaults to \code{FALSE}. If \code{TRUE} the #' graph is plotted and the system waits until the mouse button is pressed #' (same behaviour of \code{locator} function. #' @param alpha a positive value between controlling the distance from the end #' of the edges to the nodes of the graph. #' @param beta a positive value controlling the distance of the labels of the #' variables from the nodes. #' @param lwd line width of the edges (default: 1). #' @param ecol color of the edges (default: "blue"). #' @param bda bidirected edge arrow length (default: 0.1). #' @param layout The name of a function used to compute the (initial) layout of #' the graph. The default is \code{layout.auto}. This can be further adjusted #' if \code{adjust} is \code{TRUE}. #' @return The function plots the graph with a initial positioning of the #' nodes, as specified by \code{coor} and remains in a waiting state. The #' position of each node can be shifted by pointing and clicking (with the #' first mouse button) close to the node. When the mouse button is pressed the #' node which is closer to the selected point is moved to that position. Thus, #' one must be careful to click closer to the selected node than to any other #' node. The nodes can be moved to any position by repeating the previous #' operation. The adjustment process is terminated by pressing any mouse #' button other than the first. #' #' At the end of the process, the function returns invisibly the coordinates of #' the nodes. The coordinates may be used later to redisplay the graph. #' @author Giovanni M. Marchetti #' @seealso \code{\link{UG}}, \code{\link{DAG}}, \code{\link{makeMG}}, #' \code{\link{plotGraph}} #' @references \pkg{dynamicGraph}, \pkg{Rgraphwiz}, #' \url{http://www.bioconductor.org}. #' #' GraphViz, Graph Visualization Project. AT\&T Research. #' \url{http://www.graphviz.org}. #' @keywords graphs hplot iplot #' @examples #' #' ## A directed acyclic graph #' d <- DAG(y1 ~ y2+y6, y2 ~ y3, y3 ~ y5+y6, y4 ~ y5+y6) #' \dontrun{drawGraph(d)} #' #' ## An undirected graph #' g <- UG(~giova*anto*armo + anto*arj*sara) #' \dontrun{drawGraph(d)} #' #' ## An ancestral graph #' ag <- makeMG(ug=UG(~y0*y1), dg=DAG(y4~y2, y2~y1), bg=UG(~y2*y3+y3*y4)) #' drawGraph(ag, adjust = FALSE) #' drawGraph(ag, adjust = FALSE) #' #' ## A more complex example with coordinates: the UNIX evolution #' xy <- #' structure(c(5, 15, 23, 25, 26, 17, 8, 6, 6, 7, 39, 33, 23, 49, #' 19, 34, 13, 29, 50, 68, 70, 86, 89, 64, 81, 45, 64, 49, 64, 87, #' 65, 65, 44, 37, 64, 68, 73, 85, 83, 95, 84, 0, 7, 15, 27, 44, #' 37, 36, 20, 51, 65, 44, 64, 59, 73, 69, 78, 81, 90, 97, 89, 72, #' 85, 74, 62, 68, 59, 52, 48, 43, 50, 34, 21, 18, 5, 1, 10, 2, #' 11, 2, 1, 44), .Dim = c(41, 2), .Dimnames = list(NULL, c("x", #' "y"))) #' Unix <- DAG( #' SystemV.3 ~ SystemV.2, #' SystemV.2 ~ SystemV.0, #' SystemV.0 ~ TS4.0, #' TS4.0 ~ Unix.TS3.0 + Unix.TS.PP + CB.Unix.3, #' PDP11.SysV ~ CB.Unix.3, #' CB.Unix.3 ~ CB.Unix.2, #' CB.Unix.2 ~ CB.Unix.1, #' Unix.TS.PP ~ CB.Unix.3, #' Unix.TS3.0 ~ Unix.TS1.0 + PWB2.0 + USG3.0 + Interdata, #' USG3.0 ~ USG2.0, #' PWB2.0 ~ Interdata + PWB1.2, #' USG2.0 ~ USG1.0, #' CB.Unix.1 ~ USG1.0, #' PWB1.2 ~ PWB1.0, #' USG1.0 ~ PWB1.0, #' PWB1.0 ~ FifthEd, #' SixthEd ~ FifthEd, #' LSX ~ SixthEd, #' MiniUnix ~ SixthEd, #' Interdata ~ SixthEd, #' Wollongong ~ SixthEd, #' SeventhEd ~ Interdata, #' BSD1 ~ SixthEd, #' Xenix ~ SeventhEd, #' V32 ~ SeventhEd, #' Uniplus ~ SeventhEd, #' BSD3 ~ V32, #' BSD2 ~ BSD1, #' BSD4 ~ BSD3, #' BSD4.1 ~ BSD4, #' EigthEd ~ SeventhEd + BSD4.1, #' NinethEd ~ EigthEd, #' Ultrix32 ~ BSD4.2, #' BSD4.2 ~ BSD4.1, #' BSD4.3 ~ BSD4.2, #' BSD2.8 ~ BSD4.1 + BSD2, #' BSD2.9 ~ BSD2.8, #' Ultrix11 ~ BSD2.8 + V7M + SeventhEd, #' V7M ~ SeventhEd #' ) #' drawGraph(Unix, coor=xy, adjust=FALSE) #' # dev.print(file="unix.fig", device=xfig) # Edit the graph with Xfig #' `drawGraph` <- function (amat, coor = NULL, adjust = FALSE, alpha = 1.5, beta = 3, lwd = 1, ecol = "blue", bda = 0.1, layout = layout.auto) { if (is.null(dimnames(amat))) { rownames(a) <- 1:ncol(amat) colnames(a) <- 1:ncol(amat) } if (all(amat == t(amat)) & all(amat[amat != 0] == 1)) { amat <- amat * 10 } `lay` = function(a, directed = TRUE, start = layout){ if (class(a)[1] == "igraph" || class(a)[1] == "graphNEL" || class(a)[1] == "character") { a <- grMAT(a) } if (is(a,"matrix")) { if (nrow(a) == ncol(a)) { if (length(rownames(a)) != ncol(a)) { rownames(a) <- 1:ncol(a) colnames(a) <- 1:ncol(a) } if (!directed) { if (all(a == t(a)) & all(a[a != 0] == 1)) { a <- a * 10 } } l1 <- c() l2 <- c() for (i in 1:nrow(a)) { for (j in i:nrow(a)) { if (a[i, j] == 1) { l1 <- c(l1, i, j) l2 <- c(l2, 2) } if (a[j, i]%%10 == 1) { l1 <- c(l1, j, i) l2 <- c(l2, 2) } if (a[i, j] == 10) { l1 <- c(l1, i, j) l2 <- c(l2, 0) } if (a[i, j] == 11) { l1 <- c(l1, i, j, i, j) l2 <- c(l2, 2, 0) } if (a[i, j] == 100) { l1 <- c(l1, i, j) l2 <- c(l2, 3) } if (a[i, j] == 101) { l1 <- c(l1, i, j, i, j) l2 <- c(l2, 2, 3) } if (a[i, j] == 110) { l1 <- c(l1, i, j, i, j) l2 <- c(l2, 0, 3) } if (a[i, j] == 111) { l1 <- c(l1, i, j, i, j, i, j) l2 <- c(l2, 2, 0, 3) } } } } else { stop("'object' is not in a valid adjacency matrix form") } if (length(l1) > 0) { ## l1 <- l1 - 1 # igraph0 agr <- graph(l1, n = nrow(a), directed = TRUE) } } else { stop("'object' is not in a valid format") } x = start(agr) x[,1] = 10 + 80 * (x[,1] - min(x[,1])) / (max(x[,1]) - min(x[,1])) x[,2] = 10 + 80 * (x[,2] - min(x[,2])) / (max(x[,2]) - min(x[,2])) x } plot.dots <- function(xy, v, dottype, n, beta) { for (i in 1:n) { if (dottype[i] == 1) { points(xy[i, 1], xy[i, 2], pch = 1, cex = 1.2, lwd = lwd) } else if (dottype[i] == 2) { points(xy[i, 1], xy[i, 2], pch = 16, cex = 1.2) } } text(xy[, 1] - beta, xy[, 2] + 2*beta, v, cex = 1.2) } angle <- function(v, alpha = pi/2) { theta <- Arg(complex(real = v[1], imaginary = v[2])) z <- complex(argument = theta + alpha) c(Re(z), Im(z)) } double.edges <- function(x1, x2, y1, y2, lwd, ecol) { d <- 50 n <- 30 dd <- 2 k <- length(x1) if (is.na(x1)) return() for (i in 1:k) { x <- c(x1[i], x2[i]) y <- c(y1[i], y2[i]) m <- (x + y)/2 cen <- m + d * angle(y - x) xm <- x - cen ym <- y - cen thetax <- Arg(complex(real = xm[1], imaginary = xm[2])) thetay <- Arg(complex(real = ym[1], imaginary = ym[2])) theta <- seq(thetax, thetay, len = n) l <- crossprod(y - m) delta <- sqrt(d^2 + l) lx <- cen[1] + delta * cos(theta) ly <- cen[2] + delta * sin(theta) lines(lx, ly, lty = 2, col = ecol, lwd = lwd) vy <- angle(y - cen) vx <- angle(x - cen) vx1 <- x + dd * angle(vx, alpha = pi/12) vx2 <- x + dd * angle(vx, alpha = -pi/12) vy1 <- y + dd * angle(vy, alpha = 11 * pi/12) vy2 <- y + dd * angle(vy, alpha = -11 * pi/12) segments(x[1], x[2], vx1[1], vx1[2], col = ecol, lwd = lwd) segments(x[1], x[2], vx2[1], vx2[2], col = ecol, lwd = lwd) segments(y[1], y[2], vy1[1], vy1[2], col = ecol, lwd = lwd) segments(y[1], y[2], vy2[1], vy2[2], col = ecol, lwd = lwd) ex = x + 0.05 * (y - x) ey = x + 0.95 * (y - x) arrows(ex[1], ex[2], ey[1], ey[2], lty = 1, code = 1, angle = 20, length = 0.1, lwd = lwd, col = ecol) } } draw.edges <- function(coor, u, alpha, type, lwd, ecol, bda) { for (k in 1:nrow(u)) { a <- coor[u[k, 1], ] b <- coor[u[k, 2], ] ba <- b - a ba <- ba/sqrt(sum(ba * ba)) x <- a + ba * alpha y <- b - ba * alpha switch(type + 1, segments(x[1], x[2], y[1], y[2], lty = 1, lwd = lwd, col = ecol), arrows(x[1], x[2], y[1], y[2], code = 2, angle = 20, length = 0.1, lty = 1, lwd = lwd, col = ecol), arrows(x[1], x[2], y[1], y[2], code = 3, angle = 20, length = bda, lty = 5, lwd = lwd, col = ecol), double.edges(x[1], x[2], y[1], y[2], lwd = lwd, ecol)) } } # def.coor <- function(ce, k, h, w) { # if (k == 1) # return(ce) # else if (k == 2) { # r1 <- c(ce[1], ce[1]) # r2 <- c(ce[2] + h * 0.3, ce[2] - h * 0.3) # } # else if (k == 3) { # r1 <- c(ce[1], ce[1], ce[1]) # r2 <- c(ce[2] + h * 0.25, ce[2], ce[2] - h * 0.25) # } # else if (k == 4) { # r1 <- c(ce[1] - w * 0.3, ce[1] + w * 0.3, ce[1] + # w * 0.3, ce[1] - w * 0.3) # r2 <- c(ce[2] - h * 0.3, ce[2] - h * 0.3, ce[2] + # h * 0.3, ce[2] + h * 0.3) # } # else { # a <- 1 # z <- seq(a, a + 2 * pi, len = k + 1) # z <- z[-1] # r1 <- ce[1] + w/2.5 * cos(z) # r2 <- ce[2] + h/2.5 * sin(z) # } # cbind(r1, r2) # } # def.coor.dag <- function(amat, w, h, left) { # nod <- rownames(amat) # o <- topOrder(amat) # if (left) # o <- rev(o) # k <- length(nod) # x <- seq(0, 100, len = k) # y <- rep(c(20, 40, 60, 80), ceiling(k/4))[1:k] # xy <- cbind(x, y) # rownames(xy) <- nod[o] # xy[nod, ] # } v <- parse(text = rownames(amat)) n <- length(v) dottype <- rep(1, n) old <- par(mar = c(0, 0, 0, 0)) on.exit(par(old)) plot(c(0, 100), c(0, 100), type = "n", axes = FALSE, xlab = "", ylab = "") center <- matrix(c(50, 50), ncol = 2) if (is.null(coor)) { coor <- lay(amat) # isdag <- isAcyclic(amat) # if (isdag) # coor <- def.coor.dag(amat, 100, 100, left = left) # else coor <- def.coor(center, n, 100, 100) } g0 <- amat * ((amat == 10) & (t(amat) == 10)) g0[lower.tri(g0)] <- 0 g1 <- amat * ((amat == 1) & !((amat > 0) & (t(amat) > 0))) g2 <- amat * ((amat == 100) & (t(amat) == 100)) g2[lower.tri(g2)] <- 0 g3 <- (amat == 101) + 0 i <- expand.grid(1:n, 1:n) u0 <- i[g0 > 0, ] u1 <- i[g1 > 0, ] u2 <- i[g2 > 0, ] u3 <- i[g3 > 0, ] if (nrow(coor) != length(v)) stop("Wrong coordinates of the vertices.") plot.dots(coor, v, dottype, n, beta) draw.edges(coor, u0, alpha, type = 0, lwd = lwd, ecol, bda) draw.edges(coor, u1, alpha, type = 1, lwd = lwd, ecol, bda) draw.edges(coor, u2, alpha, type = 2, lwd = lwd, ecol, bda) draw.edges(coor, u3, alpha, type = 3, lwd = lwd, ecol, bda) if (adjust) { repeat { xnew <- unlist(locator(1)) if (length(xnew) == 0) { break } d2 <- (xnew[1] - coor[, 1])^2 + (xnew[2] - coor[, 2])^2 i <- (1:n)[d2 == min(d2)] coor[i, 1] <- xnew[1] coor[i, 2] <- xnew[2] plot(c(0, 100), c(0, 100), type = "n", axes = FALSE, xlab = "", ylab = "") plot.dots(coor, v, dottype, n, beta) draw.edges(coor, u0, alpha, type = 0, lwd = lwd, ecol, bda) draw.edges(coor, u1, alpha, type = 1, lwd = lwd, ecol, bda) draw.edges(coor, u2, alpha, type = 2, lwd = lwd, ecol, bda) draw.edges(coor, u3, alpha, type = 3, lwd = lwd, ecol, bda) } } colnames(coor) <- c("x", "y") return(invisible(coor)) } #' d-separation #' #' Determines if in a directed acyclic graph two set of nodes a d-separated by #' a third set of nodes. #' #' d-separation is a fundamental concept introduced by Pearl (1988). #' #' @param amat a Boolean matrix with dimnames, representing the adjacency #' matrix of a directed acyclic graph. The function does not check if this is #' the case. See the function \code{isAcyclic}. #' @param first a vector representing a subset of nodes of the DAG. The vector #' should be a character vector of the names of the variables matching the #' names of the nodes in \code{rownames(A)}. It can be also a numeric vector of #' indices. #' @param second a vector representing another subset of nodes of the DAG. The #' set \code{second} must be disjoint from \code{first}. The mode of #' \code{second} must match the mode of \code{first}. #' @param cond a vector representing a conditioning subset of nodes. The set #' \code{cond} must be disjoint from the other two sets and must share the same #' mode. #' @return a logical value. \code{TRUE} if \code{first} and \code{second} are #' d-separated by \code{cond}. #' @author Giovanni M. Marchetti #' @seealso \code{\link{DAG}}, \code{\link{shipley.test}}, #' \code{\link{inducedCovGraph}} #' @references Pearl, J. (1988). \emph{Probabilistic reasoning in intelligent #' systems.} San Mateo: Morgan Kaufmann. #' #' Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon Press. #' @keywords graphs models multivariate #' @examples #' #' ## Conditioning on a transition node #' dSep(DAG(y ~ x, x ~ z), first="y", second="z", cond = "x") #' ## Conditioning on a collision node (collider) #' dSep(DAG(y ~ x, y ~ z), first="x", second="z", cond = "y") #' ## Conditioning on a source node #' dSep(DAG(y ~ x, z ~ x), first="y", second="z", cond = "x") #' ## Marginal independence #' dSep(DAG(y ~ x, y ~ z), first="x", second="z", cond = NULL) #' ## The DAG defined on p.~47 of Lauritzen (1996) #' dag <- DAG(g ~ x, h ~ x+f, f ~ b, x ~ l+d, d ~ c, c ~ a, l ~ y, y ~ b) #' dSep(dag, first="a", second="b", cond=c("x", "y")) #' dSep(dag, first="a", second=c("b", "d"), cond=c("x", "y")) #' "dSep" <- function(amat, first, second, cond) { ### Are first and second d-Separated by cond in a DAG? e <- inducedCovGraph(amat, sel=c(first,second), cond=cond) all(e[first,second] == 0) } #' Edge matrix of a graph #' #' Transforms the adjacency matrix of a graph into an ``edge matrix''. #' #' In some matrix computations for graph objects the adjacency matrix of the #' graph is transformed into an ``edge matrix''. Briefly, if \eqn{E} is the #' adjacency matrix of the graph, the edge matrix is \eqn{A = #' sign(E+I)^T=[a_{ij}]}. Thus, \eqn{A} has ones along the diagonal and if the #' graph has no edge between nodes \eqn{i} and \eqn{j} the entries #' \eqn{a_{i,j}} and \eqn{a_{j,i}} are both zero. If there is an arrow from #' \eqn{j} to \eqn{i} \eqn{a_{i,j}=1} and \eqn{a_{j,i} = 0}. If there is an #' undirected edge, both \eqn{a_{i,j}=a_{j,i}=1}. #' #' @param E a square matrix, representing the adjacency matrix of a graph. #' @param inv a logical value. #' @return \item{A}{the edge matrix of the graph. If \code{TRUE} the nodes are #' sorted in inverted topological order and the edge matrix is upper #' triangular.} #' @author Giovanni M. Marchetti #' @seealso \code{\link{adjMatrix}} #' @references Wermuth, N. (2003). Analysing social science data with graphical #' Markov models. In: \emph{Highly Structured Stochastic Systems.} P. Green, N. #' Hjort \& T. Richardson (eds.), 47--52. Oxford: Oxford University Press. #' @keywords array algebra graphs multivariate #' @examples #' #' amat <- DAG(y ~ x+z, z~u+v) #' amat #' edgematrix(amat) #' edgematrix(amat, inv=TRUE) #' "edgematrix" <- function (E, inv=FALSE) { ### From the adjacency matrix to the edge matrix E <- sign(E) if(inv){ ord <- topOrder(E) ord <- rev(ord) # Inverse topological order: Nanny ordering. E <- E[ord, ord] } A <- t(E) diag(A) <- 1 A } #' Essential graph #' #' Find the essential graph from a given directed acyclic graph. #' #' Converts a DAG into the Essential Graph. Is implemented by the algorithm by #' D.M.Chickering (1995). #' #' @param dagx a square binary matrix, the adjacency matrix of a directed #' acyclic graph. The names of rows and of the columns are the nodes of the #' DAG. #' @return returns the adjacency matrix of the essential graph. #' @author Giovanni M. Marchetti, translating a MATLAB function by Tomas Kocka, #' AAU #' @seealso \code{\link{DAG}}, \code{\link{InducedGraphs}} #' @references Chickering, D.M. (1995). A transformational characterization of #' equivalent Bayesian network structures. \emph{Proceedings of Eleventh #' Conference on Uncertainty in Artificial Intelligence}, Montreal, QU, 87-98. #' Morgan Kaufmann. #' #' \url{http://research.microsoft.com/~dmax/publications/uai95.pdf} #' @keywords graphs models multivariate #' @examples #' #' dag = DAG(U ~ Y+Z, Y~X, Z~X) #' essentialGraph(dag) #' "essentialGraph" <- function(dagx){ ### Converts a DAG into Essential Graph. ### Is implemented by the algorithm by D.M.Chickering (1995). ### A transformational characterization of equivalent Bayesian network ### structures. Proceedings of Eleventh Conference on Uncertainty in ### Artificial Intelligence, Montreal, QU, pages 87-98. Morgan Kaufmann ### http://research.microsoft.com/~dmax/publications/uai95.pdf ### Implemented in Matlab by Tomas Kocka, AAU. ### Translated in R by Giovanni Marchetti, University of Florence. ord <- topOrder(dagx); # get the topological order of nodes n <- nrow(dagx) # gets the number of nodes i <- expand.grid(1:n, 1:n) # finds all nonzero elements in the adj matrix IJ <- i[dagx==1,] # sort the arcs from lowest possible y I <- IJ[, 1]; J <- IJ[, 2] # and highest possible x, arcs are x->y e <- 1 for(y in 1:n){ for(x in n:1){ if(dagx[ord[x], ord[y]] == 1) { I[e] <- ord[x] J[e] <- ord[y] e <- e + 1 } } } ## Now we have to decide which arcs are part of the essential graph and ## which are undirected edges in the essential graph. ## Undecided arc in the DAG are 1, directed in EG are 2 and undirected in EG are 3. for(e in 1:length(I)){ if(dagx[I[e],J[e]] == 1){ cont <- TRUE for(w in 1:n){ if(dagx[w,I[e]] == 2){ if(dagx[w,J[e]] != 0) dagx[w,J[e]] <- 2 else { for(ww in 1:n){ if(dagx[ww,J[e]] != 0) dagx[ww,J[e]] <- 2 } # skip the rest and start with another arc from the list w <- n cont <- FALSE } } } if(cont){ exists <- FALSE for(z in 1:n){ if((dagx[z,J[e]] != 0) & (z != I[e]) & (dagx[z,I[e]] == 0)){ exists <- TRUE for(ww in 1:n){ if(dagx[ww,J[e]] == 1){ dagx[ww,J[e]] <- 2 } } } } if(!exists){ for(ww in 1:n){ if(dagx[ww,J[e]] == 1){ dagx[ww,J[e]] <- 3 } } } } } } (dagx==2) + (dagx==3) + t(dagx==3) } #' Finding paths #' #' Finds one path between two nodes of a graph. #' #' #' @param amat a square Boolean matrix with dimnames, the adjacency matrix of a #' graph. #' @param st an integer, the starting node. #' @param en an integer, the ending node. #' @param path a vector of integers, used in recursive calls. At the beginning #' is \code{NULL}. It should not be modified by the user. #' @return a vector of integers, the sequence of nodes of a path, starting from #' \code{st} to \code{en}. In some graphs (spanning trees) there is only one #' path between two nodes. #' @note This function is not intended to be directly called by the user. #' @author Giovanni M. Marchetti, translating the original \pkg{Python} code #' (see references). #' @seealso \code{\link{fundCycles}} #' @references Python Softftware Foundation (2003). Python Patterns --- #' Implementing Graphs. \url{http://www.python.org/doc/essays/graphs/}. #' @keywords graphs #' @examples #' #' ## A (single) path on a spanning tree #' findPath(bfsearch(UG(~ a*b*c + b*d + d*e+ e*c))$tree, st=1, en=5) #' "findPath" <- function (amat, st, en, path = c()) { ### Find a path between nodes st and en in a UG with adjacency mat. amat. indices <- 1:nrow(amat) if(st == en) # st is 'node' in recursive calls return(c(path, st)) if(sum(amat[st,]) == 0 ) return(NULL) ## ne <- bd(st,amat) ne <- indices[amat[st,]==1] # Boundary of x. Assumes that amat is symmetric for(node in ne){ if(!is.element(node, c(path, st))){ newpath <- findPath(amat, node, en, c(path, st)) if(!is.null(newpath)) return(newpath) } } } #' Fitting of Gaussian Ancestral Graph Models #' #' Iterative conditional fitting of Gaussian Ancestral Graph Models. #' #' In the Gaussian case, the models can be parameterized using precision #' parameters, regression coefficients, and error covariances (compare #' Richardson and Spirtes, 2002, Section 8). This function finds the MLE #' \eqn{\hat \Lambda}{L} of the precision parameters by fitting a concentration #' graph model. The MLE \eqn{\hat B}{B} of the regression coefficients and the #' MLE \eqn{\hat\Omega}{O} of the error covariances are obtained by iterative #' conditional fitting (Drton and Richardson, 2003, 2004). The three sets of #' parameters are combined to the MLE \eqn{\hat\Sigma}{S} of the covariance #' matrix by matrix multiplication: \deqn{\hat\Sigma = \hat B^{-1}(\hat #' \Lambda+\hat\Omega)\hat }{S = B^(-1) (L+O) B^(-t).}\deqn{ B^{-T}.}{S = #' B^(-1) (L+O) B^(-t).} Note that in Richardson and Spirtes (2002), the #' matrices \eqn{\Lambda}{L} and \eqn{\Omega}{O} are defined as submatrices. #' #' @param amat a square matrix, representing the adjacency matrix of an #' ancestral graph. #' @param S a symmetric positive definite matrix with row and col names, the #' sample covariance matrix. #' @param n the sample size, a positive integer. #' @param tol a small positive number indicating the tolerance used in #' convergence checks. #' @return \item{Shat}{the fitted covariance matrix.} \item{Lhat}{matrix of the #' fitted precisions associated with undirected edges and vertices that do not #' have an arrowhead pointing at them.} \item{Bhat}{matrix of the fitted #' regression coefficients associated to the directed edges. Precisely said #' \code{Bhat} contains ones on the diagonal and the off-diagonal entry #' \eqn{(i,j)}{(i,j)} equals the \emph{negated} MLE of the regression #' coefficient for variable \eqn{j}{j} in the regression of variable \eqn{i}{i} #' on its parents. Note that this \eqn{(i,j)}{(i,j)} entry in \code{Bhat} #' corresponds to a directed edge \eqn{j \to i}{j -> i}, and thus to a one as #' \eqn{(j,i)}{(j,i)} entry in the adjacency matrix.} \item{Ohat}{matrix of the #' error covariances and variances of the residuals between regression #' equations associated with bi-directed edges and vertices with an arrowhead #' pointing at them.} \item{dev}{the `deviance' of the model.} \item{df}{the #' degrees of freedom.} \item{it}{the iterations.} #' @author Mathias Drton #' @seealso \code{\link{fitCovGraph}}, \code{\link{icf}}, \code{\link{makeMG}}, #' \code{\link{fitDag}} #' @references Drton, M. and Richardson, T. S. (2003). A new algorithm for #' maximum likelihood estimation in Gaussian graphical models for marginal #' independence. \emph{Proceedings of the Nineteenth Conference on Uncertainty #' in Artificial Intelligence}, 184-191. #' #' Drton, M. and Richardson, T. S. (2004). Iterative Conditional Fitting for #' Gaussian Ancestral Graph Models. Proceedings of the 20th Conference on #' Uncertainty in Artificial Intelligence, Department of Statistics, 130-137. #' #' Richardson, T. S. and Spirtes, P. (2002). Ancestral Graph Markov Models. #' \emph{Annals of Statistics}. 30(4), 962-1030. #' @keywords graphs models ancestral graph multivariate #' @examples #' #' ## A covariance matrix #' "S" <- structure(c(2.93, -1.7, 0.76, -0.06, #' -1.7, 1.64, -0.78, 0.1, #' 0.76, -0.78, 1.66, -0.78, #' -0.06, 0.1, -0.78, 0.81), .Dim = c(4,4), #' .Dimnames = list(c("y", "x", "z", "u"), c("y", "x", "z", "u"))) #' ## The following should give the same fit. #' ## Fit an ancestral graph y -> x <-> z <- u #' fitAncestralGraph(ag1 <- makeMG(dg=DAG(x~y,z~u), bg = UG(~x*z)), S, n=100) #' #' ## Fit an ancestral graph y <-> x <-> z <-> u #' fitAncestralGraph(ag2 <- makeMG(bg= UG(~y*x+x*z+z*u)), S, n=100) #' #' ## Fit the same graph with fitCovGraph #' fitCovGraph(ag2, S, n=100) #' #' ## Another example for the mathematics marks data #' #' data(marks) #' S <- var(marks) #' mag1 <- makeMG(bg=UG(~mechanics*vectors*algebra+algebra*analysis*statistics)) #' fitAncestralGraph(mag1, S, n=88) #' #' mag2 <- makeMG(ug=UG(~mechanics*vectors+analysis*statistics), #' dg=DAG(algebra~mechanics+vectors+analysis+statistics)) #' fitAncestralGraph(mag2, S, n=88) # Same fit as above #' `fitAncestralGraph` <- function (amat, S, n, tol = 1e-06){ ### Fit Ancestral Graphs. Mathias Drton, 2003 2009. It works for ADMGs nam <- rownames(S) nod <- rownames(amat) ## permute graph to have same layout as S if(is.null(nod)){ stop("The adjacency matrix has no labels!") } if(!all(is.element(nod, nam))) stop("The nodes of the graph do not match the names of the variables") else sek <- intersect(nam, nod) S <- S[sek,sek, drop=FALSE] # Resizes eventually S amat <- amat[sek,sek, drop=FALSE] # and reorders amat temp <- icfmag(amat, S, tol) p <- ncol(S) df <- p*(p-1)/2 - sum(In(amat+t(amat)))/2 # Degrees of freedom dev <- likGau(solve(temp$Sigmahat), S, n, p) if(is.null(temp$Bhat)){ Beta <- NULL } else{ ## Beta <- diag(1,p)-temp$Bhat Beta <- temp$Bhat } return(list(Shat=temp$Sigmahat, Lhat=temp$Lambdahat, Bhat=Beta, Ohat=temp$Omegahat, dev = dev, df = df, it=temp$iterations)) } `likGau` = function(K, S, n, k){ # deviance of the Gaussian model. SK = S %*% K tr = function(A) sum(diag(A)) (tr(SK) - log(det(SK)) - k) * n } #' Fitting a Gaussian concentration graph model #' #' Fits a concentration graph (a covariance selection model). #' #' The algorithms for fitting concentration graph models by maximum likelihood #' are discussed in Speed and Kiiveri (1986). If the cliques are known the #' function uses the iterative proportional fitting algorithm described by #' Whittaker (1990, p. 184). If the cliques are not specified the function #' uses the algorithm by Hastie et al. (2009, p. 631ff). #' #' @param amat a square Boolean matrix representing the adjacency matrix of an #' UG #' @param S the sample covariance matrix #' @param n an integer denoting the sample size #' @param cli a list containing the cliques of the graph. The components of the #' list are character vectors containing the names of the nodes in the cliques. #' The names must match the names of the vertices. The knowledge of the cliques #' is not needed. If the cliques are not specified the function uses the #' algorithm by Hastie et al. (2009, p. 446). #' @param alg The algorithm used. #' @param pri If TRUE is verbose #' @param tol a small positive number indicating the tolerance used in #' convergence tests. #' @return \item{Shat}{the fitted covariance matrix.} \item{dev}{the `deviance' #' of the model.} \item{df}{the degrees of freedom.} \item{it}{the iterations.} #' @author Giovanni M. Marchetti #' @seealso \code{\link{UG}}, \code{\link{fitDag}}, \code{\link{marks}} #' @references Cox, D. R. and Wermuth, N. (1996). \emph{Multivariate #' dependencies}. London: Chapman \& Hall. #' #' Hastie, T., Tibshirani, R. and Friedman, J. (2009). \emph{The elements of #' statistical learning.} Springer Verlag: New York. #' #' Speed, T.P. and Kiiveri, H (1986). Gaussian Markov distributions over finite #' graphs. \emph{Annals of Statistics}, 14, 138--150. #' #' Whittaker, J. (1990). \emph{Graphical models in applied multivariate #' statistics}. Chichester: Wiley. #' @keywords graphs models multivariate #' @examples #' #' ## A model for the mathematics marks (Whittaker, 1990) #' data(marks) #' ## A butterfly concentration graph #' G <- UG(~ mechanics*vectors*algebra + algebra*analysis*statistics) #' fitConGraph(G, cov(marks), nrow(marks)) #' ## Using the cliques #' #' cl = list(c("mechanics", "vectors", "algebra"), c("algebra", "analysis" , "statistics")) #' fitConGraph(G, S = cov(marks), n = nrow(marks), cli = cl) #' `fitConGraph` <- function (amat, S, n, cli=NULL, alg = 3, pri = FALSE, tol = 1e-06) { ### Fits a concentration graph G. ### Now it does not compute the cliques of the graph. nam <- rownames(S) nod <- rownames(amat) if(is.null(nod)){ stop("The adjacency matrix has no labels.") } if(!all(is.element(nod, nam))) stop("The nodes of the graph do not match the names of the variables.") else sek <- intersect(nam, nod) S <- S[sek,sek, drop=FALSE] # Resizes eventually S amat <- amat[sek,sek, drop=FALSE] # and reorders amat nod <- rownames(amat) if (all(amat==0)){ alg <- 2 cli = as.list(nod) } if(is.null(cli)){ alg <- 3 } else { alg <- 2 nc <- length(cli) if(nc==1) { return(list(Shat = S, dev = 0, df = 0, it=1)) } } k <- ncol(S) if(alg == 1){ # First algorithm by Whittaker (needs the cliques) it <- 0 W <- diag(diag(S)) # Starting value dimnames(W) <- dimnames(S) repeat { W.old <- W it <- it+1 for (i in 1:nc) { a <- cli[[i]] b <- setdiff(nod, a) Saa <- S[a, a] Waa <- W[a, a] Wba <- W[b, a] Wbb <- W[b, b] B <- Wba %*% solve(Waa) Spar <- Wbb - B %*% Waa %*% t(B) BV <- B %*% Saa W[b, a] <- BV W[a, b] <- t(BV) W[a, a] <- Saa W[b, b] <- Spar + BV %*% t(B) } if(sum(abs(W-W.old)) < tol) break } } else if(alg==2) { # Second algorithm by Whittaker (needs the cliques) it = 0 K <- solve(diag(diag(S))) # Starting value dimnames(K) <- dimnames(S) repeat { K.old <- K it <- it+1 for (i in 1:nc) { a <- cli[[i]] b <- setdiff(nod, a) K[a,a] <- solve(S[a,a]) + K[a,b] %*% solve(K[b,b]) %*% K[b,a] if(pri) { dev <- likGau(K, S, n, k) cat(dev, "\n") } } if(sum(abs(K-K.old)) < tol) break } W <- solve(K) } else if(alg==3){ # Hastie Friedman Tibshirani p. 791 W0 <- S ; W <- S it <- 0 converge = FALSE while( !converge ) { it <- it+1 for (j in 1:k){ W11 <- W[-j,-j,drop=FALSE] w12 <- W[-j,j] s12 <- S[-j,j, drop=FALSE] paj <- amat[j,] == 1; # neighbors paj <- paj[-j] beta <- rep(0, k-1) if (all(!paj)){ w <- rep(0, k-1) } else{ beta[paj] <- solve(W11[paj, paj], s12[paj, ]) w <- W11 %*% beta } W[-j, j] <- w W[j, -j] <- w } di <- norm(W0-W) if(pri) { cat(di, "\n") } if (di < tol){ converge <- TRUE } else { W0 <- W } } } df <- (sum(1-amat) - k)/2 Kh <- solve(W) dev <- likGau(Kh, S, n, k) list(Shat = W, dev = dev, df = df, it=it) } #' Fitting of Gaussian covariance graph models #' #' Fits a Gaussian covariance graph model by maximum likelihood. #' #' A covariance graph is an undirected graph in which the variables associated #' to two non-adjacent nodes are marginally independent. The edges of these #' models are represented by bi-directed edges (Drton and Richardson, 2003) or #' by dashed lines (Cox and Wermuth, 1996). #' #' By default, this function gives the ML estimates in the covariance graph #' model, by iterative conditional fitting (Drton and Richardson, 2003). #' Otherwise, the estimates from a ``dual likelihood'' estimator can be #' obtained (Kauermann, 1996; Edwards, 2000, section 7.4). #' #' @param amat A symmetric Booloean matrix with dimnames representing the #' adjacency matrix of an UG. #' @param S A symmetric positive definite matrix with dimnames, the sample #' covariance matrix. #' @param n A positive integer, the sample size. #' @param alg A character string, the algorithm used. If \code{alg="icf"} (the #' default) the algorithm is based on iterative conditional fitting (see Drton #' and Richardson, 2003). In this case the ML estimates are returned. If #' \code{alg="dual"} the algorithm is based on the dual likelihood (see #' Kauermann, 1996). The fitted values are an approximation of the ML #' estimates. #' @param dual.alg And integer equal to 1 or 2. It is used if #' \code{alg="dual"}. In this case a concentration graph model is fitted to the #' inverse of the sample covariance matrix, and \code{dual.alg} is passed to #' \code{fitConGraph} to specify the algorithm used in \code{fitConGraph}. #' @param start.icf A symmetric matrix used as starting value of the algorithm. #' If \code{start=NULL} the starting value is a diagonal matrix with diagonal #' entries equal to sample variances. #' @param tol A small positive number indicating the tolerance used in #' convergence tests. #' @return \item{Shat}{the fitted covariance matrix.} \item{dev}{the `deviance' #' of the model.} \item{df}{the degrees of freedom.} \item{it}{the iterations.} #' @author Mathias Drton #' @seealso \code{\link{fitConGraph}}, \code{\link{icf}} #' @references Cox, D. R. and Wermuth, N. (1996). \emph{Multivariate #' dependencies}. London: Chapman \& Hall. #' #' Drton, M. and Richardson, T. S. (2003). A new algorithm for maximum #' likelihood estimation in Gaussian graphical models for marginal #' independence. \emph{Proceedings of the Nineteenth Conference on Uncertainty #' in Artificial Intelligence}, 184--191. #' #' Kauermann, G. (1996). On a dualization of graphical Gaussian models. #' \emph{Scandinavian Journal of Statistics}. 23, 105--116. #' @keywords graphs models multivariate #' @examples #' #' ## Correlations among four strategies to cope with stress for #' ## 72 students. Cox & Wermuth (1996), p. 73. #' #' data(stress) #' #' ## A chordless 4-cycle covariance graph #' G <- UG(~ Y*X + X*U + U*V + V*Y) #' #' fitCovGraph(G, S = stress, n=72) #' fitCovGraph(G, S = stress, n=72, alg="dual") #' `fitCovGraph` <- function (amat, S, n, alg="icf", dual.alg=2, start.icf=NULL, tol = 1e-06){ ### Fits a Covariance Graph. Mathias Drton, 2003 ### amat: adjacency matrix; S: covariance matrix; n: sample size. amat <- In(amat) # Forces the ones in a bidirected graph defined with makeMG nam <- rownames(S) nod <- rownames(amat) if(is.null(nod)){ stop("The adjacency matrix has no labels.") } if(!all(is.element(nod, nam))) stop("The nodes of the graph do not match the names of the variables.") else sek <- intersect(nam, nod) S <- S[sek,sek, drop=FALSE] # Resizes eventually S amat <- amat[sek,sek, drop=FALSE] # and reorders amat if(alg=="icf"){ temp <- icf(amat, S, start.icf, tol) } else{ if(alg == "dual"){ Sinv <- solve(S) temp <- fitConGraph(amat, Sinv, n, pri=FALSE, alg=dual.alg, tol = 1e-06) temp <- list(Sigmahat=zapsmall(solve(temp$Shat)), iterations=temp$it) } else{ stop("Algorithm misspecified!") } } df <- sum(amat[upper.tri(amat)] == 0) # Degrees of freedom k <- ncol(S) dev <- likGau(solve(temp$Sigmahat), S, n, k) return(list(Shat=temp$Sigmahat, dev = dev, df = df, it=temp$iterations)) } #' Fitting of Gaussian DAG models #' #' Fits linear recursive regressions with independent residuals specified by a #' DAG. #' #' \code{fitDag} checks if the order of the nodes in adjacency matrix is the #' same of \code{S} and if not it reorders the adjacency matrix to match the #' order of the variables in \code{S}. The nodes of the adjacency matrix may #' form a subset of the variables in \code{S}. #' #' @param amat a square matrix with dimnames representing the adjacency matrix #' of the DAG #' @param S a symmetric positive definite matrix, the sample covariance matrix #' @param n an integer > 0, the sample size #' @return \item{Shat}{the fitted covariance matrix.} \item{Ahat}{a square #' matrix of the fitted regression coefficients. The entry \code{Ahat[i,j]} is #' minus the regression coefficient of variable \code{i} in the regression #' equation \code{j}. Thus there is a non zero partial regression coefficient #' \code{Ahat[i,j]} corresponding to each non zero value \code{amat[j,i]} in #' the adjacency matrix.} \item{Dhat}{a vector containing the partial variances #' of each variable given the parents.} \item{dev}{the `deviance' of the #' model.} \item{df}{the degrees of freedom.} #' @author Giovanni M. Marchetti #' @seealso \code{\link{DAG}}, \code{\link{swp}}. #' @references Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate #' dependencies}. London: Chapman \& Hall. #' @keywords graphs models multivariate #' @examples #' #' dag <- DAG(y ~ x+u, x ~ z, z ~ u) #' "S" <- structure(c(2.93, -1.7, 0.76, -0.06, #' -1.7, 1.64, -0.78, 0.1, #' 0.76, -0.78, 1.66, -0.78, #' -0.06, 0.1, -0.78, 0.81), .Dim = c(4,4), #' .Dimnames = list(c("y", "x", "z", "u"), c("y", "x", "z", "u"))) #' fitDag(dag, S, 200) #' "fitDag" <- function (amat, S, n) { ### Fits linear recursive regressions with independent residuals. ### amat: the adjacency matrix of the DAG. S: cov matrix. n: sample size. if(missing(amat)){ # saturated model amat <- lower.tri(diag(ncol(S)), diag=FALSE) * 1 dimnames(amat) <- dimnames(S) } nam <- rownames(S) nod <- rownames(amat) if(is.null(nod)) stop("The adjacency matrix has no labels.") if(!all(is.element(nod, nam))) stop("The nodes of the graph do not match the names of the variables.") else sek <- intersect(nam, nod) S <- S[sek,sek] # Resizes eventually S amat <- amat[sek,sek] # and reorders amat Delta <- rep(length(sek),0) emat <- edgematrix(amat) A <- emat p <- ncol(S) ip <- 1:p for(i in 1:p) { u <- emat[i,] v <- ip[u == 1 & ip != i] M <- swp(S, v)[i,] A[i, ] <- - A[i, ] * M A[i,i] <- 1 k <- sum(A[i,]) Delta[i] <- M[i] } names(Delta) <- sek B <- solve(A) Shat <- B %*% diag(Delta) %*% t(B) dimnames(Shat) <- dimnames(S) Khat <- solve(Shat) H <- S %*% Khat trace <- function(A) sum(diag(A)) dev <- (trace(H) - log(det(H)) - p) * n df <- p*(p+1)/2 - sum(amat==1) - p list(Shat=Shat, Ahat = A, Dhat=Delta, dev=dev, df=df) } #' Fitting Gaussian DAG models with one latent variable #' #' Fits by maximum likelihood a Gaussian DAG model where one of the nodes of #' the graph is latent and it is marginalised over. #' #' For the EM algorithm used see Kiiveri (1987). #' #' @param amat a square matrix with dimnames representing the adjacency matrix #' of the DAG. #' @param Syy a symmetric positive definite matrix, with dimnames, the sample #' covariance matrix of the observed variables. The set of the observed nodes #' of the graph must be a subset of the set of the names of the variables in #' \code{Syy}. #' @param n a positive integer, the sample size. #' @param latent the name of the latent variable. #' @param norm an integer, the kind of normalization of the latent variable. #' If \code{norm=1}, the latent is scaled to have unit variance. If #' \code{norm=2}, the latent is scaled to have unit partial variance given its #' parents. #' @param seed an integer, used by \code{set.seed} to specify a random starting #' point of the EM algorithm. #' @param maxit an integer denoting the maximum number of iterations allowed #' for the EM algorithm. If the convergence criterion is not satisfied within #' maxit iterations the algorithms stops and a warning message is returned. #' @param tol a small real value, denoting the tolerance used in testing #' convergence. #' @param pri logical, if \code{pri=TRUE} then the value of the deviance at #' each iteration is printed. #' @return \item{Shat}{ the fitted covariance matrix of all the variables #' including the latent one. The latent variable is the last. If \code{norm=1} #' then the variance of the latent variable is constrained to 1. } #' \item{Ahat}{ a square matrix of the fitted regression coefficients. The #' entry \code{Ahat[i,j]} is minus the regression coefficient of variable #' \code{i} in the regression equation \code{j}. Thus there is a non zero #' partial regression coefficient \code{Ahat[i,j]} corresponding to each non #' zero value \code{amat[j,i]} in the adjacency matrix. } \item{Dhat}{ a #' vector containing the partial variances of each variable given the parents. #' If \code{norm=2} then the partial variance of the latent variable is #' constrained to 1. } \item{dev}{ the `deviance' of the model. } \item{df}{ #' the degrees of freedom of the model. } \item{it}{ the number of EM #' algorithm iterations at convergence. } #' @author Giovanni M. Marchetti #' @seealso \code{\link{fitDag}}, \code{\link{checkIdent}} #' @references Kiiveri,H. T. (1987). An incomplete data approach to the #' analysis of covariance structures. \emph{Psychometrika}, 52, 4, 539--554. #' #' Joreskog, K.G. and Goldberger, A.S. (1975). Estimation of a model with #' multiple indicators and multiple causes of a single latent variable. #' \emph{Journal of the American Statistical Association}, 10, 631--639. #' @keywords graphs models multivariate #' @examples #' #' ## data from Joreskog and Goldberger (1975) #' V <- matrix(c(1, 0.36, 0.21, 0.10, 0.156, 0.158, #' 0.36, 1, 0.265, 0.284, 0.192, 0.324, #' 0.210, 0.265, 1, 0.176, 0.136, 0.226, #' 0.1, 0.284, 0.176, 1, 0.304, 0.305, #' 0.156, 0.192, 0.136, 0.304, 1, 0.344, #' 0.158, 0.324, 0.226, 0.305, 0.344, 1), 6,6) #' nod <- c("y1", "y2", "y3", "x1", "x2", "x3") #' dimnames(V) <- list(nod,nod) #' dag <- DAG(y1 ~ z, y2 ~ z, y3 ~ z, z ~ x1 + x2 + x3, x1~x2+x3, x2~x3) #' fitDagLatent(dag, V, n=530, latent="z", seed=4564) #' fitDagLatent(dag, V, n=530, latent="z", norm=2, seed=145) #' "fitDagLatent" <- function (amat, Syy, n, latent, norm = 1, seed, maxit=9000, tol=1e-6, pri=FALSE) { ### Fits linear recursive regressions with independent residuals and one latent variable. ### Syy: covariance matrix, n: sample size, amat: adjacency matrix. ### NOTE: both amat and Syy must have rownames. ### latent is the "name" of the latent in the rownames of Syy. norm = normalisation type. ## Local functions setvar1 <- function (V, z, paz, norm) ## paz are the parents of the latent (needed if norm=2) { ## Normalizes V if(norm == 1){ ## Rescales V forcing V[z,z] = 1 a <- 1 / sqrt(V[z,z]) } else if(norm== 2) { ## Rescales V forcing Delta[z,z] = 1 if(sum(paz) > 0) sig <- V[z,z] - V[z, paz] %*% solve(V[paz,paz]) %*% V[paz,z] else sig <- V[z,z] a <- 1/sqrt(sig) } V[z,] <- V[z,] * a V[,z] <- V[,z] * a V } cmqi <- function (Syy, Sigma, z) { ## Computes the matrix C(M | Q) by Kiiveri (1987), Psychometrika. ## It is a slight generalization in which Z is not the last element. ## z is a Boolean vector indicating the position of the latent variable in X. y <- ! z Q <- solve(Sigma) Qzz <- Q[z,z] Qzy <- Q[z,y] B <- - solve(Qzz) %*% Qzy BSyy <- B %*% Syy E <- Sigma*0 E[y,y] <- Syy E[y,z] <- t(BSyy) E[z,y] <- BSyy E[z,z] <- BSyy %*% t(B) + solve(Qzz) dimnames(E) <- dimnames(Sigma) E } fitdag <- function (amat, S,n, constr=NULL) { ## Fits linear recursive regressions with independent residuals (fast version). ## NOTE. amat and S must have the same size and variables. constr is a matrix ## indicating the edges that must be constrained to 1. emat <- edgematrix(amat) A <- emat p <- ncol(S) Delta <- rep(p,0) ip <- 1:p for(i in ip) { u <- emat[i,] v <- ip[(u == 1) & (ip != i)] # Parents if(length(v) == 0){ # If pa is empty Delta[i] <- S[i,i] next } M <- lmfit(S, y=i, x=v, z=(constr[i, ] == 1)) A[i, ] <- - A[i, ] * M A[i,i] <- 1 k <- sum(A[i,]) Delta[i] <- M[i] } Khat <- t(A) %*% diag(1/Delta) %*% A Shat <- solve(Khat) list(A = A, Delta=Delta, Shat=Shat, Khat=Khat) } lmfit <- function(S, y, x, z=NULL){ ## Regression coefficients of y given x eventually with z constrained to 1. ## Residual variance in position y. Sxy <- S[x, y, drop=FALSE] - apply(S[x, z, drop=FALSE], 1, sum) Sxx <- S[x, x, drop=FALSE] bxy <- solve(Sxx,Sxy) out<- rep(0, nrow(S)) out[x] <- bxy out[z] <- 1 names(out) <- rownames(S) xz <- c(x,z) b <- out[xz, drop=FALSE] res <- S[y,y] + t(b) %*% (S[xz, xz, drop=FALSE] %*% b - 2 * S[xz,y, drop=FALSE]) out[y] <- res out } lik <- function(K, S, n, p){ ## Computes the deviance trace <- function(a) sum(diag(a)) SK <- S %*% K (trace(SK) - log(det(SK)) - p) * n } ## Beginning of the main function nam <- rownames(Syy) # Names of the variables (they can be more than the nodes) nod <- rownames(amat) # Names of the nodes of the DAG (that contains the latent) if(is.null(nod)) stop("The adjacency matrix has no labels.") if(!all(is.element(setdiff(nod, latent), nam))) stop("The observed nodes of the graph do not match the names of the variables.") else sek <- intersect(nam, nod) Syy <- Syy[sek,sek, drop=FALSE] # Resizes eventually S sek <- c(sek, latent) amat <- amat[sek,sek, drop=FALSE] # and reorders amat nod <- rownames(amat) paz <- pa(latent, amat) # Parents of the latent paz <- is.element(nod, paz) dn <- list(nod,nod) wherez <- is.element(nod, latent) # Index of the latent if(is.null(wherez)) stop("Wrong name of the latent variable!") wherey <- ! wherez p <- ncol(Syy) df <- p*(p+1)/2 - sum(amat==1) - p # Degrees of freedom if(df <= 0) warning(paste("The degrees of freedom are ", df)) if(!missing(seed)) set.seed(seed) # For random starting value Sigma.old <- rcorr(p+1) Sigma.old <- setvar1(Sigma.old, wherez, paz, norm=norm) dimnames(Sigma.old) <- dn it <- 0 repeat{ it <- it+1 if(it > maxit){ warning("Maximum number of iterations reached.") break } Q <- cmqi(Syy, Sigma.old, wherez) # E-step. See Kiiveri, 1987 fit <- fitdag(amat, Q, n) # M-step Sigma.new <- fit$Shat if(pri) { dev <- lik(fit$Khat, Q, n, (p+1)) # Monitoring progress of iterations cat(dev, "\n") } else{ if(0==(it %% 80)) cat("\n") else cat(".") } if(sum(abs(Sigma.new - Sigma.old)) < tol) break # Test convergence Sigma.old <- Sigma.new } cat("\n") dev <- lik(fit$Khat, Q, n, (p+1)) Shat <- setvar1(Sigma.new, wherez, paz, norm=norm) # Normalize Shat dimnames(Shat) <- dn Khat <- solve(Shat) fit <- fitDag(amat, Shat, n) list(Shat=Shat, Ahat=fit$Ahat, Dhat=fit$Dhat, dev=dev, df=df, it=it) } #' Fundamental cycles #' #' Finds the list of fundamental cycles of a connected undirected graph. #' #' All the cycles in an UG can be obtained from combination (ring sum) of the #' set of fundamental cycles. #' #' @param amat a symmetric matrix with dimnames denoting the adjacency matrix #' of the undirected graph. The graph must be connected, otherwise the function #' returns an error message. #' @return a list of matrices with two columns. Every component of the list is #' associated to a cycle. The cycle is described by a \eqn{k \times 2} matrix #' whose rows are the edges of the cycle. If there is no cycle the function #' returns \code{NULL}. #' @note This function is used by \code{cycleMatrix} and \code{isGident}. #' @author Giovanni M. Marchetti #' @seealso \code{\link{UG}},\code{\link{findPath}}, \code{\link{cycleMatrix}}, #' \code{\link{isGident}},\code{\link{bfsearch}} #' @references Thulasiraman, K. \& Swamy, M.N.S. (1992). \emph{Graphs: theory #' and algorithms}. New York: Wiley. #' @keywords graphs models multivariate #' @examples #' #' ## Three fundamental cycles #' fundCycles(UG(~a*b*d + d*e + e*a*f)) #' "fundCycles" <- function(amat){ ### Finds a set of fundamental cycles for an UG with adj. matrix amat. fc <- c() tr <- bfsearch(amat) # Spanning tree if(is.null(tr)) return(NULL) if(is.null(tr$chords)) return(NULL) co <- tr$chords # edges of the cospanning tree for(i in 1:nrow(co)) { e <- co[i,] g <- tr$tree # edge matrix of the spanning tree cy <- findPath(g, st=e[1], en=e[2]) splitCycle <- function(v){ ## Splits a cycle v into a matrix of edges. v <- c(v, v[1]) cbind(v[-length(v)], v[-1]) } cy <- splitCycle(cy) fc <- c(fc, list(cy)) } fc } #' Iterative conditional fitting #' #' Main algorithm for MLE fitting of Gaussian Covariance Graphs and Gaussian #' Ancestral models. #' #' These functions are not intended to be called directly by the user. #' #' @aliases icf icfmag #' @param bi.graph a symmetric matrix with dimnames representing the adjacency #' matrix of an undirected graph. #' @param mag a square matrix representing the adjacency matrix of an ancestral #' graph (for example returned by \code{makeAG}). #' @param S a symmetric positive definite matrix, the sample covariance matrix. #' The order of the variables must be the same of the order of vertices in the #' adjacency matrix. #' @param start a symmetric matrix used as starting value of the algorithm. If #' \code{start=NULL} the starting value is a diagonal matrix. #' @param tol A small positive number indicating the tolerance used in #' convergence tests. #' @return \item{Sigmahat}{the fitted covariance matrix.} \item{Bhat}{matrix of #' the fitted regression coefficients associated to the directed edges.} #' \item{Omegahat}{matrix of the partial covariances of the residuals between #' regression equations.} \item{iterations}{the number of iterations.} #' @author Mathias Drton #' @seealso \code{\link{fitCovGraph}}, \code{\link{fitAncestralGraph}} #' @references Drton, M. \& Richardson, T. S. (2003). A new algorithm for #' maximum likelihood estimation in Gaussian graphical models for marginal #' independence. \emph{Proceedings of the Ninetheen Conference on Uncertainty #' in Artificial Intelligence}, 184--191. #' #' Drton, M. \& Richardson, T. S. (2004). Iterative Conditional Fitting for #' Gaussian Ancestral Graph Models. Proceedings of the 20th Conference on #' Uncertainty in Artificial Intelligence, Department of Statistics, 130--137. #' @keywords internal "icf" <- function(bi.graph, S, start=NULL, tol = 1e-06){ ### Iterative conditional fitting for bidirected graphs. Mathias Drton, 2003 if(!is.matrix(S)){ stop("Second argument is not a matrix!") } if(dim(S)[1]!=dim(S)[2]){ stop("Second argument is not a square matrix!") } if(min(eigen(S)[[1]])<=0){ stop("Second argument is not a positive definite matrix!") } p <- nrow(S) i <- 0 ## prep spouses and non-spouses pa.each.node <-function(amat){ ## List of the parents of each node. ## If amat is symmetric it returns the boundaries. p <- nrow(amat) b <- vector(p, mode="list") ip <- 1:p for(i in 1:p) b[[i]] <- ip[amat[,i]==1] b } spo <- pa.each.node(bi.graph) nsp <- pa.each.node(cmpGraph(bi.graph)) number.spouses <- unlist(lapply(spo, length)) no.spouses <- (1:p)[number.spouses==0] all.spouses <- (1:p)[number.spouses==(p-1)] nontrivial.vertices <- setdiff((1:p), no.spouses) if(length(nontrivial.vertices)==0){ if(p==1){ Sigma <- S } else{ Sigma <- diag(diag(S)) dimnames(Sigma) <- dimnames(S) } return(list(Sigmahat=Sigma, iterations=1)) } if(is.null(start)){ Sigma <- as.matrix(diag(diag(S))) # starting value } else{ temp <- diag(start) start[bi.graph==0] <- 0 diag(start) <- temp diag(start)[no.spouses] <- diag(S)[no.spouses] Sigma <- as.matrix(start) if(min(eigen(Sigma)$values) <= 0){ stop("Starting value is not feasible!") } } repeat{ i <- i+1 Sigma.old <- Sigma for(v in nontrivial.vertices){ if(is.element(v, all.spouses)){ B <- S[v,-v]%*%solve(S[-v,-v]) lambda <- S[v,v]-B%*%S[-v,v] } else{ B.spo.nsp <- Sigma[spo[[v]],nsp[[v]]]%*%solve(Sigma[nsp[[v]],nsp[[v]]]) YZ <- S[v,spo[[v]]]-S[v,nsp[[v]]]%*%t(B.spo.nsp) B.spo <- YZ %*% solve( S[spo[[v]],spo[[v]]] -S[spo[[v]],nsp[[v]]]%*%t(B.spo.nsp) -B.spo.nsp%*%S[nsp[[v]],spo[[v]]] +B.spo.nsp%*%S[nsp[[v]],nsp[[v]]]%*%t(B.spo.nsp) ) lambda <- S[v,v]-B.spo%*%t(YZ) B.nsp <- -B.spo%*%B.spo.nsp B <- rep(0, p) B[spo[[v]]] <- B.spo B[nsp[[v]]] <- B.nsp B <- B[-v] } ## here I can improve by only using B[spo[[v]]]! Sigma[v,-v] <- B%*%Sigma[-v,-v] Sigma[v,nsp[[v]]] <- 0 Sigma[-v,v] <- t(Sigma[v,-v]) Sigma[v,v] <- lambda + B%*%Sigma[-v,v] } if(sum(abs(Sigma.old-Sigma)) < tol) break } dimnames(Sigma) <- dimnames(S) return(list(Sigmahat=Sigma, iterations=i)) } `icfmag` <- function(mag, S, tol = 1e-06){ ## Iterative conditional fitting for ancestral and mixed graphs. Mathias Drton, 2003, 2009. if(!is.matrix(S)){ stop("Second argument is not a matrix!") } if(dim(S)[1]!=dim(S)[2]){ stop("Second argument is not a square matrix!") } if(min(eigen(S)[[1]])<=0){ stop("Second argument is not a positive definite matrix!") } p <- nrow(S) # Dimensionality temp <- unmakeMG(mag) mag.ug <- temp$ug mag.dag <- temp$dg mag.bg <- temp$bg ## Catch trivial case if(p==1){ return(list(Sigmahat=S, Omegahat=S, Bhat=NULL, Lambdahat=NULL, iterations=1)) } ## Starting value Omega <- diag(diag(S)) dimnames(Omega) <- dimnames(S) B <- diag(p) dimnames(B) <- dimnames(S) ## IPS for UG UG.part <- (1:p)[0==apply(mag.dag + mag.bg,2,sum)] if(length(UG.part)> 0){ Lambda.inv <- fitConGraph(mag.ug[UG.part,UG.part, drop=FALSE], S[UG.part,UG.part, drop=FALSE], p+1,tol = tol)$Shat Omega[UG.part,UG.part] <- Lambda.inv } ## Prepare list of spouses, parents, etc. pa.each.node <-function(amat){ ## List of the parents of each node. ## If the adjacency matrix is symmetric it gives the boundary. p <- nrow(amat) b <- vector(p, mode="list") ip <- 1:p for(i in 1:p) b[[i]] <- ip[amat[,i]==1] b } spo <- pa.each.node(mag.bg) nsp <- pa.each.node(cmpGraph(mag.bg)) pars <- pa.each.node(mag.dag) i <- 0 repeat{ i <- i+1 Omega.old <- Omega B.old <- B for(v in setdiff(1:p, UG.part)){ parv <- pars[[v]] spov <- spo[[v]] if(length(spov)==0){ if(length(parv)!=0){ if(i == 1){ # do it only once ## attention: B = - beta B[v,parv] <- -S[v,parv]%*%solve(S[parv,parv]) Omega[v,v] <- S[v,v]+B[v,parv]%*%S[parv,v] } } } else{ if(length(parv)!=0){ O.inv <- matrix(0, p,p) O.inv[-v,-v] <- solve(Omega[-v,-v]) Z <- O.inv[spov,-v] %*%B[-v,] lpa <- length(parv) lspo <- length(spov) XX <- matrix(0, lpa+lspo, lpa+lspo) XX[1:lpa, 1:lpa] <- S[parv,parv] XX[1:lpa,(lpa+1):(lpa+lspo)] <- S[parv,]%*%t(Z) XX[(lpa+1):(lpa+lspo),1:lpa] <- t(XX[1:lpa,(lpa+1):(lpa+lspo)]) XX[(lpa+1):(lpa+lspo),(lpa+1):(lpa+lspo)] <- Z%*%S%*%t(Z) YX <- c(S[v,parv], S[v,]%*%t(Z)) temp <- YX %*% solve(XX) B[v,parv] <- -temp[1:lpa] Omega[v,spov] <- temp[(lpa+1):(lpa+lspo)] Omega[spov,v] <- Omega[v,spov] temp.var <- S[v,v] - temp %*% YX Omega[v,v] <- temp.var + Omega[v,spov] %*% O.inv[spov,spov] %*% Omega[spov,v] } else{ O.inv <- matrix(0, p,p) O.inv[-v,-v] <- solve(Omega[-v,-v]) Z <- O.inv[spov,-v] %*%B[-v,] XX <- Z%*%S%*%t(Z) YX <- c(S[v,]%*%t(Z)) Omega[v,spov] <- YX %*% solve(XX) Omega[spov,v] <- Omega[v,spov] temp.var <- S[v,v] - Omega[v,spov] %*% YX Omega[v,v] <- temp.var + Omega[v,spov] %*% O.inv[spov,spov] %*% Omega[spov,v] } } } if(sum(abs(Omega.old-Omega)) + sum(abs(B.old-B)) < tol) break } Sigma <- solve(B)%*%Omega%*%solve(t(B)) ## Corrections by Thomas Richardson of the following: ## Lambda <- Omega ## Lambda[-UG.part,-UG.part] <- 0 Lambda <- matrix(0, p, p) if(length(UG.part) > 0){ Lambda[-UG.part, -UG.part] <- Omega[-UG.part, -UG.part] } Omega[UG.part,UG.part] <- 0 return(list(Sigmahat=Sigma, Bhat=B, Omegahat=Omega, Lambdahat=Lambda, iterations=i)) } #' Indicator matrix #' #' Finds the indicator matrix of the zeros of a matrix. #' #' The indicator matrix is a matrix of zeros and ones which has a zero element #' iff the corresponding element of \code{A} is (exactly) zero. #' #' @param A a matrix. #' @return a matrix of the same dimensions as \code{A}. #' @author Giovanni M. Marchetti #' @seealso \code{\link{DAG}}, \code{\link{inducedCovGraph}}, #' \code{\link{inducedConGraph}} #' @references Wermuth, N. \& Cox, D.R. (2004). Joint response graphs and #' separation induced by triangular systems. \emph{J.R. Statist. Soc. B}, 66, #' Part 3, 687-717. #' @keywords array algebra graphs multivariate #' @examples #' #' ## A simple way to find the overall induced concentration graph #' ## The DAG on p. 198 of Cox & Wermuth (1996) #' amat <- DAG(y1 ~ y2 + y3, y3 ~ y5, y4 ~ y5) #' A <- edgematrix(amat) #' In(crossprod(A)) #' `In` <- function (A) { ### Indicator matrix of structural zeros. abs(sign(A)) } "inducedChainGraph" <- function(amat, cc=rownames(amat), cond=NULL, type="LWF"){ ### Induced chain graph with chain components cc. inducedBlockGraph <- function(amat, sel, cond){ S <- inducedConGraph(amat, sel=union(cond, sel), cond=NULL) In(S[cond, sel, drop=FALSE]) } nod <- rownames(amat) nam <- c(list(cond), cc) if(!all(unlist(nam) %in% nod)) stop("The chain components or the conditioning set are wrong.") for(h in nam){ for(k in nam){ h <- unlist(h) k <- unlist(k) if (setequal(h,k)) next if(length(intersect(h,k) > 0)) stop("The sets are not disjoint!") } } nam <- unlist(nam) cg <- matrix(0, length(nam),length(nam)) dimnames(cg) <- list(nam,nam) kc <- length(cc) if(type=="AMP"){ for(i in 1:kc){ past <- unlist(cc[0:(i-1)]) Past <- union(cond,past) g <- cc[[i]] Sgg.r <- inducedConGraph(amat, sel=g, cond=Past) cg[g, g] <- Sgg.r if(length(past) !=0){ Pgr <- inducedRegGraph(amat, sel=g, cond=Past) cg[Past, g] <- Pgr } } } else if(type=="LWF"){ for(i in 1:kc){ past <- unlist(cc[0:(i-1)]) Past <- union(cond,past) g <- cc[[i]] Sgg.r <- inducedConGraph(amat,sel=g, cond=Past) cg[g, g] <- Sgg.r if(length(past) !=0){ Cgr <- inducedBlockGraph(amat, sel=g, cond=Past) cg[Past, g] <- Cgr } } } else if(type=="MRG"){ for(i in 1:kc){ past <- unlist(cc[0:(i-1)]) Past <- union(cond,past) g <- cc[[i]] Sgg.r <- inducedCovGraph(amat, sel=g, cond=Past) cg[g, g] <- Sgg.r if(length(past) != 0){ Pgr <- inducedRegGraph(amat, sel=g, cond=Past) cg[Past, g] <- Pgr } } } else stop("Wrong type.") n <- unlist(cc) cg[n,n, drop=FALSE] } "inducedConGraph" <- function(amat, sel=rownames(amat), cond=NULL){ ### Induced concentration graph for a set of nodes given a conditioning set. ancGraph <- function(A) { ## Edge matrix of the overall ancestor graph. if(sum(dim(A)) == 0) return(A) else return(In(solve(2*diag(nrow(A)) - A))) } trclos <- function(M) { ## Transitive closure of an UG with edge matrix M. See Wermuth and Cox (2004). edgematrix(transClos(adjMatrix(M))) } A <- edgematrix(amat) # From the adjacency matrix to edge matrix nod <- rownames(A) if(!all(cond %in% nod)) stop("Conditioning nodes are not among the vertices.") if(!all(sel %in% nod)) stop("Selected nodes are not among the vertices.") if(length(intersect(sel,cond) > 0)) stop("The sets are not disjoint!") l <- setdiff(nod, union(sel, cond)) # Marginal nodes g <- sel r <- cond L <- union(l,g) R <- union(g,r) Al <- ancGraph(A[l,l,drop=FALSE]) ARR.l <- In(A[R,R, drop=FALSE] + A[R,l, drop=FALSE]%*% Al %*% A[l,R, drop=FALSE]) TRl <- In(A[R,l,drop=FALSE] %*% Al) DRl <- In(diag(length(R)) + TRl %*% t(TRl)) out <- In(t(ARR.l) %*% trclos(DRl) %*% ARR.l) out <- out[g,g, drop=FALSE] adjMatrix(out)*10 } "inducedCovGraph" <- function(amat, sel=rownames(amat), cond=NULL){ ### Induced covariance graph for a set of nodes given a conditioning set. ancGraph <- function(A) { ## Edge matrix of the overall ancestor graph. if(sum(dim(A)) == 0) return(A) else return(In(solve(2*diag(nrow(A)) - A))) } trclos <- function(M) { ## Transitive closure of an UG with edge matrix M. See Wermuth and Cox (2004). edgematrix(transClos(adjMatrix(M))) } A <- edgematrix(amat) # From the adjacency matrix to edge matrix nod <- rownames(A) if(!all(cond %in% nod)) stop("Conditioning nodes are not among the vertices.") if(!all(sel %in% nod)) stop("Selected nodes are not among the vertices.") if(length(intersect(sel,cond) > 0)) stop("The sets are not disjoint!") l <- setdiff(nod, union(sel, cond)) # Marginalized over nodes g <- sel r <- cond L <- union(l,g) R <- union(g,r) AL <- ancGraph(A[L,L,drop=FALSE]) # In(solve(2*diag(length(L)) - A[L,L])) TrL <- In(A[r,L,drop=FALSE] %*% AL) DLr <- In(diag(length(L)) + t(TrL) %*% TrL) cl <- trclos(DLr) out <- In(AL %*% cl %*% t(AL)) out <- out[g,g, drop=FALSE] adjMatrix(out)*100 } "inducedDAG" <- function(amat, order, cond=NULL){ ### Induced DAG in a new ordering. cc=as.list(order) inducedChainGraph(amat, cc=cc, cond=cond) } "inducedRegGraph" <- function(amat, sel=rownames(amat), cond=NULL){ ### Induced regression graph for a set of nodes given a conditioning set. ancGraph <- function(A) { ## Edge matrix of the overall ancestor graph. if(sum(dim(A)) == 0) return(A) else return(In(solve(2*diag(nrow(A)) - A))) } trclos <- function(M) { ## Transitive closure of an UG with edge matrix M. See Wermuth and Cox (2004). edgematrix(transClos(adjMatrix(M))) } A <- edgematrix(amat) # From the adjacency matrix to edge matrix nod <- rownames(A) if(!all(cond %in% nod)) stop("Conditioning nodes are not among the vertices.") if(!all(sel %in% nod)) stop("Selected nodes are not among the vertices.") if(length(intersect(sel,cond) > 0)) stop("The sets are not disjoint!") l <- setdiff(nod, union(sel, cond)) # Nodes marginalized over g <- sel r <- cond L <- union(l,g) R <- union(g,r) AL <- ancGraph(A[L,L,drop=FALSE]) # A^{LL} TrL <- In(A[r,L,drop=FALSE] %*% AL) # T_{rL} DrL <- In(diag(length(r)) + TrL %*% t(TrL)) # D_{rr-L} Arr.L <- In(A[r,r, drop=FALSE] + A[r,L, drop=FALSE] %*% AL %*% A[L,r, drop=FALSE]) # A_{rr.L} FLr <- In(AL %*% A[L, r, drop=FALSE]) # F_{Lr} out <- In(AL %*% t(TrL) %*% trclos(DrL) %*% Arr.L + FLr) t(out[g,r, drop=FALSE]) } #' Graph queries #' #' Checks if a given graph is acyclic. #' #' #' @param amat a square Boolean matrix with dimnames, the adjacency matrix of a #' graph. #' @param method an integer 1 or 2 specifying the method used. If #' \code{method=1} the function calls the function \code{clusters} in package #' \code{igraph} to find the strong components: two nodes v and w are in the #' same strong component iff there are directed paths from v to w and from w to #' v. If \code{method=2} the function uses the \code{ggm} function #' \code{transClos}. Method 1 is faster. #' @return a logical value, \code{TRUE} if the graph is acyclic and #' \code{FALSE} otherwise. #' @author David Edwards, Giovanni M. Marchetti #' @references Aho, A.V., Hopcroft, J.E. \& Ullman, J.D. (1983). \emph{Data #' structures and algorithms.} Reading: Addison-Wesley. #' @keywords graphs models multivariate #' @examples #' #' ## A cyclic graph #' d <- matrix(0,3,3) #' rownames(d) <- colnames(d) <- c("x", "y", "z") #' d["x","y"] <- d["y", "z"] <- d["z", "x"] <- 1 #' ## Test if the graph is acyclic #' isAcyclic(d) #' isAcyclic(d, method = 1) #' `isAcyclic` <- function (amat, method = 2) { ### Tests if the graph is acyclic. if(method ==1){ G <- graph.adjacency(amat) return(max(clusters(G, mode = "strong")$csize) == 1) } else if(method ==2){ B <- transClos(amat) l <- B[lower.tri(B)] u <- t(B)[lower.tri(t(B))] com <- (l&u) return(all(!com)) } else{ stop("Wrong method.") } } #' G-identifiability of an UG #' #' Tests if an undirected graph is G-identifiable. #' #' An undirected graph is said G-identifiable if every connected component of #' the complementary graph contains an odd cycle (Stanghellini and Wermuth, #' 2005). See also Tarantola and Vicard (2002). #' #' @param amat a symmetric matrix with dimnames representing the adjacency #' matrix of an undirected graph #' @return a logical value, \code{TRUE} if the graph is G-identifiable and #' \code{FALSE} if it is not. #' @author Giovanni M. Marchetti #' @seealso \code{\link{UG}}, \code{\link{cmpGraph}}, \code{\link{cycleMatrix}} #' @references Stanghellini, E. \& Wermuth, N. (2005). On the identification of #' path-analysis models with one hidden variable. \emph{Biometrika}, 92(2), #' 337-350. #' #' Stanghellini, E. (1997). Identification of a single-factor model using #' graphical Gaussian rules. \emph{Biometrika}, 84, 241--244. #' #' Tarantola, C. \& Vicard, P. (2002). Spanning trees and identifiability of a #' single-factor model. \emph{Statistical Methods \& Applications}, 11, #' 139--152. #' #' Vicard, P. (2000). On the identification of a single-factor model with #' correlated residuals. \emph{Biometrika}, 87, 199--205. #' @keywords graphs models multivariate #' @examples #' #' ## A not G-identifiable UG #' G1 <- UG(~ a*b + u*v) #' isGident(G1) #' ## G-identifiable UG #' G2 <- UG(~ a + b + u*v) #' isGident(G2) #' ## G-identifiable UG #' G3 <- cmpGraph(UG(~a*b*c+x*y*z)) #' isGident(G3) #' "isGident" <- function(amat){ ### Is the UG with adjacency matrix amat G-identifiable? is.odd <- function(x) (x %% 2) == 1 cmpGraph <- function(amat){ ## Adjacency matrix of the complementary graph. g <- 0+ (!amat) diag(g) <- 0 g } cgr <- cmpGraph(amat) cc <- conComp(cgr) l <- unique(cc) k <- length(l) g <- rep(k, 0) for(i in 1:k){ subg <- cgr[cc==i, cc==i, drop=FALSE] m <- cycleMatrix(subg) if(is.null(m)) rt <- 0 else rt <- apply(m, 1, sum) g[i] <- any(is.odd(rt)) } all(g) } "pa" <- function (nn, amat) { ### List of the parents of nodes nn for a given with adjacency matrix amat. nod <- rownames(amat) if(is.null(nod)) stop("The adjacency matrix must have dimnames!") if(!all(is.element(nn, nod))) stop("Some of the nodes are not among the vertices.") k <- length(nn) p <- vector(k, mode="list") A <- 0 + ((amat != t(amat)) & (amat == 1)) # Select the directed edges for(i in 1:k) { p[[i]] <- nod[A[,nn[i]]==1 ] } setdiff(unique(unlist(p)), nn) } #' Partial correlations #' #' Finds the matrix of the partial correlations between pairs of variables #' given the rest. #' #' The algorithm computes \eqn{- \sigma^{rs}/(\sigma^{rr} \sigma^{ss})^{1/2}} #' where the \eqn{\sigma^{rs}} are concentrations, i.e. elements of the inverse #' covariance matrix. #' #' @param S a symmetric positive definite matrix, representing a covariance #' matrix. #' @return A symmetric matrix with ones along the diagonal and in position #' \eqn{(r,s)} the partial correlation between variables \eqn{r} and \eqn{s} #' given all the remaining variables. #' @author Giovanni M. Marchetti #' @seealso \code{\link{var}}, \code{\link{cor}}, \code{\link{correlations}} #' @references Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate #' dependencies}. London: Chapman \& Hall. #' @keywords array graphs models multivariate #' @examples #' #' ### Partial correlations for the mathematics marks data #' data(marks) #' S <- var(marks) #' parcor(S) #' "parcor" <- function (S) { ### Finds the partial correlation matrix of the variables given the rest. ### S is the covariance matrix. p <- ncol(S) K <- solve(S) a <- 1/sqrt(diag(K)) K <- K * outer(a, a) out <- 2 * diag(p) - K dimnames(out) <- dimnames(S) out } #' Partial correlation #' #' Computes the partial correlation between two variables given a set of other #' variables. #' #' #' @param u a vector of integers of length > 1. The first two integers are the #' indices of variables the correlation of which must be computed. The rest of #' the vector is the conditioning set. #' @param S a symmetric positive definite matrix, a sample covariance matrix. #' @return a scalar, the partial correlation matrix between variables #' \code{u[1]} and \code{u[2]} given \code{u[-c(1,2)]}. #' @author Giovanni M. Marchetti #' @seealso \code{\link{cor}}, \code{\link{parcor}}, \code{\link{correlations}} #' @keywords models multivariate #' @examples #' #' data(marks) #' ## The correlation between vectors and algebra given analysis and statistics #' pcor(c("vectors", "algebra", "analysis", "statistics"), var(marks)) #' ## The same #' pcor(c(2,3,4,5), var(marks)) #' ## The correlation between vectors and algebra given statistics #' pcor(c("vectors", "algebra", "statistics"), var(marks)) #' ## The marginal correlation between analysis and statistics #' pcor(c("analysis","statistics"), var(marks)) #' "pcor" <- function (u, S) { ### Partial correlation between u[1:2], given th rest of u. S: cov matrix. k <- solve(S[u,u]) -k[1,2]/sqrt(k[1,1]*k[2,2]) } #' Test for zero partial association #' #' Test for conditional independence between two variables, given the other #' ones, assuming a multivariate normal distribution. #' #' #' @param r a partial correlation coefficient, computed by \code{\link{pcor}}. #' @param q the number of variables in the conditioning set. #' @param n integer > 0, the sample size. #' @return \item{tval}{The Student's t-test statistic.} \item{df}{The degrees #' of freedom} \item{pvalue}{The P-value, assuming a two-sided alternative.} #' @author Giovanni M. Marchetti #' @seealso \code{\link{pcor}}, \code{\link{shipley.test}} #' @keywords htest multivariate #' @examples #' #' ## Are 2,3 independent given 1? #' data(marks) #' pcor.test(pcor(c(2,3,1), var(marks)), 1, n=88) #' "pcor.test" <- function(r, q, n){ df = n - 2 - q tval <- r * sqrt(df)/sqrt(1-r*r) pv <- 2 * pt(-abs(tval), df) list(tval = tval, df = df, pvalue = pv) } #' Random correlation matrix #' #' Generates a random correlation matrix with the method of Marsaglia and Olkin #' (1984). #' #' The algorithm uses \code{\link{rsphere}} to generate \eqn{d} vectors on a #' sphere in \eqn{d}-space. If \eqn{Z} is a matrix with such vectors as rows, #' then the random correlation matrix is \eqn{ZZ'}. #' #' @param d an integer > 0, the order of the correlation matrix. #' @return a correlation matrix of order \code{d}. #' @author Giovanni M. Marchetti #' @seealso \code{\link{rsphere}} #' @references Marshall, G.\& Olkin, I. (1984).Generating correlation matrices. #' \emph{SIAM J. Sci. Stat. Comput.}, 5, 2, 470--475. #' @keywords distribution multivariate #' @examples #' #' ## A random correlation matrix of order 3 #' rcorr(3) #' ## A random correlation matrix of order 5 #' rcorr(5) #' "rcorr" <- function(d) { # Generates a random correlation matrix of dimension d # with the method of Marsaglia and Olkin (1984). h<-rsphere(d,d) h %*% t(h) } #' Random sample from a decomposable Gaussian model #' #' Generates a sample from a mean centered multivariate normal distribution #' whose covariance matrix has a given triangular decomposition. #' #' The value in position \eqn{(i,j)} of \code{A} (with \eqn{i < j}) is a #' regression coefficient (with sign changed) in the regression of variable #' \eqn{i} on variables \eqn{i+1, \dots, d}. #' #' The value in position \eqn{i} of \code{Delta} is the residual variance in #' the above regression. #' #' @param n an integer > 0, the sample size. #' @param A a square, upper triangular matrix with ones along the diagonal. It #' defines, together with \code{Delta}, the concentration matrix (and also the #' covariance matrix) of the multivariate normal. The order of \code{A} is the #' number of components of the normal. #' @param Delta a numeric vector of length equal to the number of columns of #' \code{A}. #' @return a matrix with \code{n} rows and \code{nrow(A)} columns, a sample #' from a multivariate normal distribution with mean zero and covariance matrix #' \code{S = solve(A) %*% diag(Delta) %*% t(solve(A))}. #' @author Giovanni M. Marchetti #' @seealso \code{\link{triDec}}, \code{\link{fitDag}} #' @references Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate #' dependencies}. London: Chapman \& Hall. #' @keywords distribution multivariate #' @examples #' #' ## Generate a sample of 100 observation from a multivariate normal #' ## The matrix of the path coefficients #' A <- matrix( #' c(1, -2, -3, 0, 0, 0, 0, #' 0, 1, 0, -4, 0, 0, 0, #' 0, 0, 1, 2, 0, 0, 0, #' 0, 0, 0, 1, 1, -5, 0, #' 0, 0, 0, 0, 1, 0, 3, #' 0, 0, 0, 0, 0, 1, -4, #' 0, 0, 0, 0, 0, 0, 1), 7, 7, byrow=TRUE) #' D <- rep(1, 7) #' X <- rnormDag(100, A, D) #' #' ## The true covariance matrix #' solve(A) %*% diag(D) %*% t(solve(A)) #' #' ## Triangular decomposition of the sample covariance matrix #' triDec(cov(X))$A #' "rnormDag" <- function (n, A, Delta) { ### Generates n observations from a multivariate normal with mean 0 ### and a covariance matrix A^-1 Delta (A^-1)'. p <- length(Delta) E <- matrix(0, n, p) for(j in 1:p) { E[,j] <- rnorm(n, 0, sqrt(Delta[j])) } B <- solve(A) Y <- E %*% t(B) colnames(Y) <- colnames(A) Y } #' Random vectors on a sphere #' #' Generates a sample of points uniformly distributed on the surface of a #' sphere in d-space. #' #' The algorithm is based on normalizing to length 1 each d-vector of a sample #' from a multivariate normal \eqn{N(0, I)}. #' #' @param n an integer, the sample size. #' @param d an integer, the dimension of the space. For example, a circle is #' defined in 2D-space, a sphere in 3D-space. #' @return a matrix of \code{n} rows and \code{d} columns. #' @author Giovanni M. Marchetti #' @seealso \code{\link{rnorm}}, \code{\link{rcorr}} #' @keywords distribution multivariate #' @examples #' #' ## 100 points on circle #' z <- rsphere(100,2) #' plot(z) #' #' ## 100 points on a sphere #' z <- rsphere(100, 3) #' pairs(z) #' "rsphere" <- function(n, d) { ## Generates n random vectors uniformly dist. on the ## surface of a sphere, in d dimensions. X <- matrix(rnorm(n*d),n,d) d <- apply(X, 1, function(x) sqrt(sum(x*x))) sweep(X, 1, d, "/") } #' Test of all independencies implied by a given DAG #' #' Computes a simultaneous test of all independence relationships implied by a #' given Gaussian model defined according to a directed acyclic graph, based on #' the sample covariance matrix. #' #' The test statistic is \eqn{C = -2 \sum \ln p_j} where \eqn{p_j} are the #' p-values of tests of conditional independence in the basis set computed by #' \code{basiSet(A)}. The p-values are independent uniform variables on #' \eqn{(0,1)} and the statistic has exactly a chi square distribution on #' \eqn{2k} degrees of freedom where \eqn{k} is the number of elements of the #' basis set. Shipley (2002) calls this test Fisher's C test. #' #' @param amat a square Boolean matrix, of the same dimension as \code{S}, #' representing the adjacency matrix of a DAG. #' @param S a symmetric positive definite matrix, the sample covariance matrix. #' @param n a positive integer, the sample size. #' @return \item{ctest}{Test statistic \eqn{C}.} \item{df}{Degrees of freedom.} #' \item{pvalue}{The P-value of the test, assuming a two-sided alternative.} #' @author Giovanni M. Marchetti #' @seealso \code{\link{basiSet}}, \code{\link{pcor.test}} #' @references Shipley, B. (2000). A new inferential test for path models based #' on directed acyclic graphs. \emph{Structural Equation Modeling}, 7(2), #' 206--218. #' @keywords graphs models multivariate #' @examples #' #' ## A decomposable model for the mathematics marks data #' data(marks) #' dag <- DAG(mechanics ~ vectors+algebra, vectors ~ algebra, #' statistics ~ algebra+analysis, analysis ~ algebra) #' shipley.test(dag, cov(marks), n=88) #' "shipley.test" <- function (amat, S, n) { ### Overall d-separation test. See Shipley (2000). ### amat: adjacency matrix; S: covariance matrix; n: observations. pval <- function(r, q, n){ ## See pcor df = n - 2 - q tval <- r * sqrt(df)/sqrt(1-r*r) 2 * pt(-abs(tval), df) } l <- basiSet(amat) k <- length(l) p <- rep(0, k) for(i in 1:k){ r <- pcor(l[[i]], S) q <- length(l[[i]]) - 2 p[i] <- pval(r, q, n) } ctest <- -2 * sum(log(p)) df <- 2*k pv <- 1 - pchisq(ctest, df) list(ctest=ctest, df=df, pvalue=pv) } #' Sweep operator #' #' Sweeps a covariance matrix with respect to a subset of indices. #' #' The sweep operator has been introduced by Beaton (1964) as a tool for #' inverting symmetric matrices (see Dempster, 1969). #' #' @param V a symmetric positive definite matrix, the covariance matrix. #' @param b a subset of indices of the columns of \code{V}. #' @return a square matrix \code{U} of the same order as \code{V}. If \code{a} #' is the complement of \code{b}, then \code{U[a,b]} is the matrix of #' regression coefficients of \code{a} given \code{b} and \code{U[a,a]} is the #' corresponding covariance matrix of the residuals. #' #' If \code{b} is empty the function returns \code{V}. #' #' If \code{b} is the vector \code{1:nrow(V)} (or its permutation) then the #' function returns the opposite of the inverse of \code{V}. #' @author Giovanni M. Marchetti #' @seealso \code{\link{fitDag}} #' @references Beaton, A.E. (1964). \emph{The use of special matrix operators #' in statistical calculus}. Ed.D. thesis, Harvard University. Reprinted as #' Educational Testing Service Research Bulletin 64-51. Princeton. #' #' Dempster, A.P. (1969). \emph{Elements of continuous multivariate analysis}. #' Reading: Addison-Wesley. #' @keywords array algebra models multivariate #' @examples #' #' ## A very simple example #' V <- matrix(c(10, 1, 1, 2), 2, 2) #' swp(V, 2) #' "swp" <- function (V, b) { ### SWP operator. V is the covariance matrix, b is a subset of indices. p <- ncol(V) u <- is.na(match(1:p, b)) a <- (1:p)[u] out <- 0 * V dimnames(out) <- dimnames(V) if (length(a) == 0) return(-solve(V)) else if (length(a) == p) return(V) else{ Saa <- V[a, a, drop = FALSE] Sab <- V[a, b, drop = FALSE] Sbb <- V[b, b, drop = FALSE] B <- Sab %*% solve(Sbb) out[a, a] <- Saa - B %*% t(Sab) out[a, b] <- B out[b, a] <- t(B) out[b, b] <- -solve(Sbb) return(out) } ## list(swept = out, coef = out[a, b], rss = out[a, a, drop = F]) } "topOrder" <- function (amat) { ### Return the nodes in topological order (parents before children). ### Translated from: Kevin Murphy's BNT. if(!isAcyclic(amat)) stop("The graph is not acyclic!") n <- nrow(amat) nod <- 1:n indeg <- rep(0, n) up <- !amat[lower.tri(amat)] if(all(up)) return(nod) zero.indeg <- c() # a stack of nodes with no parents for(i in nod) { indeg[i] <- sum(amat[,i]) if(indeg[i] == 0) zero.indeg <- c(i, zero.indeg) } s <- 1 ord <- rep(0, n) while(length(zero.indeg) > 0){ v <- zero.indeg[1] # pop v zero.indeg <- zero.indeg[-1] ord[s] <- v s <- s + 1 cs <- nod[amat[v,]==1] if(length(cs) == 0) next for(j in 1:length(cs)){ k <- cs[j] indeg[k] <- indeg[k] - 1 if(indeg[k] == 0) zero.indeg <- c(k, zero.indeg) # push k } } ord } #' Topological sort #' #' \code{topOrder} returns the topological order of a directed acyclic graph #' (parents, before children). \code{topSort} permutates the adjacency matrix #' according to the topological order. #' #' The topological order needs not to be unique. After the permutation the #' adjacency matrix of the graph is upper triangular. The function is a #' translation of the Matlab function \code{topological_sort} in Toolbox #' \pkg{BNT} written by Kevin P. Murphy. #' #' @aliases topSort topOrder #' @param amat a square Boolean matrix with dimnames, representing the #' adjacency matrix of a directed acyclic graph. #' @return \code{topOrder(amat)} returns a vector of integers representing the #' permutation of the nodes. \code{topSort(amat)} returns the adjacency matrix #' with rows and columns permutated. #' @note The order of the nodes defined by \code{DAG} is that of their first #' appearance in the model formulae (from left to right). #' @author Kevin P. Murphy, Giovanni M. Marchetti #' @seealso \code{\link{DAG}}, \code{\link{isAcyclic}} #' @references Aho, A.V., Hopcrtoft, J.E. \& Ullman, J.D. (1983). \emph{Data #' structures and algorithms.} Reading: Addison-Wesley. #' #' Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon Press. #' @keywords graphs models multivariate #' @examples #' #' ## A simple example #' dag <- DAG(a ~ b, c ~ a + b, d ~ c + b) #' dag #' topOrder(dag) #' topSort(dag) #' "topSort" <- function (amat) { ### Topological sort of the DAG with adjacency matrix amat. ord <- topOrder(amat) amat[ord, ord] } #' Transitive closure of a graph #' #' Computes the transitive closure of a graph (undirected or directed acyclic). #' #' The transitive closure of a directed graph with adjacency matrix \eqn{A} is #' a graph with adjacency matrix \eqn{A^*} such that \eqn{A^*_{i,j} = 1} if #' there is a directed path from \eqn{i} to \eqn{j}. The transitive closure of #' an undirected graph is defined similarly (by substituting path to directed #' path). #' #' @param amat a Boolean matrix with dimnames representing the adjacency matrix #' of a graph. #' @return \item{A}{The adjacency matrix of the transitive closure.} #' @author Giovanni M. Marchetti #' @seealso \code{\link{DAG}}, \code{\link{UG}} #' @keywords graphs models multivariate #' @examples #' #' ## Closure of a DAG #' d <- DAG(y ~ x, x ~ z) #' transClos(d) #' #' ## Closure of an UG #' g <- UG(~ x*y*z+z*u+u*v) #' transClos(g) #' `transClos` <- function (amat) { ### Transitive closure of the relation with adjacency matrix amat. if (nrow(amat) == 1) return(amat) A <- amat diag(A) <- 1 repeat { B <- sign(A %*% A) if (all(B == A)) break else A <- B } diag(A) <- 0 A } #' Triangular decomposition of a covariance matrix #' #' Decomposes a symmetric positive definite matrix with a variant of the #' Cholesky decomposition. #' #' Any symmetric positive definite matrix \eqn{\Sigma}{Sigma} can be decomposed #' as \eqn{\Sigma = B \Delta B^T}{Sigma = B %*% Delta %*% t(B)} where \eqn{B} #' is upper triangular with ones along the main diagonal and #' \eqn{\Delta}{Delta} is diagonal. If \eqn{\Sigma}{Sigma} is a covariance #' matrix, the concentration matrix is \eqn{\Sigma^{-1} = A^T \Delta^{-1} A} #' where \eqn{A = B^{-1}} is the matrix of the regression coefficients (with #' the sign changed) of a system of linear recursive regression equations with #' independent residuals. In the equations each variable \eqn{i} is regressed #' on the variables \eqn{i+1, \dots, d}. The elements on the diagonal of #' \eqn{\Delta} are the partial variances. #' #' @param Sigma a symmetric positive definite matrix. #' @return \item{A}{a square upper triangular matrix of the same order as #' \code{Sigma} with ones on the diagonal.} \item{B}{the inverse of \code{A}, #' another triangular matrix with unit diagonal.} \item{Delta}{a vector #' containing the diagonal values of \eqn{\Delta}.} #' @author Giovanni M. Marchetti #' @seealso \code{\link{chol}} #' @references Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate #' dependencies}. London: Chapman \& Hall. #' @keywords array algebra models multivariate #' @examples #' #' ## Triangular decomposition of a covariance matrix #' B <- matrix(c(1, -2, 0, 1, #' 0, 1, 0, 1, #' 0, 0, 1, 0, #' 0, 0, 0, 1), 4, 4, byrow=TRUE) #' B #' D <- diag(c(3, 1, 2, 1)) #' S <- B %*% D %*% t(B) #' triDec(S) #' solve(B) #' "triDec" <- function(Sigma){ ### Triangular decomposition of covariance matrix Sigma. R = chol(solve(Sigma)) dimnames(R) = dimnames(Sigma) D = diag(R) A = diag(1/D) %*% R dimnames(A) <- dimnames(Sigma) B = solve(A) list(A = A, B = B, Delta = 1/(D^2)) } #' Defining an undirected graph (UG) #' #' A simple way to define an undirected graph by means of a single model #' formula. #' #' The undirected graph \eqn{G = (V, E)} is defined by a set of nodes \eqn{V} #' and a set of pairs \eqn{E}. The set of pairs is defined by the set of #' interactions in the formula. Interactions define complete subgraphs (not #' necessarily maximal) of the UG. The best way is to specify interactions #' that match the cliques of the undirected graph. This is the standard way to #' define graphical models for contingency tables. Remember that some #' hierarchical models are not graphical, but they imply the same graph. #' #' The function returns the edge matrix of the graph, i.e. a square Boolean #' matrix of order equal to the number of nodes of the graph and a one in #' position \eqn{(i,j)} if there is an arrow from \eqn{j} to \eqn{i} and zero #' otherwise. By default this matrix has ones along the main diagonal. For UGs #' this matrix is symmetric. The dimnames of the edge matrix are the nodes of #' the UG. #' #' @param f a single model formula without response #' @return a Boolean matrix with dimnames, the adjacency matrix of the #' undirected graph. #' @author Giovanni M. Marchetti #' @seealso \code{\link{fitConGraph}}, \code{\link{fitCovGraph}}, #' \code{\link{DAG}} #' @references Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon #' Press. #' @keywords graphs models multivariate #' @examples #' #' ## X independent of Y given Z #' UG(~ X*Z + Y*Z) #' #' # The saturated model #' UG(~ X*Y*Z) #' #' ## The model without three-way interactions has the same graph #' UG(~ X*Y + Y*Z + Z*X) #' UG(~ (X + Y + Z)^2) #' #' ## Butterfly model defined from the cliques #' UG(~ mec*vec*alg + alg*ana*sta) #' #' ## Some isolated nodes #' UG(~x*y*z + a + b) #' "UG" <- function (f) { ### Defines an UG from a model formula. Returns the adj. matrix. tt <- terms(f) if (attr(tt, "response") == 1) stop("You should not specify a response!") nod <- dimnames(attr(tt, "factors"))[[1]] N <- unique(nod) # set of nodes dN <- length(N) # number of nodes amat <- matrix(0, dN, dN) o <- attr(tt, "order") <= 2 v <- attr(tt, "factors")[, o, drop = FALSE] m <- match(dimnames(v)[[1]], N) for (i in 1:sum(o)) { ij <- m[v[, i] == 1] amat[ij[1], ij[2]] <- 1 amat[ij[2], ij[1]] <- 1 } dimnames(amat) <- list(N, N) amat } #' Mixed Graphs #' #' Defines a loopless mixed graph from the directed, undirected and undirected #' components. #' #' A loopless mixed graph is a mixed graph with three types of edges: #' undirected, directed and bi-directed edges. Note that the three adjacency #' matrices must have labels and may be defined using the functions \code{DG}, #' \code{DAG} or \code{UG}. The adjacency matrices of the undirected graphs #' may be just symmetric Boolean matrices. #' #' @param dg the adjacency matrix of a directed graph specifying the arrows of #' the mixed graph. #' @param ug the adjacency matrix of an undirected graph specifying the lines #' of the mixed graph. #' @param bg the adjacency matrix of an undirected graph specifying the #' bidirected edges of the mixed graph. #' @return a square matrix obtained by combining the three graph components #' into an adjacency matrix of a mixed graph. The matrix consists of 4 #' different integers as an \eqn{ij}-element: 0 for a missing edge between #' \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full #' line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between #' \eqn{i} and \eqn{j}. These numbers are added to be associated with multiple #' edges of different types. The matrix is symmetric w.r.t full lines and #' bi-directed arrows. #' @author Giovanni M. Marchetti, Mathias Drton #' @seealso \code{\link{UG}}, \code{\link{DAG}} #' @references Richardson, T. S. and Spirtes, P. (2002). Ancestral Graph Markov #' Models. \emph{Annals of Statistics}, 30(4), 962--1030. #' @keywords graphs ancestral graph mixed graph models multivariate #' @examples #' #' ## Examples from Richardson and Spirtes (2002) #' a1 <- makeMG(dg=DAG(a~b, b~d, d~c), bg=UG(~a*c)) #' isAG(a1) # Not an AG. (a2) p.969 #' a2 <- makeMG(dg=DAG(b ~ a, d~c), bg=UG(~a*c+c*b+b*d)) # Fig. 3 (b1) p.969 #' isAG(a1) #' a3 <- makeMG(ug = UG(~ a*c), dg=DAG(b ~ a, d~c), bg=UG(~ b*d)) # Fig. 3 (b2) p.969 #' a5 <- makeMG(bg=UG(~alpha*beta+gamma*delta), dg=DAG(alpha~gamma, #' delta~beta)) # Fig. 6 p. 973 #' ## Another Example #' a4 <- makeMG(ug=UG(~y0*y1), dg=DAG(y4~y2, y2~y1), bg=UG(~y2*y3+y3*y4)) #' ## A mixed graphs with double edges. #' mg <- makeMG(dg = DG(Y ~ X, Z~W, W~Z, Q~X), ug = UG(~X*Q), #' bg = UG(~ Y*X+X*Q+Q*W + Y*Z) ) #' ## Chronic pain data: a regression graph #' chronic.pain <- makeMG(dg = DAG(Y ~ Za, Za ~ Zb + A, Xa ~ Xb, #' Xb ~ U+V, U ~ A + V, Zb ~ B, A ~ B), bg = UG(~Za*Xa + Zb*Xb)) #' `makeMG` <- function (dg = NULL, ug = NULL, bg = NULL) { dg.nodes <- rownames(dg) ug.nodes <- rownames(ug) bg.nodes <- rownames(bg) ver <- unique(c(dg.nodes, ug.nodes, bg.nodes)) d <- length(ver) amat <- matrix(0, d, d) dimnames(amat) <- list(ver, ver) amat.dg <- amat amat.ug <- amat amat.bg <- amat if (!is.null(dg)) amat.dg[dg.nodes, dg.nodes] <- dg if (!is.null(ug)) amat.ug[ug.nodes, ug.nodes] <- ug * 10 if (!is.null(bg)) amat.bg[bg.nodes, bg.nodes] <- bg * 100 amat.dg + amat.ug + amat.bg } #' Loopless mixed graphs components #' #' Splits the adjacency matrix of a loopless mixed graph into three components: #' directed, undirected and bi-directed. #' #' The matrices \code{ug}, and \code{bg} are just symmetric Boolean matrices. #' #' @param amat a square matrix, with dimnames, representing a loopless mixed #' graph. The matrix consists of 4 different integers as an \eqn{ij}-element: 0 #' for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} #' to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 for a #' bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added to be #' associated with multiple edges of different types. The matrix is symmetric #' w.r.t full lines and bi-directed arrows. #' @return It is the inverse of \code{makeAG}. It returns the following #' components. \item{dg}{the adjacency matrix of the directed edges.} #' \item{ug}{the adjacency matrix of the undirected edges.} \item{bg}{the #' adjacency matrix of the bi-directed edges.} #' @author Mathias Drton, Giovanni M. Marchetti #' @seealso \code{\link{makeMG}} #' @keywords graphs ancestral graph mixed graph models multivariate #' @examples #' #' ag <- makeMG(ug=UG(~y0*y1), dg=DAG(y4~y2, y2~y1), bg=UG(~y2*y3+y3*y4)) #' isAG(ag) #' unmakeMG(ag) #' `unmakeMG` <- function(amat){ ### Returns a list with the three components of a loopless MG. d <- nrow(amat) ug <- dg <- bg <- amat M <- expand.grid(dg = 0:1,ug = 0:1,bg = 0:1) i <- strtoi(as.character(amat), 2) GG <- M[i+1,] ug[,] <- GG[,2] dg[,] <- GG[,1] bg[,] <- GG[,3] if(any(ug!=t(ug))) stop("Undirected edges are wrongly coded.") if(any(bg!=t(bg))) stop("Undirected edges are wrongly coded.") return(list(dg = dg, ug = ug, bg = bg)) } #' Directed graphs #' #' Defines the adjacency of a directed graph. #' #' The directed graph is defined by a sequence of models formulae. For each #' formula the response defines a node of the graph and its parents. The graph #' contains no loops. #' #' @param \dots a sequence of model formulae #' @return the adjacency matrix of the directed graph, i.e., a square Boolean #' matrix of order equal to the number of nodes of the graph and a one in #' position \eqn{(i,j)} if there is an arrow from \eqn{i} to \eqn{j} and zero #' otherwise. The dimnames of the adjacency matrix are the labels for the #' nodes of the graph. #' @author G. M. Marchetti #' @seealso \code{\link{DAG}}, \code{\link{UG}} #' @references Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon #' Press. #' @keywords graphs directed graph models multivariate #' @examples #' #' ## A DAG #' DG(y ~ x, x ~ z, z ~ u) #' #' ## A cyclic directed graph #' DG(y ~ x, x ~ z, z ~ y) #' #' ## A graph with two arrows between two nodes #' DG(y ~ x, x ~ y) #' #' ## There can be isolated nodes #' DG(y ~ x, x ~ x) #' `DG` <- function (...) { f <- list(...) nb <- length(f) nod <- c() for (k in 1:nb) { tt <- terms(f[[k]], specials = "I") vars <- dimnames(attr(tt, "factors"))[[1]] skip <- attr(tt, "specials")$I if (!is.null(skip)) vars <- vars[-skip] nod <- c(nod, vars) } N <- unique(nod) dN <- length(N) amat <- matrix(0, dN, dN) for (k in 1:nb) { tt <- terms(f[[k]], specials = "I") vars <- dimnames(attr(tt, "factors"))[[1]] if (attr(tt, "response") == 1) { j <- match(vars[1], N) i <- match(vars[-1], N) amat[i, j] <- 1 } else if (attr(tt, "response") == 0) stop("Some equations have no response") } dimnames(amat) <- list(N, N) amat } #' Ancestral graph #' #' Check if it is an adjacency matrix of an ancestral graph #' #' Checks if the following conditions must hold: (i) no undirected edge meets #' an arrowhead; (ii) no directed cycles; (iii) spouses cannot be ancestors. #' For details see Richardson and Spirtes (2002). #' #' @param amat %% ~~Describe \code{amat} here~~ #' @return A logical value, \code{TRUE} if it is an ancestral graph and #' \code{FALSE} otherwise. #' @author Giovanni M. Marchetti, Mathias Drton #' @seealso \code{\link{makeMG}}, \code{\link{isADMG}} #' @references Richardson, T. S. and Spirtes, P. (2002). Ancestral Graph Markov #' Models. \emph{Annals of Statistics}, 30(4), 962--1030. #' @keywords graphs ancestral graph mixed graph models multivariate #' @examples #' #' ## Examples from Richardson and Spirtes (2002) #' a1 <- makeMG(dg=DAG(a~b, b~d, d~c), bg=UG(~a*c)) #' isAG(a1) # Not an AG. (a2) p.969 #' a2 <- makeMG(dg=DAG(b ~ a, d~c), bg=UG(~a*c+c*b+b*d)) # Fig. 3 (b1) p.969 #' isAG(a2) #' `isAG` <- function(amat) { ### checks if a graph is an ancestral graph comp <- unmakeMG(amat) ug <- comp$ug; dag = comp$dg; bg = comp$bg out <- TRUE if(any(amat > 100)){ warning("There are double edges.") out <- FALSE } anteriorGraph <- function (amat) { ## Adjacency matrix of the anterior graph from an AG. A <- 0 + ((amat == 1) |(amat == 10)) # Select the directed and undirected edges transClos(A) } if(any(apply(dag, 2, sum) & apply(ug, 2, sum))){ warning("Undirected edges meeting a directed edge.") out <- FALSE } if(any(apply(bg, 2, sum) & apply(ug, 2, sum))){ warning("Undirected edges meeting a bidirected edge.") out <- FALSE } H <- anteriorGraph(amat) if(any((H==1) & (amat == 100))){ warning("Spouses cannot be ancestors.") out <- FALSE } out } #' Acyclic directed mixed graphs #' #' Check if it is an adjacency matrix of an ADMG #' #' Checks if the following conditions must hold: (i) no undirected edge meets #' an arrowhead; (ii) no directed cycles; #' #' @param amat %% ~~Describe \code{amat} here~~ #' @return A logical value, \code{TRUE} if it is an ancestral graph and #' \code{FALSE} otherwise. #' @author Giovanni M. Marchetti, Mathias Drton #' @seealso \code{\link{makeMG}}, \code{\link{isADMG}} #' @references Richardson, T. S. and Spirtes, P. (2002). Ancestral Graph Markov #' Models. \emph{Annals of Statistics}, 30(4), 962--1030. #' @keywords graphs ancestral graph mixed graph models multivariate #' @examples #' #' ## Examples from Richardson and Spirtes (2002) #' a1 <- makeMG(dg=DAG(a~b, b~d, d~c), bg=UG(~a*c)) #' isADMG(a1) # Not an AG. (a2) p.969 #' a2 <- makeMG(dg=DAG(b ~ a, d~c), bg=UG(~a*c+c*b+b*d)) # Fig. 3 (b1) p.969 #' isADMG(a2) #' `isADMG`<- function(amat){ ### check is if a graph is an ADMG comp <- unmakeMG(amat) ug <- comp$ug; dag <- comp$dg; bg <- comp$bg out <- TRUE if(any(amat > 100)){ warning("There are double edges.") out <- FALSE } if(!isAcyclic(dag)){ warning("Not acyclic.") out <- FALSE } out } #' Plot of a mixed graph #' #' Plots a mixed graph from an adjacency matrix, a \code{graphNEL} object, an #' \code{\link{igraph}} object, or a descriptive vector. #' #' \code{plotGraph} uses \code{\link{plot}} and \code{\link{tkplot}} in #' \pkg{\link{igraph}} package. #' #' @param a An adjacency matrix: a matrix that consists of 4 different integers #' as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and \eqn{j}, 1 #' for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and #' \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and \eqn{j}. These #' numbers can be added to generate multiple edges of different types. The #' matrix must be symmetric w.r.t full lines and bi-directed arrows. Or a #' graph that can be a \code{graphNEL} or an \code{\link{igraph}} object.Or a #' vector of length \eqn{3e}, where \eqn{e} is the number of edges of the #' graph, that is a sequence of triples (type,node1label,node2label). The type #' of edge can be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), #' and \code{"l"} (lines). #' @param dashed A logical value. If \code{TRUE} the bi-directed edges are #' plotted as undirected dashed edges. #' @param tcltk A logical value. If \code{TRUE} the function opens a tcltk #' device to plot the graphs, allowing the interactive manimulation of the #' graph. If \code{FALSE}the function opens a standard device without #' interaction. #' @param layout The name of a function used to compute the (initial) layout of #' the graph. The default is \code{layout.auto}. This can be further adjusted #' if \code{tcltk} is \code{TRUE}. #' @param directed A logical value. If \code{FALSE} a symmetric adjacency #' matrix with entries 1 is interpreted as an undirected graph. If \code{FALSE} #' it is interpreted as a directed graph with double arrows. If \code{a} is not #' an adjacency matrix, it is ignored. #' @param noframe A logical value. If \code{TRUE}, then the nodes are not #' circled. #' @param nodesize An integer denoting the size of the nodes (default 15). It #' can be increased to accommodate larger labels. #' @param vld An integer defining the distance between a vertex and its label. #' Defaults to 0. #' @param vc Vertex color. Default is "gray". #' @param vfc Vertex frame color. Default is "black". #' @param colbid Color of the bi-directed edges. Default is "FireBrick3". #' @param coloth Color of all the other edges. Default is "black". #' @param cex An integer (defaults to 1) to adjust the scaling of the font of #' the labels. #' @param \dots Further arguments to be passed to \code{plot} or \code{tkplot}. #' @return Plot of the associated graph and returns invisibly a list with two #' slots: \code{tkp.id}, \code{graph}, the input graph as an \code{igraph} #' object. The id can be used to get the layout of the adjusted graph. The #' bi-directed edges are plotted in red. #' @author Kayvan Sadeghi, Giovanni M. Marchetti #' @seealso \code{\link{grMAT}}, \code{\link{tkplot}}, \code{\link{drawGraph}}, #' \code{\link{plot.igraph}} #' @keywords graphs adjacency matrix mixed graphs plot #' @examples #' #' exvec<-c("b",1,2,"b",1,14,"a",9,8,"l",9,11, #' "a",10,8,"a",11,2,"a",11,9,"a",11,10, #' "a",12,1,"b",12,14,"a",13,10,"a",13,12) #' plotGraph(exvec) #' ############################################ #' amat<-matrix(c(0,11,0,0,10,0,100,0,0,100,0,1,0,0,1,0),4,4) #' plotGraph(amat) #' plotGraph(makeMG(bg = UG(~a*b*c+ c*d), dg = DAG(a ~ x + z, b ~ z ))) #' plotGraph(makeMG(bg = UG(~a*b*c+ c*d), dg = DAG(a ~ x + z, b ~ z )), dashed = TRUE) #' # A graph with double and triple edges #' G <- #' structure(c(0, 101, 0, 0, 100, 0, 100, 100, 0, 100, 0, 100, 0, #' 111, 100, 0), .Dim = c(4L, 4L), .Dimnames = list(c("X", "Z", #' "Y", "W"), c("X", "Z", "Y", "W"))) #' plotGraph(G) #' # A regression chain graph with longer labels #' plotGraph(makeMG(bg = UG(~Love*Constraints+ Constraints*Reversal+ Abuse*Distress), #' dg = DAG(Love ~ Abuse + Distress, Constraints ~ Distress, Reversal ~ Distress, #' Abuse ~ Fstatus, Distress ~ Fstatus), #' ug = UG(~Fstatus*Schooling+ Schooling*Age)), #' dashed = TRUE, noframe = TRUE) #' # A graph with 4 edges between two nodes. #' G4 = matrix(0, 2, 2); G4[1,2] = 111; G4[2,1] = 111 #' plotGraph(G4) #' `plotGraph` <- function (a, dashed = FALSE, tcltk = TRUE, layout = layout.auto, directed = FALSE, noframe = FALSE, nodesize = 15, vld = 0, vc = "gray", vfc = "black", colbid = "FireBrick3", coloth = "black", cex = 1.5, ...) { if (class(a)[1] == "igraph" || class(a)[1] == "graphNEL" || class(a)[1] == "character") { a <- grMAT(a) } if (is(a,"matrix")) { if (nrow(a) == ncol(a)) { if (length(rownames(a)) != ncol(a)) { rownames(a) <- 1:ncol(a) colnames(a) <- 1:ncol(a) } if (!directed) { if (all(a == t(a)) & all(a[a != 0] == 1)) { a <- a * 10 } } l1 <- c() l2 <- c() for (i in 1:nrow(a)) { for (j in i:nrow(a)) { if (a[i, j] == 1) { l1 <- c(l1, i, j) l2 <- c(l2, 2) } if (a[j, i]%%10 == 1) { l1 <- c(l1, j, i) l2 <- c(l2, 2) } if (a[i, j] == 10) { l1 <- c(l1, i, j) l2 <- c(l2, 0) } if (a[i, j] == 11) { l1 <- c(l1, i, j, i, j) l2 <- c(l2, 2, 0) } if (a[i, j] == 100) { l1 <- c(l1, i, j) l2 <- c(l2, 3) } if (a[i, j] == 101) { l1 <- c(l1, i, j, i, j) l2 <- c(l2, 2, 3) } if (a[i, j] == 110) { l1 <- c(l1, i, j, i, j) l2 <- c(l2, 0, 3) } if (a[i, j] == 111) { l1 <- c(l1, i, j, i, j, i, j) l2 <- c(l2, 2, 0, 3) } } } } else { stop("'object' is not in a valid adjacency matrix form") } if (length(l1) > 0) { ## l1 <- l1 - 1 # igraph0 agr <- graph(l1, n = nrow(a), directed = TRUE) } if (length(l1) == 0) { agr <- graph.empty(n = nrow(a), directed = TRUE) return(tkplot(agr, vertex.label = rownames(a))) } ed0 <- get.edgelist(agr) ne <- nrow(ed0) ed <- apply(apply(ed0, 1, sort), 2, paste, collapse = "-") tb = table(ed) curve <- rep(0, ne) if (any(tb > 1)) { tb <- tb[tb > 1] for (i in 1:length(tb)) { reped <- names(tb[i]) == ed U = ed0[reped, ] if (sum(reped) == 2) { ed0[reped] if (all(is.element(c(0, 3), l2[reped]))) { curve[reped] <- c(0.9, -0.9) } if (all(U[1, ] == U[2, ])) { curve[reped] <- c(0.6, -0.6) } else { curve[reped] <- c(0.6, 0.6) } } if (sum(reped) == 3) { curve[(l2 == 3) & reped] <- 0.9 curve[(l2 == 0) & reped] <- -0.9 } if (sum(reped) == 4) { curve[(l2 == 3) & reped] <- 0.3 curve[(l2 == 0) & reped] <- -0.3 curve[(l2 == 1) & reped] <- 0.9 curve[(l2 == 2) & reped] <- 0.9 } } } col = rep(coloth, ne) col[l2 == 3] <- colbid if (dashed) { ety = rep(1, ne) ety[l2 == 3] <- 2 l2[l2 == 3] <- 0 } else { ety = rep(1, ne) } if (noframe) { vfc <- "white" vc <- "white" } if(tcltk == TRUE){ id <- tkplot(agr, layout = layout, edge.curved = curve, vertex.label = rownames(a), edge.arrow.mode = l2, edge.color = col, edge.lty = ety, vertex.label.family = "sans", edge.width = 1.5, vertex.size = nodesize, vertex.frame.color = vfc, vertex.color = vc, vertex.label.cex = cex, edge.arrow.width = 1, edge.arrow.size = 1.2, vertex.label.dist = vld, ...) } else { id <- plot(agr, layout = layout, edge.curved = curve, vertex.label = rownames(a), edge.arrow.mode = l2, edge.color = col, edge.lty = ety, vertex.label.family = "sans", edge.width = 2, vertex.size = nodesize*1.5, vertex.frame.color = vfc, vertex.color = vc, vertex.label.cex = cex*0.8, edge.arrow.width = 2, edge.arrow.size = .5, vertex.label.dist = vld, ...) } V(agr)$name <- rownames(a) agr <- set.edge.attribute(agr, "edge.arrow.mode", index = E(agr), l2) return(invisible(list(tkp.id = id, igraph = agr))) } else { stop("'object' is not in a valid format") } } ## Fit multivariate logistic model with individual covariates ############################################################# #' Inverts a marginal log-linear parametrization #' #' Inverts a marginal log-linear parametrization. #' #' A marginal log-linear link is defined by \eqn{\eta = C (M \log p)}. See #' Bartolucci et al. (2007). #' #' @param eta a vector of dimension \code{t-1} where \code{t} is the number of #' cells of a contingency table. #' @param C A contrast matrix. #' @param M A marginalization matrix. #' @param G G is the model matrix of the loglinear parameterization with no #' constant term. #' @param maxit an integer, specifying the maximum number of iterations. #' Default 500. #' @param print a logical value: if \code{TRUE}, prints the criterion after #' each cycle. #' @param tol A small value specifying the tolerance for the convergence #' criterion. Default: \code{1e-10}. #' @return A vector of probabilities \code{p}. #' @note From a Matlab function by A. Forcina, University of Perugia, Italy. #' @author Antonio Forcina, Giovanni M. Marchetti #' @seealso \code{\link{mat.mlogit}} #' @references Bartolucci, F., Colombi, R. and Forcina, A. (2007). An extended #' class of marginal link functions for modelling contingency tables by #' equality and inequality constraints. Statist. Sinica 17, 691-711. #' @keywords marginal log-linear models discrete data `binve` <- function(eta, C, M, G, maxit=500, print=FALSE, tol = 1e-10){ # Inverts a marginal loglinear parameterization. # eta has dimension t-1, # G is the model matrix of the loglinear parameterization with no intercept. # C and M are the matrices of the link. # From a Matlab function by A. Forcina, University of Perugia, Italy. ## starting values k <- nrow(C) pmin <- 1e-100 kG <- ncol(G) err <- 0 th0 <- matrix(1, k, 1) / 100 ## prepare to iterate it <- 0 mit <- 500 p <- exp(G %*% th0) p <- p/sum(p) t <- M %*% p d <- eta - C %*% log(t) div0 <- crossprod(d) hh <- 0 s <- c(.1, .6, 1.2) div <- div0 + 1 while((it < maxit) & (div > tol)){ R0 <- C %*% diagv(1/t, M) %*% diagv(p,G) rco <- rcond(R0) > tol ub <- 0 while(rco==FALSE){ cat("Rank: ", qr(R0)$rank, "\n") R0 <- R0 + diag(k) rco <- rcond(R0) > tol } de <- solve(R0) %*% d dem <- max(abs(de)) de <- (dem <= 4)*de + (dem>4)*4*de/dem th <- th0 + de p <- exp(G %*% th) p <- p/sum(p) p <- p+pmin*(p tol) { warning("div > tolerance.", call. =FALSE) } if (any(is.nan(p))){ warning("Some NaN in the vector of probabilities.", call. =FALSE) } if (it > maxit){ warning("Maximum number of iteration reached.", call. = FALSE) } if(print){ cat("Iterations: ", it, ", Div = ", div, ".\n") } as.vector(p) } # The following function has been generalized and called mlogit.param #' Multivariate logistic parametrization #' #' Find matrices \code{C} and \code{M} of e binary multivariate logistic #' parameterization. #' #' The power set is in the order of dimensions of the sets. #' #' @param d A positive integer, the number of binary responses. #' @param P A list of vectors of integers specifying margins. For instance #' \code{list(1, 2, c(1,2))}. Default: the power set of \code{1:d}. #' @return \item{C}{A contrast matrix.} \item{L}{A marginalization matrix.} #' @author Giovanni M. Marchetti #' @seealso \code{\link{binomial}, \link{marg.param}} #' @references Glonek, G. J. N. and McCullagh, P. (1995). Multivariate logistic #' models. Journal of the Royal Statistical Society, Ser. B 57, 533-546. #' @keywords logistic model #' @examples #' #' mat.mlogit(2) #' `mat.mlogit` <- function(d, P = powerset(1:d)) { ## Find matrices C and M of binary mlogit parameterization for a table 2^d. ## The output will be in the ordering of P. ## Here for 3 variables is: 1 2 3 12 13 23 123. `margmat` <- function(bi, mar){ ### Defines the marginalization matrix if(mar == FALSE){ matrix(1, 1, bi) } else { diag(bi) } } `contrmat` <- function(bi, i){ ### Contrast matrix if(i == FALSE){ 1 } else{ cbind(-1, diag(bi-1)) } } V <- 1:d C <- matrix(0,0,0) L <- c() for(mar in P){ K <- 1 H <- 1 for(i in V){ w <- is.element(i, mar) K <- contrmat(2, w) %x% K H <- margmat(2, w) %x% H } C <- blkdiag(C, K) L <- rbind(L, H) } list(C=C, L=L) } #' Power set #' #' Finds the list of all subsets of a set. #' #' If \code{sort == FALSE} the sets are in inverse lexicographical order. #' #' @param set A numeric or character vector. #' @param sort Logical value. If \code{TRUE} the subsets are sorted according #' to dimension. Default is \code{TRUE}. #' @param nonempty Logical value. If \code{TRUE} the empty set is omitted. #' Default is \code{TRUE}. #' @return A list of all subsets of \code{set}. #' @author Giovanni M. Marchetti #' @keywords sets #' @examples #' #' powerset(c("A", "B", "C"), nonempty = FALSE) #' powerset(1:3, sort = FALSE, nonempty = TRUE) #' `powerset` <- function(set, sort = TRUE, nonempty=TRUE){ ## Power set P(set). If nonempty = TRUE, the empty set is excluded. d <- length(set) if(d == 0){ if(nonempty){ stop("The set is empty.") } return(list(c())) } out <- expand.grid(rep(list(c(FALSE, TRUE)),d)) out <- as.matrix(out) out <- apply(out, 1, function(x) set[x]) if(nonempty){ out <- out[-1] } if(sort){ i <- order(unlist(lapply(out, length))) } else{ i <- 1:length(out) } names(out) <- NULL out[i] } #' Null space of a matrix #' #' Given a matrix \code{M} find a matrix \code{N} such that \eqn{N^T M} is #' zero. #' #' #' @param M A matrix. #' @return The matrix \code{N} with the basis for the null space, or an empty #' vector if the matrix \code{M} is square and of maximal rank. #' @seealso \code{\link{Null}}, ~~~ #' @keywords matrix #' @examples #' #' null(c(1,1,1)) #' `null` <- function (M) { tmp <- qr(M) set <- if (tmp$rank == 0L) 1L:ncol(M) else -(1L:tmp$rank) qr.Q(tmp, complete = TRUE)[, set, drop = FALSE] } #' Matrix product with a diagonal matrix #' #' Computes faster the product of a diagonal matrix times a full matrix. #' #' Computes \eqn{N = D_v M} where \eqn{D_v} is diagonal avoiding the #' \code{diag} operator. #' #' @param v A numeric vector specifying the elements on the diagonal of a #' matrix. #' @param M A numeric matrix compatible with the product \eqn{D_v M}. #' @return A matrix \code{N}. #' @seealso \code{\link{diag}} #' @keywords matrix #' @examples #' #' v <- 1:1000 #' M <- matrix(runif(3000), 1000, 3) #' dim(diagv(v, M)) #' `diagv` <- function(v,M){ # Computes N = diag(v) %*% M avoiding the diag operator. as.vector(v) * M } #' Block diagonal matrix #' #' Split a vector x into a block diagonal matrix. #' #' #' @param x A vector of length \code{n}. #' @param blo A vector of positive integers such that \code{sum(blo) == n}. #' @return A block-diagonal matrix with as many row as elements of \code{blo} #' and \code{n} columns. The vector \code{x} is split into \code{length(blo)} #' sub-vectors and these are the blocks of the resulting matrix. #' @author Giovanni M. Marchetti #' @seealso \code{\link{blkdiag}}, \code{\link{diag}} #' @keywords matrix #' @examples #' #' blodiag(1:10, blo = c(2, 3, 5)) #' blodiag(1:10, blo = c(3,4,0,1)) #' `blodiag` = function(x, blo){ # Split a vector x into a block diagonal matrix bith components blo. # Used by fitmlogit. k = length(blo) B = matrix(0, k, sum(blo)) u = cumsum(c(1, blo)) for(i in 1:k){ sub = u[i]:(u[i+1]-1) B[i,sub] = x[sub] } B } ##### The main function fitmlogit ########## #' Multivariate logistic models #' #' Fits a logistic regression model to multivariate binary responses. #' #' See Evans and Forcina (2011). #' #' @param \dots Model formulae of marginal logistic models for each response #' and for each association parameters (log-odds ratios). #' @param C Matrix of equality constraints. #' @param D Matrix of inequality cosntraints. #' @param data A data frame containing the responses and the explanatory #' variables. #' @param mit A positive integer: maximum number of iterations. Default: #' \code{100}. #' @param ep A tolerance used in the algorithm: default \code{1e-80}. #' @param acc A tolerance used in the algorithm: default \code{1e-4}. #' @return \item{LL}{The maximized log-likelihood.} \item{be}{The vector of the #' Maximum likelihood estimates of the parameters.} \item{S}{The estimated #' asymptotic covariance matrix.} \item{P}{The estimated cell probabilities for #' each individual.} #' @author Antonio Forcina, Giovanni M. Marchetti #' @seealso \code{\link{glm}} #' @references Evans, R.J. and Forcina, A. (2013). Two algorithms for fitting #' constrained marginal models. \emph{Computational Statistics and Data #' Analysis}, 66, 1-7. #' @keywords multivariate logistic model #' @examples #' #' data(surdata) #' out1 <- fitmlogit(A ~X, B ~ Z, cbind(A, B) ~ X*Z, data = surdata) #' out1$beta #' out2 <- fitmlogit(A ~X, B ~ Z, cbind(A, B) ~ 1, data = surdata) #' out2$beta #' `fitmlogit` <- function(..., C = c(), D = c(), data, mit = 100, ep = 1e-80, acc = 1e-4) { # Fits a logistic regression model to multivariate binary responseses. # Preliminaries loglin2 <- function(d){ # Finds the matrix G for a set o d binary variables in inv lex order. G <- 1 K <- matrix(c(1,1,0,1), 2, 2) for(i in 1:d){ G <- G %x% K } G[,-1] } mods = list(...) # mods should have 2^q - 1 components nm = length(mods) be = c() # Starting values resp = c() Xbig = c() blo = c() for (k in 1:nm){ mf = model.frame(mods[[k]], data = data) res = model.response(mf) Xsmall = model.matrix(mods[[k]], data = data) Xbig = cbind(Xbig, Xsmall) blo = c(blo, ncol(Xsmall)) nr = 1 if(is.vector(res)){ b = glm(mods[[k]], family = binomial, data = data) be = c(be, coef(b)) } else { be2 = rep(0.1, ncol(Xsmall)) be = c(be, be2) nc = ncol(res) if(nc > nr){ nr = nc Y = res } } } q = nr # number of responses b = rep(2, q) # Assuming all binary variables # Transforms the binary observation into a cell number y = 1 + (Y %*% 2^(0:(q-1))) # Finds the matrices C, M and G mml = mat.mlogit(q) Co = mml$C; Ma = mml$L; Co = as.matrix(Co) G = loglin2(q) b0 = be n = length(y) # le righe di y sono le unita' t = max(y) # Questo e' semplicemente 2^q k = length(be) # number of parameters rc = nrow(C); cc = ncol(C) rd = nrow(D); cd = ncol(D) # if (k != cc){ # warning('check col C') # } # if( k != cd){ # warning('check col D') # } if (! is.null(C)){ # se C non ha zero righe trova il null space di C U = null(C) } seta = nrow(Co) # e' la dimensione di eta mg = t(G) %*% matrix(1/t, t, t) # NB troppi t! H = solve(crossprod(G) - mg %*% G) %*% (t(G)-mg) # initialize P = matrix(0,t,n) cat('Initial probabilities\n') for (iu in 1:n){ # initialize P iu = index of a unit # X = .bdiag(lapply(mods, function(x) model.matrix(x, data = data[iu,]))) ### Change this # X = as.matrix(X) X = blodiag(Xbig[iu,], blo) eta = X %*% be eta = as.matrix(eta) p = binve(eta, Co,Ma,G) p = pmax(p,ep); p=p/sum(p) P[,iu] = p } # Iterate it=0; test=0; diss=1; LL0=0; dis=1; dm=1; while (it < mit && (dis + diss) > acc){ LL = 0; s = matrix(0, k,1); S = matrix(0, k, k); dis = 0 for (iu in 1:n) { # X = .bdiag(lapply(mods, function(x) model.matrix(x, data = data[iu,]))) # X = as.matrix(X) X = blodiag(Xbig[iu,], blo) p = P[,iu] if (it > 0){ Op = diag(p) - p %*% t(p) R = Co %*% diagv(1/(Ma %*% p),Ma) %*% Op %*% G # This is the inverse Jacobian while (rcond(R) < 1e-12){ R = R + diag(seta) } R = solve(R) delta = X %*% be - Co %*% log(Ma %*% p) th = H %*% log(p) + R %*% delta dm = max(th) - min(th) p = exp(G %*% th); p=p/sum(p) p = pmax(p,ep); p=p/sum(p) P[,iu] = p } LL = LL + log(p[y[iu]]) Op = diag(as.vector(p)) - p %*% t(p) R = Co %*% diagv(1/(Ma %*% p),Ma) %*% Op %*% G # Check while (rcond(R)<1e-12){ R = R + diag(seta) } R = solve(R) eta = Co %*% log(Ma %*% p) delta = X %*% be - eta dis = dis + sum(abs(delta)) A = G %*% R %*% X B = t(R) %*% t(G) %*% Op %*% A S = S + t(B) %*% X # attivare una delle due s = s + (t(A[y[iu],, drop = FALSE]) - t(A) %*% p) + t(B) %*% eta # versione 1 # s = s +( t(A[y[iu],]) - t(A)%*% p) # versione 2 } while(rcond(S) < 1e-10){ S = S + mean(abs(diag(S))) * diag(k) } # attivare 1 delle due b0 = be; v = solve(S, s) # versione 1 # b0=be; v = b0 + solve(S) %*% s # versione 2 if(is.null(rc) & is.null(rd)){ de = v - b0 } else if(is.null(rc)) { # only inequalities Si = solve(S) Li = t(chol(Si)) Di = D %*% Li de = NULL # de = v - b0 + Li %*% ldp(Di,-D %*% v) # Needs ldp } else if (is.null(rd)){ # only equalities Ai = solve(t(U) %*% S %*% U) de = U %*% Ai %*% t(U) %*% S %*% v - b0 } else { # both equalities and inequalities Ai = solve(t(U) %*% S %*% U) Li = t(chol(Ai)) Dz = D %*% U ta = Ai %*% t(U) %*% S %*% v #de = U %*% (ta + Li %*% ldp(Dz %*% Li, -Dz %*% ta)) - b0 # Needs ldp de = NULL } dm0 =dm; dm = max(de) - min(de); # shorten step dd = (dm > 1.5); de = de/(1 + dd*(dm^(.85))) be = b0 + de diss = sum(abs(de)) LL0 = LL it = it+1 cat(c(it, LL/100, dis/n, diss), "\n") # cat(t(be), "\n") } list(LL=LL, beta=be, S=solve(S), P=P) } #' Link function of marginal log-linear parameterization #' #' Provides the contrast and marginalization matrices for the marginal #' parametrization of a probability vector. #' #' See Bartolucci, Colombi and Forcina (2007). #' #' @param lev Integer vector containing the number of levels of each variable. #' @param type A character vector with elements \code{"l"}, \code{"g"}, #' \code{"c"}, or \code{"r"} indicating the type of logit. The meaning is as #' follows: \code{"g"} for global, \code{"c"} for continuation, \code{"r"} for #' reverse continuation and \code{"l"} for local. #' @return \item{C}{Matrix of constrasts (the first \code{sum(lev)-length(r)} #' elements are referred to univariate logits)} \item{M}{Marginalization matrix #' with elements 0 and 1.} \item{G}{Corresponding design matrix for the #' corresponding log-linear model.} #' @note Assumes that the vector of probabilities is in inv lex order. The #' interactions are returned in order of dimension, like e.g., 1, 2, 3, 12, 13, #' 23, 123. #' @author Francesco Bartolucci, Antonio Forcina, Giovanni M. Marchetti #' @seealso \code{\link{mat.mlogit}} #' @references Bartolucci, F., Colombi, R. and Forcina, A. (2007). An extended #' class of marginal link functions for modelling contingency tables by #' equality and inequality constraints. Statist. Sinica 17, 691-711. #' @keywords logistic models ordinal models #' @examples #' #' marg.param(c(3,3), c("l", "g")) #' `marg.param` = function(lev,type) # Creates matrices C and M for the marginal parametrization # of the probability vector for a vector of categorical variables. # INPUT: # lev: vector containing the number of levels of each variable # type: vector with elements 'l', 'g', 'c', 'r' indicating the type of logit # 'g' for global, # 'c' for continuation, # 'r' for reverse continuation, # 'l' for local. # OUTPUT: # C: matrix of constrats (the first sum(lev)-length(r) elements are # referred to univariate logits) # M: marginalization matrix with elements 0 and 1 # G: corresponding design matrix for the corresponding log-linear model # Translated from a Matlab function by Bartolucci and Forcina. # NOTE: assumes that the vector of probabilities is in inv lex order. # The interactions are returned in order of dimension, like e.g., 1 2 3 12 13 23 123. { # preliminaries `powset` <- function(d) # Power set P(d). { P = expand.grid(rep(list(1:2), d)) P[order(apply(P, 1, sum)),]-1 } r = length(lev) # create sets of parameters S = powset(r) S = S[-1,] # drop the empty set C = c(); M = c(); G = c() for (i in 1:nrow(S)){ si = S[i,] Ci = 1 # to update matrix C for (h in 1:r){ if(si[h]==1){ I = diag(lev[h] - 1) Ci = cbind(-I, I) %x% Ci } } C = blkdiag(C, Ci) Mi = 1 # to update matrix M for (h in 1:r) { lh = lev[h]-1 if(si[h]==1) { I = diag(lh) T = 0 + lower.tri(matrix(1, lh,lh), diag=TRUE) ze = matrix(0, lh, 1) Mi = switch(type[h], l = rbind(cbind(I, ze), cbind(ze, I)) %x% Mi, g = rbind(cbind(T, ze), cbind(ze, t(T))) %x% Mi, c = rbind(cbind(I, ze), cbind(ze, t(T))) %x% Mi, r = rbind(cbind(T, ze), cbind(ze, I)) %x% Mi) } else { Mi = matrix(1, 1,lev[h]) %x% Mi } } M = rbind(M, Mi) Gi = 1 # for the design matrix for (h in 1:r) { lh = lev[h] if(si[h]==1) { T = 0 + lower.tri(matrix(1, lh,lh), diag=TRUE); T = T[,-1] Gi = T %x% Gi } else{ Gi = matrix(1, lh, 1) %x% Gi } } G = cbind(G, Gi) } list(C = C, M = M, G = G) } #' Block diagonal matrix #' #' Block diagonal concatenation of input arguments. #' #' #' @param \dots Variable number of matrices \code{M1, M2, ...}. #' @return A block diagonal matrix \code{diag(M1, M2, ...)}. #' @author Giovanni M. Marchetti #' @seealso \code{\link{diag}} #' @keywords matrix #' @examples #' #' X <- c(1,1,2,2); Z <- c(10, 20, 30, 40); A <- factor(c(1,2,2,2)) #' blkdiag(model.matrix(~X+Z), model.matrix(~A)) #' `blkdiag` <- function(...){ ### Block diagonal concatenation of input arguments. a <- list(...) Y <- matrix(0,0,0); for(M in a){ if(is.null(M)) M <- matrix(0,0,0) M <- as.matrix(M) dY <- dim(Y); dM <- dim(M) zeros1 <- matrix(0, dY[1], dM[2]) zeros2 <- matrix(0, dM[1], dY[2]) Y <- rbind(cbind(Y, zeros1), cbind(zeros2, M)) } Y } # source("~/Documents/R/graphical_models/fitmlogit.R") # fitmlogit(A ~X, B ~ Z, cbind(A, B) ~ 1, data = datisim) # source("~/Documents/R/graphical_models/ilaria/sim-blogit.R") ggm/R/ggm_latest-package.R0000644000176200001440000004301113134667704015101 0ustar liggesusers #' Anger data #' #' Anger data #' #' Trait variables are viewed as stable personality characteristics, and state #' variables denote behaviour in specific situations. See Cox and Wermuth #' (1996). #' #' @name anger #' @docType data #' @format A covariance matrix for 4 variables measured on 684 female students. #' \describe{ \item{X}{anxiety state} \item{Y}{anger state} \item{Z}{anxiety #' trait} \item{U}{anger trait} } #' @references Cox, D. R. and Wermuth, N. (1996). \emph{Multivariate #' dependencies}. London: Chapman and Hall. #' #' Cox, D.R. and Wermuth, N. (1990). \emph{An approximation to maximum #' likelihood estimates in reduced models}. 77(4), 747-761. #' @keywords datasets #' @examples #' #' # Fit a chordless 4-cycle model #' data(anger) #' G = UG(~ Y*X + X*Z + Z*U + U*Y) #' fitConGraph(G,anger, 684) #' NULL #' Data on blood pressure body mass and age #' #' Raw data on blood pressure, body mass and age on 44 female patients, and #' covariance matrix for derived variables. #' #' #' @name derived #' @docType data #' @format A list containing a dataframe \code{raw} with 44 lines and 5 columns #' and a symmetric 4x4 covariance matrix \code{S}. #' #' The following is the description of the variables in the dataframe #' \code{raw} \describe{ \item{list("Sys")}{Systolic blood pressure, in mm Hg} #' \item{list("Dia")}{Diastolic blood pressure, in mm Hg} #' \item{list("Age")}{Age of the patient, in years} \item{list("Hei")}{Height, #' in cm} \item{list("Wei")}{Weight, in kg} } The following is the description #' of the variables for the covariance matrix \code{S}. \describe{ #' \item{list("Y")}{Derived variable \code{Y=log(Sys/Dia)}} #' \item{list("X")}{Derived variables \code{X=log(Dia)}} \item{list("Z")}{Body #' mass index \code{Z=Wei/(Hei/100)^2}} \item{list("W")}{Age} } #' @references Wermuth N. and Cox D.R. (1995). Derived variables calculated #' from similar joint responses: some characteristics and examples. #' \emph{Computational Statistics and Data Analysis}, 19, 223-234. #' @keywords datasets #' @examples #' #' # A DAG model with a latent variable U #' G = DAG(Y ~ Z + U, X ~ U + W, Z ~ W) #' #' data(derived) #' #' # The model fitted using the derived variables #' out = fitDagLatent(G, derived$S, n = 44, latent = "U") #' #' # An ancestral graph model marginalizing over U #' H = AG(G, M = "U") #' #' # The ancestral graph model fitted obtaining the #' # same result #' out2 = fitAncestralGraph(H, derived$S, n = 44) #' NULL #' The package \code{ggm}: summary information #' #' This package provides functions for defining, manipulating and fitting #' graphical Markov models. #' #' #' @section Functions: The main functions can be classified as follows. #' \itemize{ \item Functions for defining graphs (undirected, directed acyclic, #' ancestral graphs): \code{\link{UG}}, \code{\link{DAG}}, #' \code{\link{makeMG}}, \code{\link{grMAT}}; \item Functions for doing graph #' operations (parents, boundary, cliques, connected components, fundamental #' cycles, d-separation, m-separation): \code{\link{pa}}, \code{\link{bd}}, #' \code{\link{cliques}}, \code{\link{conComp}}, \code{\link{fundCycles}}; #' \item Functions for testing independence statements and generating maximal #' graphs from non-maximal graphs: \code{\link{dSep}}, \code{\link{msep}}, #' \code{\link{Max}}; \item Function for finding covariance and concentration #' graphs induced by marginalization and conditioning: #' \code{\link{inducedCovGraph}}, \code{\link{inducedConGraph}}; \item #' Functions for finding multivariate regression graphs and chain graphs #' induced by marginalization and conditioning: \code{\link{inducedRegGraph}}, #' \code{\link{inducedChainGraph}}, \code{\link{inducedDAG}}; \item Functions #' for finding stable mixed graphs (ancestral, summary and ribbonless) after #' marginalization and conditioning: \code{\link{AG}}, \code{\link{SG}}, #' \code{\link{RG}}; \item Functions for fitting by ML Gaussian DAGs, #' concentration graphs, covariance graphs and ancestral graphs: #' \code{\link{fitDag}}, \code{\link{fitConGraph}}, \code{\link{fitCovGraph}}, #' \code{\link{fitAncestralGraph}}; \item Functions for testing several #' conditional independences:\code{\link{shipley.test}}; \item Functions for #' checking global identification of DAG Gaussian models with one latent #' variable (Stanghellini-Vicard's condition for concentration graphs, new #' sufficient conditions for DAGs): \code{\link{isGident}}, #' \code{\link{checkIdent}}; \item Functions for fitting Gaussian DAG models #' with one latent variable: \code{\link{fitDagLatent}}; \item Functions for #' testing Markov equivalences and generating Markov equivalent graphs of #' specific types: \code{\link{MarkEqRcg}}, \code{\link{MarkEqMag}}, #' \code{\link{RepMarDAG}}, \code{\link{RepMarUG}}, \code{\link{RepMarBG}}. } #' The package is intended as a contribution to the gR-project derscribed by #' Lauritzen (2002). #' @references Lauritzen, S. L. (2002). gRaphical Models in R. \emph{R News}, #' 3(2)39. #' @keywords multivariate models graphs NULL #' Glucose control #' #' Data on glucose control of diabetes patients. #' #' Data on 68 patients with fewer than 25 years of diabetes. They were #' collected at the University of Mainz to identify psychological and #' socio-economic variables possibly important for glucose control, when #' patients choose the appropriate dose of treatment depending on the level of #' blood glucose measured several times per day. #' #' The variable of primary interest is \code{Y}, glucose control, measured by #' glycosylated haemoglobin. \code{X}, knowledge about the illness, is a #' response of secondary interest. Variables \code{Z}, \code{U} and \code{V} #' measure patients' type of attribution, called fatalistic externality, social #' externality and internality. These are intermediate variables. Background #' variables are \code{W}, the duration of the illness, \code{A} the duration #' of formal schooling and \code{B}, gender. The background variables \code{A} #' and \code{B} are binary variables with coding \code{-1}, \code{1}. #' #' @name glucose #' @docType data #' @format A data frame with 68 observations on the following 8 variables. #' \describe{ \item{Y}{a numeric vector, Glucose control (glycosylated #' haemoglobin), values up to about 7 or 8 indicate good glucose control.} #' \item{X}{a numeric vector, a score for knowledge about the illness.} #' \item{Z}{a numeric vector, a score for fatalistic externality (mere chance #' determines what occurs).} \item{U}{a numeric vector, a score for social #' externality (powerful others are responsible).} \item{V}{a numeric vector, a #' score for internality (the patient is him or herself responsible).} #' \item{W}{a numeric vector, duration of the illness in years.} \item{A}{a #' numeric vector, level of education, with levels \code{-1}: at least 13 years #' of formal schooling, \code{1}: less then 13 years.} \item{B}{a numeric #' vector, gender with levels \code{-1}: females, \code{1}: males.} } #' @references Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate #' dependencies}. London: Chapman \& Hall. #' @source Cox & Wermuth (1996), p. 229. #' @keywords datasets #' @examples #' #' data(glucose) #' ## See Cox & Wermuth (1996), Figure 6.3 p. 140 #' coplot(Y ~ W | A, data=glucose) #' NULL #' Graphs induced by marginalization or conditioning #' #' Functions to find induced graphs after conditioning on a set of variables #' and marginalizing over another set. #' #' Given a directed acyclic graph representing a set of conditional #' independencies it is possible to obtain other graphs of conditional #' independence implied after marginalizingover and conditionig on sets of #' nodes. Such graphs are the covariance graph, the concentration graph, the #' multivariate regression graph and the chain graph with different #' interpretations (see Cox \& Wermuth, 1996, 2004). #' #' @aliases inducedCovGraph inducedConGraph inducedRegGraph inducedChainGraph #' inducedDAG InducedGraphs #' @param amat a square Boolean matrix, the adjacency matrix of a directed #' acyclic graph. The names of rows and of the columns are the nodes of the #' DAG. #' @param sel a character vector representing a subset of selected variables. #' The elements of the vector must be a subset of the names of the nodes i.e. #' of \code{rownames(A)}. By default \code{sel} is the set of the nodes of the #' DAG. #' @param cond a character vector representing the variables on which you want #' to condition. \code{cond} must be disjoint from \code{sel} and their union #' must be a subset of the set of nodes. The set difference between the set of #' nodes and the union of \code{sel} and \code{cond} are the variables over #' which we marginalize. \code{cond} may be the null vector (the default), #' meaning that you want to condition on the empty set. #' @param cc a list of character vectors specifying the chain components for #' the chain graph. #' @param type a string indicating the interpretation of the chain graph. It #' can be either "LWF" (Lauritzen, Wermuth, Frydenberg interpretation), "AMP" #' (Andersson, Madigan, Perlman interpretation) or "MRG" (Multivariate #' regression graph interpretation). #' @param order a character vector indicating the ordering of the vertices of a #' DAG (left to right, past to future). #' @return \code{inducedCovGraph} returns the adjacency matrix of the #' covariance graph of the variables in set \code{sel} given the variables in #' set \code{cond}, implied by the original directed acyclic graph with #' adjacency matrix \code{amat}. #' #' \code{inducedConGraph} returns the adjacency matrix of the concentration #' graph of the variables in set \code{sel} given the variables in set #' \code{cond}, implied by the original directed acyclic graph with adjacency #' matrix \code{amat}. #' #' \code{inducedRegGraph} returns the adjacency matrix of the multivariate #' regression graph of the variables in set \code{sel} given the variables in #' set \code{cond}, implied by the original directed acyclic graph with #' adjacency matrix \code{amat}. #' #' \code{inducedChainGraph} returns the adjacency matrix of the chain graph for #' the variables in chain components \code{cc}, given the variables in set #' \code{cond}, with interpretation specified by string \code{type}, implied by #' the original directed acyclic graph with adjacency matrix \code{amat}. #' #' \code{inducedDAG} returns the adjacency matrix of the DAG with the ordering #' \code{order}, implied by the original directed acyclic graph with adjacency #' matrix \code{amat}. #' @note If \code{sel} is \code{NULL} the functions return the null matrix. If #' \code{cond} is \code{NULL}, the conditioning set is empty and the functions #' \code{inducedConGraph} and \code{inducedCovGraph} return the overall induced #' covariance or concentration matrices of the selected variables. If you do #' not specify \code{sel} you cannot specify a non \code{NULL} value of #' \code{cond}. #' @author Giovanni M. Marchetti #' @seealso \code{\link{DAG}}, \code{\link{UG}},\code{\link{isAcyclic}} #' @references Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate #' dependencies}. London: Chapman \& Hall. #' #' Wermuth, N. \& Cox, D.R. (2004). Joint response graphs and separation #' induced by triangular systems. \emph{J.R. Statist. Soc. B}, 66, Part 3, #' 687-717. #' @keywords graphs models multivariate #' @examples #' #' ## Define a DAG #' dag <- DAG(a ~ x, c ~ b+d, d~ x) #' dag #' ## Induced covariance graph of a, b, d given the empty set. #' inducedCovGraph(dag, sel=c("a", "b", "d"), cond=NULL) #' #' ## Induced concentration graph of a, b, c given x #' inducedConGraph(dag, sel=c("a", "b", "c"), cond="x") #' #' ## Overall covariance graph #' inducedCovGraph(dag) #' #' ## Overall concentration graph #' inducedConGraph(dag) #' #' ## Induced covariance graph of x, b, d given c, x. #' inducedCovGraph(dag, sel=c("a", "b", "d"), cond=c("c", "x")) #' #' ## Induced concentration graph of a, x, c given d, b. #' inducedConGraph(dag, sel=c("a", "x", "c"), cond=c("d", "b")) #' #' ## The DAG on p. 198 of Cox & Wermuth (1996) #' dag <- DAG(y1~ y2 + y3, y3 ~ y5, y4 ~ y5) #' #' ## Cf. figure 8.7 p. 203 in Cox & Wermuth (1996) #' inducedCovGraph(dag, sel=c("y2", "y3", "y4", "y5"), cond="y1") #' inducedCovGraph(dag, sel=c("y1", "y2", "y4", "y5"), cond="y3") #' inducedCovGraph(dag, sel=c("y1", "y2", "y3", "y4"), cond="y5") #' #' ## Cf. figure 8.8 p. 203 in Cox & Wermuth (1996) #' inducedConGraph(dag, sel=c("y2", "y3", "y4", "y5"), cond="y1") #' inducedConGraph(dag, sel=c("y1", "y2", "y4", "y5"), cond="y3") #' inducedConGraph(dag, sel=c("y1", "y2", "y3", "y4"), cond="y5") #' #' ## Cf. figure 8.9 p. 204 in Cox & Wermuth (1996) #' inducedCovGraph(dag, sel=c("y2", "y3", "y4", "y5"), cond=NULL) #' inducedCovGraph(dag, sel=c("y1", "y2", "y4", "y5"), cond=NULL) #' inducedCovGraph(dag, sel=c("y1", "y2", "y3", "y4"), cond=NULL) #' #' ## Cf. figure 8.10 p. 204 in Cox & Wermuth (1996) #' inducedConGraph(dag, sel=c("y2", "y3", "y4", "y5"), cond=NULL) #' inducedConGraph(dag, sel=c("y1", "y2", "y4", "y5"), cond=NULL) #' inducedConGraph(dag, sel=c("y1", "y2", "y3", "y4"), cond=NULL) #' #' ## An induced regression graph #' dag2 = DAG(Y ~ X+U, W ~ Z+U) #' inducedRegGraph(dag2, sel="W", cond=c("Y", "X", "Z")) #' #' ## An induced DAG #' inducedDAG(dag2, order=c("X","Y","Z","W")) #' #' ## An induced multivariate regression graph #' inducedRegGraph(dag2, sel=c("Y", "W"), cond=c("X", "Z")) #' #' ## An induced chain graph with LWF interpretation #' dag3 = DAG(X~W, W~Y, U~Y+Z) #' cc = list(c("W", "U"), c("X", "Y", "Z")) #' inducedChainGraph(dag3, cc=cc, type="LWF") #' #' ## ... with AMP interpretation #' inducedChainGraph(dag3, cc=cc, type="AMP") #' #' ## ... with multivariate regression interpretation #' cc= list(c("U"), c("Z", "Y"), c("X", "W")) #' inducedChainGraph(dag3, cc=cc, type="MRG") #' NULL #' Mathematics marks #' #' Examination marks of 88 students in five subjects. #' #' Mechanics and Vectors were closed book examinations. Algebra, Analysis and #' Statistics were open book examinations. #' #' @name marks #' @docType data #' @format A data frame with 88 observations on the following 5 variables. #' \describe{ \item{mechanics}{a numeric vector, mark in Mechanics} #' \item{vectors}{a numeric vector, mark in Vectors} \item{algebra}{a numeric #' vector, mark in Algebra} \item{analysis}{a numeric vector, mark in Analysis} #' \item{statistics}{a numeric vector, mark in Statistics } } #' @references Whittaker, J. (1990). \emph{Graphical models in applied #' multivariate statistics}. Chichester: Wiley. #' @source Mardia, K.V., Kent, J.T. and Bibby, (1979). \emph{Multivariate #' analysis}. London: Academic Press. #' @keywords datasets #' @examples #' #' data(marks) #' pairs(marks) #' NULL #' Simple graph operations #' #' Finds the boundary, children, parents of a subset of nodes of a graph. #' #' For definitions of the operators see Lauritzen (1996). #' #' @aliases bd ch pa #' @param nn a vector of nodes. It may either a numeric vector, or a character #' vector. If it is character vector must be a subset of the \code{rownames} of #' the edge matrix. #' @param amat a square matrix with dimnames specifying the adjacency matrix of #' the graph #' @return The operators return a character vector specifying the boundary or #' the children or the parents of nodes \code{nn} in the graph. This is a #' numeric or a character vector depending on the mode of \code{nn}. #' @author Giovanni M. Marchetti #' @seealso \code{\link{UG}}, \code{\link{DAG}} #' @references Lauritzen, S. (1996). \emph{Graphical models}. Oxford: Clarendon #' Press. #' @keywords graphs models multivariate #' @examples #' #' ## find boundary of a subset of nodes of a DAG #' G <- DAG(y ~ x+b+a, b~a, x~a) #' bd("b", G) #' bd(c("b", "x"), G) #' bd("x", G) #' bd(c("x","b"), G) #' ## find boundary of a subset of nodes of an UG #' G <- UG(~ y*x*z + z*h*v) #' bd("z", G) #' bd(c("y", "x"), G) #' bd("v", G) #' bd(c("x","v"), G) #' ## children of a subset of nodes of a DAG #' G <- DAG(y ~ x+b+a, b~a, x~a) #' ch("b", G) #' ch(c("b", "x"), G) #' ch("x", G) #' ch(c("a","x"), G) #' ## parents of a subset of nodes of a DAG #' pa("b", G) #' pa(c("b", "x"), G) #' pa("x", G) #' pa(c("x","b"), G) #' NULL #' Stress #' #' Stress data #' #' See Cox and Wermuth (1996). #' #' @name stress #' @docType data #' @format A \eqn{4 \times 4} covariance matrix for the following variables. #' \describe{ \item{Y}{} \item{V}{} \item{X}{} \item{U}{} } #' @references Cox, D. R. \& Wermuth, N. (1996). \emph{Multivariate #' dependencies}. London: Chapman \& Hall. #' #' Slangen K., Kleemann P.P and Krohne H.W. (1993). Coping with surgical #' stress. In: Krohne H. W. (ed.). \emph{Attention and avoidance: Strategies #' in coping with aversiveness}. New York, Heidelberg: Springer, 321-346. #' @keywords datasets #' @examples #' #' data(stress) #' G = UG(~ Y*X + X*V + V*U + U*Y) #' fitConGraph(G, stress, 100) #' NULL #' A simulated data set #' #' Simulated data following a seemingly unrelated regression model. #' #' #' @name surdata #' @docType data #' @format A data frame with 600 observations on the following 4 variables. #' \describe{ \item{list("A")}{a numeric response vector} \item{list("B")}{a #' numeric response vector} \item{list("X")}{a numeric vector} #' \item{list("Z")}{a numeric vector with codes \code{1} and \code{-1} for a #' binary variables.} } #' @keywords datasets #' @examples #' #' data(surdata) #' pairs(surdata) #' NULL #' Utility functions #' #' Functions used internally. #' #' #' @aliases rem SPl RR likGau #' @author Kayvan Sadeghi, Giovanni M. Marchetti #' @seealso \code{unique},\code{setdiff}, \code{is.element} #' @keywords utility NULL ggm/R/kayvan.R0000644000176200001440000022414313621530500012642 0ustar liggesusers#### Functions by Kayvan Sadeghi 2011-2012 ## May 2012 Changed the return values of some functions to TRUE FALSE require(graph) require(igraph) ###################################################################### ###################################################################### rem<-function(a,r){ # this is setdiff (a, r) k<-0 b<-a for (i in a){ k<-k+1 for(j in r){ if(i==j){ b<-b[-k] k<-k-1 break} } } return(b) } ############################################################################# ############################################################################# #SPl<-function(a,alpha){ # a<-sort(a) # alpha<-sort(alpha) # r<-c() # if (length(alpha)>0){ # for(i in 1:length(a)){ # for(j in 1:length(alpha)){ # if(a[i]==alpha[j]){ # r<-c(r,i) # break}}}} # return(r) #} ############################################################################### # Finds indices of b in sorted a 'SPl' = function(a, b) (1:length(a))[is.element(sort(a), b)] ############################################################################## RR<-function(a){ ## This is unique(a) a<-sort(a) r<-a[1] i<-1 while(i 1&amat31[kkk,kk]%%10==1)) { for (ii in 1:(lenidx )) { #for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], kkk]%%10==0&idx[ii]!=kkk){ amat21[idx[ii], kkk] <- TRUE }} } } } amatr<-amat21 ################################################################2 amat22 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in S) { idx <- which(amatr[, kk]%%10 == 1) idy <- which(amatr[, kk]> 99) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idx[ii], idy[jj]]%%10==0 & idx[ii]!=idy[jj]){ amat22[idx[ii], idy[jj]] <- 1 }} } } } amatr<-amat22+amatr #################################################################3 amat33<- t(amatr) amat23 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amat33[, kk]%%10 == 1) idy <- which(amat33[, kk]%%100> 9) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idy[jj], idx[ii]]%%10==0 & idx[ii]!=idy[jj]){ amat23[idy[jj], idx[ii]] <- 1 }} } } } amatr<-amat23+amatr ##################################################################4 amat24 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amatr[, kk]%%100>9) idy <- which(amatr[, kk]> 99) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idx[ii], idy[jj]]%%10==0 & idx[ii]!=idy[jj]){ amat24[idx[ii], idy[jj]] <- 1 }} } } } amatr<-amat24+amatr ####################################################################5 amat35<- t(amatr) amat25 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amat35[, kk]%%10 == 1) lenidx <- length(idx) if ((lenidx > 1)) { for (ii in 1:(lenidx - 1)) { for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], idx[jj]]<100){ amat25[idx[ii], idx[jj]] <- 100 }} } } } amatr<-amat25+t(amat25)+amatr ######################################################################6 amat36<- t(amatr) amat26 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amat36[, kk]%%10 == 1) idy <- which(amat36[, kk]> 99) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idy[jj], idx[ii]]<100 & idx[ii]!=idy[jj]){ amat26[idy[jj], idx[ii]] <- 100 }} } } } amatr<-amat26+t(amat26)+amatr #################################################################7 amat27 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in S) { idx <- which(amatr[, kk]>99) lenidx <- length(idx) if ((lenidx > 1)) { for (ii in 1:(lenidx - 1)) { for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], idx[jj]]<100){ amat27[idx[ii], idx[jj]] <- 100 }} } } } amatr<-amat27+t(amat27)+amatr ################################################################8 amat28 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in S) { idx <- which(amatr[, kk]%%10 == 1) lenidx <- length(idx) if ((lenidx > 1)) { for (ii in 1:(lenidx - 1)) { for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], idx[jj]]%%100<10){ amat28[idx[ii], idx[jj]] <- 10 }} } } } amatr<-amat28+t(amat28)+amatr #################################################################9 amat29 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amatr[, kk]%%10 == 1) idy <- which(amatr[, kk]%%100> 9) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idx[ii], idy[jj]]%%100<10 & idx[ii]!=idy[jj]){ amat29[idx[ii], idy[jj]] <- 10 }} } } } amatr<-amat29+t(amat29)+amatr ##################################################################10 amat20 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amatr[, kk]%%100>9) lenidx <- length(idx) if ((lenidx > 1)) { for (ii in 1:(lenidx - 1)) { for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], idx[jj]]%%100<10){ amat20[idx[ii], idx[jj]] <- 10 }} } } } amatr<-amat20+t(amat20)+amatr } Mn<-c() Cn<-c() for(i in M){ Mn<-c(Mn,which(rownames(amat)==i))} for(i in C){ Cn<-c(Cn,which(rownames(amat)==i))} if(length(Mn)>0&length(Cn)>0){ fr<-amatr[-c(Mn,Cn),-c(Mn,Cn)]} if(length(Mn)>0&length(Cn)==0){ fr<-amatr[-c(Mn),-c(Mn)]} if(length(Mn)==0&length(Cn)>0){ fr<-amatr[-c(Cn),-c(Cn)]} if(length(Mn)==0&length(Cn)==0){ fr<-amatr} if(plot==TRUE){ plotfun(fr,...)} if(showmat==FALSE){ invisible(fr)} else{return(fr)} } ############################################################################## ############################################################################## #' summary graph #' #' \code{SG} generates and plots summary graphs after marginalization and #' conditioning. #' #' #' @param amat An adjacency matrix, or a graph that can be a \code{graphNEL} or #' an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} #' is the number of edges of the graph, that is a sequence of triples (type, #' node1label, node2label). The type of edge can be \code{"a"} (arrows from #' node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). #' @param M A subset of the node set of \code{a} that is going to be #' marginalised over #' @param C Another disjoint subset of the node set of \code{a} that is going #' to be conditioned on. #' @param showmat A logical value. \code{TRUE} (by default) to print the #' generated matrix. #' @param plot A logical value, \code{FALSE} (by default). \code{TRUE} to plot #' the generated graph. #' @param plotfun Function to plot the graph when \code{plot == TRUE}. Can be #' \code{plotGraph} (the default) or \code{drawGraph}. #' @param \dots Further arguments passed to \code{plotfun}. #' @return A matrix that consists 4 different integers as an \eqn{ij}-element: #' 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from #' \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 #' for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added #' to be associated with multiple edges of different types. The matrix is #' symmetric w.r.t full lines and bi-directed arrows. #' @author Kayvan Sadeghi #' @seealso \code{\link{AG}}, \code{\link{MSG}}, \code{\link{RG}} #' @references Sadeghi, K. (2011). Stable classes of graphs containing directed #' acyclic graphs. \emph{Submitted}. #' #' Wermuth, N. (2011). Probability distributions with summary graph structure. #' \emph{Bernoulli}, 17(3),845-879. #' @keywords graphs directed acyclic graph marginalization and conditioning #' summary graph #' @examples #' #' ex <- matrix(c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ##The adjacency matrix of a DAG #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, #' 0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0, #' 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0),16,16, byrow = TRUE) #' M <- c(3,5,6,15,16) #' C <- c(4,7) #' SG(ex, M, C, plot = TRUE) #' SG(ex, M, C, plot = TRUE, plotfun = drawGraph, adjust = FALSE) #' `SG`<-function (amat,M=c(),C=c(),showmat=TRUE, plot=FALSE, plotfun = plotGraph, ...) { if(class(amat)[1] == "igraph" || class(amat)[1] == "graphNEL" || class(amat)[1] == "character") { amat<-grMAT(amat)} if(is(amat,"matrix")){ if(nrow(amat)==ncol(amat)){ if(length(rownames(amat))!=ncol(amat)){ rownames(amat)<-1:ncol(amat) colnames(amat)<-1:ncol(amat)} } else { stop("'object' is not in a valid adjacency matrix form")}} if(!is(amat,"matrix")) { stop("'object' is not in a valid form")} S<-C St<-c() while(identical(S,St)==FALSE) { St<-S for(j in S){ for(i in rownames(amat)){ if(amat[i,j]%%10 == 1){ S<-c(i,S[S!=i])} } } } amatr<-amat amatt<-2*amat while(identical(amatr,amatt)==FALSE) { amatt<-amatr ############################################################1 amat21 <- amatr for (kk in M) { for (kkk in 1:ncol(amatr)) { amat31<-amatr if (amatr[kkk,kk]%%10==1|amatr[kk,kkk]%%10==1) { amat31[kk,kkk]<-amatr[kkk,kk] amat31[kkk,kk]<-amatr[kk,kkk]} idx <- which(amat31[, kk]%%10 == 1) lenidx <- length(idx) if ((lenidx > 1&amat31[kkk,kk]%%10==1)) { for (ii in 1:(lenidx )) { #for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], kkk]%%10==0&idx[ii]!=kkk){ amat21[idx[ii], kkk] <- TRUE }} } } } amatr<-amat21 ################################################################2 amat22 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in S) { idx <- which(amatr[, kk]%%10 == 1) idy <- which(amatr[, kk]> 99) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idx[ii], idy[jj]]%%10==0 & idx[ii]!=idy[jj]){ amat22[idx[ii], idy[jj]] <- 1 }} } } } amatr<-amat22+amatr #################################################################3 amat33<- t(amatr) amat23 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amat33[, kk]%%10 == 1) idy <- which(amat33[, kk]%%100> 9) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idy[jj], idx[ii]]%%10==0 & idx[ii]!=idy[jj]){ amat23[idy[jj], idx[ii]] <- 1 }} } } } amatr<-amat23+amatr ##################################################################4 amat24 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amatr[, kk]%%100>9) idy <- which(amatr[, kk]> 99) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idx[ii], idy[jj]]%%10==0 & idx[ii]!=idy[jj]){ amat24[idx[ii], idy[jj]] <- 1 }} } } } amatr<-amat24+amatr ####################################################################5 amat35<- t(amatr) amat25 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amat35[, kk]%%10 == 1) lenidx <- length(idx) if ((lenidx > 1)) { for (ii in 1:(lenidx - 1)) { for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], idx[jj]]<100){ amat25[idx[ii], idx[jj]] <- 100 }} } } } amatr<-amat25+t(amat25)+amatr ######################################################################6 amat36<- t(amatr) amat26 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amat36[, kk]%%10 == 1) idy <- which(amat36[, kk]> 99) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idy[jj], idx[ii]]<100 & idx[ii]!=idy[jj]){ amat26[idy[jj], idx[ii]] <- 100 }} } } } amatr<-amat26+t(amat26)+amatr #################################################################7 amat27 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in S) { idx <- which(amatr[, kk]>99) lenidx <- length(idx) if ((lenidx > 1)) { for (ii in 1:(lenidx - 1)) { for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], idx[jj]]<100){ amat27[idx[ii], idx[jj]] <- 100 }} } } } amatr<-amat27+t(amat27)+amatr ################################################################8 amat28 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in S) { idx <- which(amatr[, kk]%%10 == 1) lenidx <- length(idx) if ((lenidx > 1)) { for (ii in 1:(lenidx - 1)) { for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], idx[jj]]%%100<10){ amat28[idx[ii], idx[jj]] <- 10 }} } } } amatr<-amat28+t(amat28)+amatr #################################################################9 amat29 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amatr[, kk]%%10 == 1) idy <- which(amatr[, kk]%%100> 9) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idx[ii], idy[jj]]%%100<10 & idx[ii]!=idy[jj]){ amat29[idx[ii], idy[jj]] <- 10 }} } } } amatr<-amat29+t(amat29)+amatr ##################################################################10 amat20 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amatr[, kk]%%100>9) lenidx <- length(idx) if ((lenidx > 1)) { for (ii in 1:(lenidx - 1)) { for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], idx[jj]]%%100<10){ amat20[idx[ii], idx[jj]] <- 10 }} } } } amatr<-amat20+t(amat20)+amatr } for(i in 1:ncol(amatr)) { for(j in 1:ncol(amatr)) { if(amatr[i,j]%%100>9){ amatr[i,j]<-10 for(k in 1:ncol(amatr)){ if(amatr[k,j]==100){ amatr[j,k]<-1 amatr[k,j]<-0 }} } }} SS<-S SSt<-c() while(identical(SS,SSt)==FALSE) { SSt<-SS for(j in SS){ for(i in rownames(amat)) { if(amatr[i,j]%%10 == 1){ SS<-c(i,SS[SS!=i])} } } } for(i in SS){ for(j in SS) { if(amatr[i,j]%%10==1){ amatr[i,j]<-10 amatr[j,i]<-10}}} Mn<-c() Cn<-c() for(i in M){ Mn<-c(Mn,which(rownames(amat)==i))} for(i in C){ Cn<-c(Cn,which(rownames(amat)==i))} if(length(Mn)>0&length(Cn)>0){ fr<-amatr[-c(Mn,Cn),-c(Mn,Cn)]} if(length(Mn)>0&length(Cn)==0){ fr<-amatr[-c(Mn),-c(Mn)]} if(length(Mn)==0&length(Cn)>0){ fr<-amatr[-c(Cn),-c(Cn)]} if(length(Mn)==0&length(Cn)==0){ fr<-amatr} if(plot==TRUE){ plotfun(fr,...)} if(showmat==FALSE){ invisible(fr)} else{return(fr)} } ############################################################################## ############################################################################## #' Ancestral graph #' #' \code{AG} generates and plots ancestral graphs after marginalization and #' conditioning. #' #' #' @param amat An adjacency matrix, or a graph that can be of class #' \code{graphNEL-class} or an \code{\link{igraph}} object, or a vector of #' length \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is #' a sequence of triples (type, node1label, node2label). The type of edge can #' be \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and #' \code{"l"} (lines). #' @param M A subset of the node set of \code{a} that is going to be #' marginalized over #' @param C Another disjoint subset of the node set of \code{a} that is going #' to be conditioned on. #' @param showmat A logical value. \code{TRUE} (by default) to print the #' generated matrix. #' @param plot A logical value, \code{FALSE} (by default). \code{TRUE} to plot #' the generated graph. #' @param plotfun Function to plot the graph when \code{plot == TRUE}. Can be #' \code{plotGraph} (the default) or \code{drawGraph}. #' @param \dots Further arguments passed to \code{plotfun}. #' @return A matrix that is the adjacency matrix of the generated graph. It #' consists of 4 different integers as an \eqn{ij}-element: 0 for a missing #' edge between \eqn{i} and \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 #' for a full line between \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow #' between \eqn{i} and \eqn{j}. These numbers are added to be associated with #' multiple edges of different types. The matrix is symmetric w.r.t full lines #' and bi-directed arrows. #' @author Kayvan Sadeghi #' @seealso \code{\link{MAG}}, \code{\link{RG}}, \code{\link{SG}} #' @references Richardson, T.S. and Spirtes, P. (2002). Ancestral graph Markov #' models. \emph{Annals of Statistics}, 30(4), 962-1030. #' #' Sadeghi, K. (2011). Stable classes of graphs containing directed acyclic #' graphs. \emph{Submitted}. #' @keywords graphs ancestral graph directed acyclic graph marginalization and #' conditioning #' @examples #' #' ##The adjacency matrix of a DAG #' ex<-matrix(c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, #' 0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0, #' 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0),16,16,byrow=TRUE) #' M <- c(3,5,6,15,16) #' C <- c(4,7) #' AG(ex, M, C, plot = TRUE) #' `AG`<-function (amat,M=c(),C=c(),showmat=TRUE,plot=FALSE, plotfun = plotGraph, ...) { if(class(amat)[1] == "igraph" || class(amat)[1] == "graphNEL" || class(amat)[1] == "character") { amat<-grMAT(amat)} if(is(amat,"matrix")){ if(nrow(amat)==ncol(amat)){ if(length(rownames(amat))!=ncol(amat)){ rownames(amat)<-1:ncol(amat) colnames(amat)<-1:ncol(amat)} } else { stop("'object' is not in a valid adjacency matrix form")}} if(!is(amat,"matrix")) { stop("'object' is not in a valid form")} S<-C St<-c() while(identical(S,St)==FALSE) { St<-S for(j in S){ for(i in rownames(amat)){ if(amat[i,j]%%10 == 1){ S<-c(i,S[S!=i])} } } } amatr<-amat amatt<-2*amat while(identical(amatr,amatt)==FALSE) { amatt<-amatr ############################################################1 amat21 <- amatr for (kk in M) { for (kkk in 1:ncol(amatr)) { amat31<-amatr if (amatr[kkk,kk]%%10==1|amatr[kk,kkk]%%10==1) { amat31[kk,kkk]<-amatr[kkk,kk] amat31[kkk,kk]<-amatr[kk,kkk]} idx <- which(amat31[, kk]%%10 == 1) lenidx <- length(idx) if ((lenidx > 1&amat31[kkk,kk]%%10==1)) { for (ii in 1:(lenidx )) { #for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], kkk]%%10==0&idx[ii]!=kkk){ amat21[idx[ii], kkk] <- TRUE }} } } } amatr<-amat21 ################################################################2 amat22 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in S) { idx <- which(amatr[, kk]%%10 == 1) idy <- which(amatr[, kk]> 99) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idx[ii], idy[jj]]%%10==0 & idx[ii]!=idy[jj]){ amat22[idx[ii], idy[jj]] <- 1 }} } } } amatr<-amat22+amatr #################################################################3 amat33<- t(amatr) amat23 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amat33[, kk]%%10 == 1) idy <- which(amat33[, kk]%%100> 9) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idy[jj], idx[ii]]%%10==0 & idx[ii]!=idy[jj]){ amat23[idy[jj], idx[ii]] <- 1 }} } } } amatr<-amat23+amatr ##################################################################4 amat24 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amatr[, kk]%%100>9) idy <- which(amatr[, kk]> 99) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idx[ii], idy[jj]]%%10==0 & idx[ii]!=idy[jj]){ amat24[idx[ii], idy[jj]] <- 1 }} } } } amatr<-amat24+amatr ####################################################################5 amat35<- t(amatr) amat25 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amat35[, kk]%%10 == 1) lenidx <- length(idx) if ((lenidx > 1)) { for (ii in 1:(lenidx - 1)) { for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], idx[jj]]<100){ amat25[idx[ii], idx[jj]] <- 100 }} } } } amatr<-amat25+t(amat25)+amatr ######################################################################6 amat36<- t(amatr) amat26 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amat36[, kk]%%10 == 1) idy <- which(amat36[, kk]> 99) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idy[jj], idx[ii]]<100 & idx[ii]!=idy[jj]){ amat26[idy[jj], idx[ii]] <- 100 }} } } } amatr<-amat26+t(amat26)+amatr #################################################################7 amat27 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in S) { idx <- which(amatr[, kk]>99) lenidx <- length(idx) if ((lenidx > 1)) { for (ii in 1:(lenidx - 1)) { for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], idx[jj]]<100){ amat27[idx[ii], idx[jj]] <- 100 }} } } } amatr<-amat27+t(amat27)+amatr ################################################################8 amat28 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in S) { idx <- which(amatr[, kk]%%10 == 1) lenidx <- length(idx) if ((lenidx > 1)) { for (ii in 1:(lenidx - 1)) { for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], idx[jj]]%%100<10){ amat28[idx[ii], idx[jj]] <- 10 }} } } } amatr<-amat28+t(amat28)+amatr #################################################################9 amat29 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amatr[, kk]%%10 == 1) idy <- which(amatr[, kk]%%100> 9) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idx[ii], idy[jj]]%%100<10 & idx[ii]!=idy[jj]){ amat29[idx[ii], idy[jj]] <- 10 }} } } } amatr<-amat29+t(amat29)+amatr ##################################################################10 amat20 <- matrix(rep(0,length(amat)),dim(amat)) for (kk in M) { idx <- which(amatr[, kk]%%100>9) lenidx <- length(idx) if ((lenidx > 1)) { for (ii in 1:(lenidx - 1)) { for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], idx[jj]]%%100<10){ amat20[idx[ii], idx[jj]] <- 10 }} } } } amatr<-amat20+t(amat20)+amatr } for(i in 1:ncol(amatr)) { for(j in 1:ncol(amatr)) { if(amatr[i,j]%%100>9){ amatr[i,j]<-10 for(k in 1:ncol(amatr)){ if(amatr[k,j]==100){ amatr[j,k]<-1 amatr[k,j]<-0 }} } }} SS<-S SSt<-c() while(identical(SS,SSt)==FALSE) { SSt<-SS for(j in SS){ for(i in rownames(amat)) { if(amatr[i,j]%%10 == 1){ SS<-c(i,SS[SS!=i])} } } } for(i in SS){ for(j in SS) { if(amatr[i,j]%%10==1){ amatr[i,j]<-10 amatr[j,i]<-10}}} amatn <- amatr for (kk in 1:ncol(amatr)) { for (kkk in 1:ncol(amatr)) { amat3n<-amatr if (amatr[kkk,kk]%%10==1|amatr[kk,kkk]%%10==1) { amat3n[kk,kkk]<-amatr[kkk,kk] amat3n[kkk,kk]<-amatr[kk,kkk]} idx <- which(amat3n[, kk]%%10 == 1) lenidx <- length(idx) if ((lenidx > 1&amat3n[kkk,kk]%%10==1)) { for (ii in 1:(lenidx )) { #for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], kkk]%%10==0&idx[ii]!=kkk){ amatn[idx[ii], kkk] <- TRUE }} } } } amatt<-2*amat while(identical(amatr,amatt)==FALSE) { amatt<-amatr amat27n <- matrix(rep(0,length(amat)),dim(amat)) for (kk in 1:ncol(amatr)) { idx <- which(amatn[, kk]>99) lenidx <- length(idx) if ((lenidx > 1)) { for (ii in 1:(lenidx - 1)) { for (jj in (ii + 1):lenidx) { if(amatr[idx[ii], idx[jj]]<100 && (amatn[kk,idx[ii]]%%10==1 || amatn[kk,idx[jj]]%%10==1)){ amat27n[idx[ii], idx[jj]] <- 100 }} } } } amatn<-amat27n+t(amat27n)+amatn amatr<-amat27n+t(amat27n)+amatr amat22n <- matrix(rep(0,length(amat)),dim(amat)) for (kk in 1:ncol(amatr)) { idx <- which(amatn[, kk]%%10 == 1) idy <- which(amatn[, kk]> 99) lenidx <- length(idx) lenidy <- length(idy) if ((lenidx > 0 & lenidy >0)) { for (ii in 1:(lenidx)) { for (jj in 1:lenidy) { if(amatr[idx[ii], idy[jj]]%%10==0 & idx[ii]!=idy[jj]& amatn[kk,idy[jj]]%%10==1){ amat22n[idx[ii], idy[jj]] <- 1 }} } } } amatn<-amat22n+amatn amatr<-amat22n+amatr } for(i in 1:ncol(amatr)) { for(j in 1:ncol(amatr)) { if(amatr[i,j]==101){ amatr[i,j]<-1 amatr[j,i]<-0}}} Mn<-c() Cn<-c() for(i in M){ Mn<-c(Mn,which(rownames(amat)==i))} for(i in C){ Cn<-c(Cn,which(rownames(amat)==i))} if(length(Mn)>0&length(Cn)>0){ fr<-amatr[-c(Mn,Cn),-c(Mn,Cn)]} if(length(Mn)>0&length(Cn)==0){ fr<-amatr[-c(Mn),-c(Mn)]} if(length(Mn)==0&length(Cn)>0){ fr<-amatr[-c(Cn),-c(Cn)]} if(length(Mn)==0&length(Cn)==0){ fr<-amatr} if(plot==TRUE){ plotfun(fr, ...)} if(showmat==FALSE){ invisible(fr)} else{return(fr)} } ############################################################################## ############################################################################## #' Maximisation for graphs #' #' \code{Max} generates a maximal graph that induces the same independence #' model from a non-maximal graph. #' #' \code{Max} looks for non-adjacent pais of nodes that are connected by #' primitive inducing paths, and connect such pairs by an appropriate edge. #' #' @param amat An adjacency matrix, or a graph that can be a \code{graphNEL} or #' an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} #' is the number of edges of the graph, that is a sequence of triples (type, #' node1label, node2label). The type of edge can be \code{"a"} (arrows from #' node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). #' @return A matrix that consists 4 different integers as an \eqn{ij}-element: #' 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from #' \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 #' for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added #' to be associated with multiple edges of different types. The matrix is #' symmetric w.r.t full lines and bi-directed arrows. #' @author Kayvan Sadeghi #' @seealso \code{\link{MAG}}, \code{\link{MRG}}, \code{\link{msep}}, #' \code{\link{MSG}} #' @references Richardson, T.S. and Spirtes, P. (2002). Ancestral graph Markov #' models. \emph{Annals of Statistics}, 30(4), 962-1030. #' #' Sadeghi, K. and Lauritzen, S.L. (2011). Markov properties for loopless mixed #' graphs. \emph{Submitted}. \url{http://arxiv.org/abs/1109.5909}. #' @keywords graphs loopless mixed graph m-separation maximality #' @examples #' #' H <- matrix(c( 0,100, 1, 0, #' 100, 0,100, 0, #' 0,100, 0,100, #' 0, 1,100, 0), 4, 4) #' Max(H) #' Max<-function(amat) { if(class(amat)[1] == "igraph" || class(amat)[1] == "graphNEL" || class(amat)[1] == "character") { amat<-grMAT(amat)} if(is(amat,"matrix")){ if(nrow(amat)==ncol(amat)){ if(length(rownames(amat))!=ncol(amat)){ rownames(amat)<-1:ncol(amat) colnames(amat)<-1:ncol(amat)} } else { stop("'object' is not in a valid adjacency matrix form")}} if(!is(amat,"matrix")) { stop("'object' is not in a valid form")} na<-ncol(amat) at<-which(amat+t(amat)+diag(na)==0,arr.ind=TRUE) if(dim(at)[1]>0){ for(i in 1:dim(at)[1]){ S<-at[i,] St<-c() while(identical(S,St)==FALSE){ St<-S for(j in S){ for(k in 1:na){ if(amat[k,j]%%10 == 1){ S<-c(k,S[S!=k])} } }} one<-at[i,1] two<-at[i,2] onearrow<-c() onearc<-c() twoarrow<-c() twoarc<-c() Sr<-S Sr<-Sr[Sr!=at[i,1]] Sr<-Sr[Sr!=at[i,2]] if(length(Sr)>0){ for(j in Sr){ if(amat[one,j]%%10==1){ onearrow<-c(onearrow,j)} if(amat[one,j]>99){ onearc<-c(onearc,j)}} for(j in Sr){ if(amat[two,j]%%10==1){ for(k in onearc){ if(j==k || amat[j,k]>99){ amat[at[i,2],at[i,1]]<-1}} twoarrow<-c(twoarrow,j)} if(amat[two,j]>99){ for(k in onearrow){ if(j==k || amat[j,k]>99){ amat[at[i,1],at[i,2]]<-1}} for(k in onearc){ if(j==k || amat[j,k]>99){ amat[at[i,1],at[i,2]]<-100 amat[at[i,2],at[i,1]]<-100}} twoarc<-c(twoarc,j)}}} if(length(c(onearc,onearrow,twoarc,twoarrow))>0){ for(j in c(onearc,onearrow,twoarc,twoarrow)){ Sr<-Sr[Sr!=j]} onearcn<-c() twoarcn<-c() onearrown<-c() twoarrown<-c() while(length(Sr)>0){ for(l in onearc){ for(j in Sr){ if(amat[l,j]>99){ onearcn<-c(onearcn,j)}}} for(l in onearrow){ for(j in Sr){ if(amat[l,j]>99){ onearrown<-c(onearrown,j)}}} for(l in twoarc){ for(j in Sr){ if(amat[l,j]>99){ for(k in onearrow){ if(j==k || amat[j,k]>99){ amat[at[i,1],at[i,2]]<-1}} for(k in onearc){ if(j==k || amat[j,k]>99){ amat[at[i,1],at[i,2]]<-100 amat[at[i,2],at[i,1]]<-100}} twoarcn<-c(twoarcn,j)}}} for(l in twoarrow){ for(j in Sr){ if(amat[l,j]>99){ for(k in onearc){ if(j==k || amat[j,k]>99){ amat[at[i,1],at[i,2]]<-100 amat[at[i,2],at[i,1]]<-100}} twoarrown<-c(twoarrown,j)}}} if(length(c(onearcn,onearrown,twoarcn,twoarrown))==0){ break} for(j in c(onearcn,onearrown,twoarcn,twoarrown)){ Sr<-Sr[Sr!=j]} onearc<-onearcn twoarc<-twoarcn onearrow<-onearrown twoarrow<-twoarrown onearcn<-c() twoarcn<-c() onearrown<-c() twoarrown<-c()}}}} return(amat) } ##################################################################################################### ###################################################################################################### #' The m-separation criterion #' #' \code{msep} determines whether two set of nodes are m-separated by a third #' set of nodes. #' #' #' @param a An adjacency matrix, or a graph that can be a \code{graphNEL} or an #' \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} is #' the number of edges of the graph, that is a sequence of triples (type, #' node1label, node2label). The type of edge can be \code{"a"} (arrows from #' node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). #' @param alpha A subset of the node set of \code{a} #' @param beta Another disjoint subset of the node set of \code{a} #' @param C A third disjoint subset of the node set of \code{a} #' @return A logical value. \code{TRUE} if \code{alpha} and \code{beta} are #' m-separated given \code{C}. \code{FALSE} otherwise. #' @author Kayvan Sadeghi #' @seealso \code{\link{dSep}}, \code{\link{MarkEqMag}} #' @references Richardson, T.S. and Spirtes, P. (2002) Ancestral graph Markov #' models. \emph{Annals of Statistics}, 30(4), 962-1030. #' #' Sadeghi, K. and Lauritzen, S.L. (2011). Markov properties for loopless mixed #' graphs. \emph{Submitted}, 2011. URL \url{http://arxiv.org/abs/1109.5909}. #' @keywords graphs d-separation m-separation mixed graph #' @examples #' #' H <-matrix(c(0,0,0,0, #' 1,0,0,1, #' 0,1,0,0, #' 0,0,0,0),4,4) #' msep(H,1,4, 2) #' msep(H,1,4, c()) #' msep<-function(a,alpha,beta,C=c()){ if(class(a)[1] == "igraph" || class(a)[1] == "graphNEL" || class(a)[1] == "character") { a<-grMAT(a)} if(is(a,"matrix")){ if(nrow(a)==ncol(a)){ if(length(rownames(a))!=ncol(a)){ rownames(a)<-1:ncol(a) colnames(a)<-1:ncol(a)} } else { stop("'object' is not in a valid adjacency matrix form")}} if(!is(a,"matrix")) { stop("'object' is not in a valid form")} M<-rem(rownames(a),c(alpha,beta,C)) ar<-Max(RG(a,M,C)) #aralpha<-as.matrix(ar[SPl(c(alpha,beta),alpha),SPl(c(alpha,beta),beta)]) #arbeta<-as.matrix(ar[SPl(c(alpha,beta),beta),SPl(c(alpha,beta),alpha)]) #for(i in 1:length(alpha)){ # for(j in 1:length(beta)){ # if(aralpha[j,i]!=0 || arbeta[j,i]!=0){ # return("NOT separated") # break # break}}} if(max(ar[as.character(beta),as.character(alpha)]+ar[as.character(alpha),as.character(beta)]!=0)){ return(FALSE)} return(TRUE) } ############################################################################ ############################################################################ #' Maximal ribbonless graph #' #' \code{MRG} generates and plots maximal ribbonless graphs (a modification of #' MC graph to use m-separation) after marginalisation and conditioning. #' #' This function uses the functions \code{\link{RG}} and \code{\link{Max}}. #' #' @param amat An adjacency matrix, or a graph that can be a \code{graphNEL} or #' an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} #' is the number of edges of the graph, that is a sequence of triples (type, #' node1label, node2label). The type of edge can be \code{"a"} (arrows from #' node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). #' @param M A subset of the node set of \code{a} that is going to be #' marginalized over #' @param C Another disjoint subset of the node set of \code{a} that is going #' to be conditioned on. #' @param showmat A logical value. \code{TRUE} (by default) to print the #' generated matrix. #' @param plot A logical value, \code{FALSE} (by default). \code{TRUE} to plot #' the generated graph. #' @param plotfun Function to plot the graph when \code{plot == TRUE}. Can be #' \code{plotGraph} (the default) or \code{drawGraph}. #' @param \dots Further arguments passed to \code{plotfun}. #' @return A matrix that consists 4 different integers as an \eqn{ij}-element: #' 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from #' \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 #' for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added #' to be associated with multiple edges of different types. The matrix is #' symmetric w.r.t full lines and bi-directed arrows. #' @author Kayvan Sadeghi #' @seealso \code{\link{MAG}}, \code{\link{Max}}, \code{\link{MSG}}, #' \code{\link{RG}} #' @references Koster, J.T.A. (2002). Marginalizing and conditioning in #' graphical models. \emph{Bernoulli}, 8(6), 817-840. #' #' Richardson, T.S. and Spirtes, P. (2002). Ancestral graph Markov models. #' \emph{Annals of Statistics}, 30(4), 962-1030. #' #' Sadeghi, K. (2011). Stable classes of graphs containing directed acyclic #' graphs. \emph{Submitted}. #' #' Sadeghi, K. and Lauritzen, S.L. (2011). Markov properties for loopless mixed #' graphs. \emph{Submitted}. URL \url{http://arxiv.org/abs/1109.5909}. #' @keywords graphs directed acyclic graph marginalisation and conditioning #' maximality of graphs MC graph ribbonless graph #' @examples #' #' ex <- matrix(c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ##The adjacency matrix of a DAG #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, #' 0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0, #' 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0),16,16, byrow = TRUE) #' M <- c(3,5,6,15,16) #' C <- c(4,7) #' MRG(ex, M, C, plot = TRUE) #' ################################################### #' H <- matrix(c( 0, 100, 1, 0, #' 100, 0, 100, 0, #' 0, 100, 0, 100, #' 0, 1, 100, 0), 4,4) #' Max(H) #' `MRG` <- function (amat,M=c(),C=c(),showmat=TRUE,plot=FALSE, plotfun = plotGraph, ...) { return(Max(RG(amat,M,C,showmat,plot, plotfun = plotGraph, ...))) } ########################################################################## ########################################################################## #' Maximal summary graph #' #' \code{MAG} generates and plots maximal summary graphs after marginalization #' and conditioning. #' #' This function uses the functions \code{\link{SG}} and \code{\link{Max}}. #' #' @param amat An adjacency matrix of a MAG, or a graph that can be a #' \code{graphNEL} or an \code{\link{igraph}} object or a vector of length #' \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a #' sequence of triples (type, node1label, node2label). The type of edge can be #' \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} #' (lines). #' @param M A subset of the node set of \code{a} that is going to be #' marginalized over #' @param C Another disjoint subset of the node set of \code{a} that is going #' to be conditioned on. #' @param showmat A logical value. \code{TRUE} (by default) to print the #' generated matrix. #' @param plot A logical value, \code{FALSE} (by default). \code{TRUE} to plot #' the generated graph. #' @param plotfun Function to plot the graph when \code{plot == TRUE}. Can be #' \code{plotGraph} (the default) or \code{drawGraph}. #' @param \dots Further arguments passed to \code{plotfun}. #' @return A matrix that consists 4 different integers as an \eqn{ij}-element: #' 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from #' \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 #' for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added #' to be associated with multiple edges of different types. The matrix is #' symmetric w.r.t full lines and bi-directed arrows. #' @author Kayvan Sadeghi #' @seealso \code{\link{MAG}}, \code{\link{Max}}, \code{\link{MRG}}, #' \code{\link{SG}} #' @references Richardson, T.S. and Spirtes, P. (2002). Ancestral graph Markov #' models. \emph{Annals of Statistics}, 30(4), 962-1030. #' #' Sadeghi, K. (2011). Stable classes of graphs containing directed acyclic #' graphs. \emph{Submitted}. #' #' Sadeghi, K. and Lauritzen, S.L. (2011). Markov properties for loopless mixed #' graphs. \emph{Submitted}. URL \url{http://arxiv.org/abs/1109.5909}. #' #' Wermuth, N. (2011). Probability distributions with summary graph structure. #' \emph{Bernoulli}, 17(3), 845-879. #' @keywords graphs directed acyclic graph marginalisation and conditioning #' maximality of graphs summary graph #' @examples #' #' ex<-matrix(c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ##The adjacency matrix of a DAG #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, #' 0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0, #' 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0), 16, 16, byrow=TRUE) #' M <- c(3,5,6,15,16) #' C <- c(4,7) #' MSG(ex,M,C,plot=TRUE) #' ################################################### #' H<-matrix(c(0,100,1,0,100,0,100,0,0,100,0,100,0,1,100,0),4,4) #' Max(H) #' `MSG` <- function (amat,M=c(),C=c(),showmat=TRUE,plot=FALSE, plotfun = plotGraph, ...) { return(Max(SG(amat,M,C,showmat,plot, plotfun = plotGraph, ...))) } ############################################################################ ########################################################################### #' Maximal ancestral graph #' #' \code{MAG} generates and plots maximal ancestral graphs after #' marginalisation and conditioning. #' #' This function uses the functions \code{\link{AG}} and \code{\link{Max}}. #' #' @param amat An adjacency matrix, or a graph that can be a \code{graphNEL} or #' an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} #' is the number of edges of the graph, that is a sequence of triples (type, #' node1label, node2label). The type of edge can be \code{"a"} (arrows from #' node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). #' @param M A subset of the node set of \code{a} that is going to be #' marginalized over #' @param C Another disjoint subset of the node set of \code{a} that is going #' to be conditioned on. #' @param showmat A logical value. \code{TRUE} (by default) to print the #' generated matrix. #' @param plot A logical value, \code{FALSE} (by default). \code{TRUE} to plot #' the generated graph. #' @param plotfun Function to plot the graph when \code{plot == TRUE}. Can be #' \code{plotGraph} (the default) or \code{drawGraph}. #' @param \dots Further arguments passed to \code{plotfun}. #' @return A matrix that consists 4 different integers as an \eqn{ij}-element: #' 0 for a missing edge between \eqn{i} and \eqn{j}, 1 for an arrow from #' \eqn{i} to \eqn{j}, 10 for a full line between \eqn{i} and \eqn{j}, and 100 #' for a bi-directed arrow between \eqn{i} and \eqn{j}. These numbers are added #' to be associated with multiple edges of different types. The matrix is #' symmetric w.r.t full lines and bi-directed arrows. #' @author Kayvan Sadeghi #' @seealso \code{\link{AG}}, \code{\link{Max}}, \code{\link{MRG}}, #' \code{\link{MSG}} #' @references Richardson, T. S. and Spirtes, P. (2002). Ancestral graph Markov #' models. \emph{Annals of Statistics}, 30(4), 962-1030. #' #' Sadeghi, K. (2011). Stable classes of graphs containing directed acyclic #' graphs. \emph{Submitted}. #' #' Sadeghi, K. and Lauritzen, S.L. (2011). Markov properties for loopless #' mixed graphs. \emph{Submitted}. URL \url{http://arxiv.org/abs/1109.5909}. #' @keywords ancestral graph directed acyclic graph marginalization and #' conditioning maximality of graphs #' @examples #' #' ex<-matrix(c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ##The adjacency matrix of a DAG #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, #' 0,0,0,0,1,0,1,0,1,1,0,0,0,0,0,0, #' 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, #' 1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0, #' 0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0), 16, 16, byrow = TRUE) #' M <- c(3,5,6,15,16) #' C <- c(4,7) #' MAG(ex, M, C, plot=TRUE) #' ################################################### #' H <- matrix(c(0,100,1,0,100,0,100,0,0,100,0,100,0,1,100,0),4,4) #' Max(H) #' `MAG`<-function (amat,M=c(),C=c(),showmat=TRUE,plot=FALSE, plotfun = plotGraph, ...) { return(Max(AG(amat,M,C,showmat,plot, plotfun = plotGraph, ...))) } ############################################################################ ############################################################################ #Plot<-function(a) #{ # if(class(a)[1] == "igraph" || class(a)[1] == "graphNEL" || class(a)[1] == "character") { # a<-grMAT(a)} # if(is(a,"matrix")){ # if(nrow(a)==ncol(a)){ # if(length(rownames(a))!=ncol(a)){ # rownames(a)<-1:ncol(a) # colnames(a)<-1:ncol(a)} # l1<-c() # l2<-c() # for (i in 1:nrow(a)){ # for (j in i:nrow(a)){ # if (a[i,j]==1){ # l1<-c(l1,i,j) # l2<-c(l2,2)} # if (a[j,i]%%10==1){ # l1<-c(l1,j,i) # l2<-c(l2,2)} # if (a[i,j]==10){ # l1<-c(l1,i,j) # l2<-c(l2,0)} # if (a[i,j]==11){ # l1<-c(l1,i,j,i,j) # l2<-c(l2,2,0)} # if (a[i,j]==100){ # l1<-c(l1,i,j) # l2<-c(l2,3)} # if (a[i,j]==101){ # l1<-c(l1,i,j,i,j) # l2<-c(l2,2,3)} # if (a[i,j]==110){ # l1<-c(l1,i,j,i,j) # l2<-c(l2,0,3)} # if (a[i,j]==111){ # l1<-c(l1,i,j,i,j,i,j) # l2<-c(l2,2,0,3)} # } # } # } # else { # stop("'object' is not in a valid adjacency matrix form") # } # if(length(l1)>0){ # l1<-l1-1 # agr<-graph(l1,n=nrow(a),directed=TRUE)} # if(length(l1)==0){ # agr<-graph.empty(n=nrow(a), directed=TRUE) # return(plot(agr,vertex.label=rownames(a)))} # return( tkplot(agr, layout=layout.kamada.kawai, edge.curved=FALSE:TRUE, vertex.label=rownames(a),edge.arrow.mode=l2)) # } # else { # stop("'object' is not in a valid format")} #} ############################################################################ ############################################################################ #' Markov equivalence for regression chain graphs. #' #' \code{MarkEqMag} determines whether two RCGs (or subclasses of RCGs) are #' Markov equivalent. #' #' The function checks whether the two graphs have the same skeleton and #' unshielded colliders. #' #' @param amat An adjacency matrix of an RCG or a graph that can be a #' \code{graphNEL} or an \code{\link{igraph}} object or a vector of length #' \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a #' sequence of triples (type, node1label, node2label). The type of edge can be #' \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} #' (lines). #' @param bmat The same as \code{amat}. #' @return "Markov Equivalent" or "NOT Markov Equivalent". #' @author Kayvan Sadeghi #' @seealso \code{\link{MarkEqMag}}, \code{\link{msep}} #' @references Wermuth, N. and Sadeghi, K. (2011). Sequences of regressions and #' their independences. \emph{TEST}, To appear. #' \url{http://arxiv.org/abs/1103.2523}. #' @keywords graphs bidirected graph directed acyclic graph Markov equivalence #' regression chain graph undirected graph multivariate #' @examples #' #' H1<-matrix(c(0,100,0,0,0,100,0,100,0,0,0,100,0,0,0,1,0,0,0,100,0,0,1,100,0),5,5) #' H2<-matrix(c(0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,100,0,0,1,100,0),5,5) #' H3<-matrix(c(0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0),5,5) #' #MarkEqRcg(H1,H2) #' #MarkEqRcg(H1,H3) #' #MarkEqRcg(H2,H3) #' MarkEqRcg<-function(amat,bmat) { if(class(amat)[1] == "igraph"){ amat<-grMAT(amat)} if(class(amat)[1] == "graphNEL"){ amat<-grMAT(amat)} if(class(amat)[1] == "character"){ amat<-grMAT(amat)} if( length(rownames(amat))!=ncol(amat) | length(colnames(amat))!=ncol(amat)){ rownames(amat)<-1:ncol(amat) colnames(amat)<-1:ncol(amat)} if(class(bmat)[1] == "igraph"){ bmat<-grMAT(bmat)} if(class(bmat)[1] == "graphNEL"){ bmat<-grMAT(bmat)} if(class(bmat)[1] == "character"){ bmat<-grMAT(bmat)} if( length(rownames(bmat))!=ncol(bmat) | length(colnames(bmat))!=ncol(bmat)){ rownames(bmat)<-1:ncol(bmat) colnames(bmat)<-1:ncol(bmat)} bmat<-bmat[rownames(amat),colnames(amat),drop=FALSE] na<-ncol(amat) nb<-ncol(bmat) if(na != nb){ return(FALSE)} at<-which(amat+t(amat)+diag(na)==0,arr.ind=TRUE) bt<-which(bmat+t(bmat)+diag(na)==0,arr.ind=TRUE) if(identical(at,bt)==FALSE){ return(FALSE)} ai<-c() bi<-c() if(dim(at)[1]!=0){ for(i in 1:dim(at)[1]){ for(j in 1:na){ if((amat[at[i,1],j]%%10==1 || amat[at[i,1],j]>99) &&(amat[at[i,2],j]%%10==1 || amat[at[i,2],j]>99) ){ ai<-c(ai,j)} if((bmat[bt[i,1],j]%%10==1 || bmat[bt[i,1],j]>99) &&(bmat[bt[i,2],j]%%10==1 || bmat[bt[i,2],j]>99) ){ bi<-c(bi,j)}} if(identical(ai,bi)==FALSE){ return(FALSE)} ai<-c() bi<-c()}} return(TRUE) } ######################################################################################## ######################################################################################## #' Markov equivalence of maximal ancestral graphs #' #' \code{MarkEqMag} determines whether two MAGs are Markov equivalent. #' #' The function checks whether the two graphs have the same skeleton and #' colliders with order. #' #' @param amat An adjacency matrix of a MAG, or a graph that can be a #' \code{graphNEL} or an \code{\link{igraph}} object or a vector of length #' \eqn{3e}, where \eqn{e} is the number of edges of the graph, that is a #' sequence of triples (type, node1label, node2label). The type of edge can be #' \code{"a"} (arrows from node1 to node2), \code{"b"} (arcs), and \code{"l"} #' (lines). #' @param bmat The same as \code{amat}. #' @return "Markov Equivalent" or "NOT Markov Equivalent". #' @author Kayvan Sadeghi #' @seealso \code{\link{MarkEqRcg}}, \code{\link{msep}} #' @references Ali, R.A., Richardson, T.S. and Spirtes, P. (2009) Markov #' equivalence for ancestral graphs. \emph{Annals of Statistics}, #' 37(5B),2808-2837. #' @keywords graphs Markov equivalence maximal ancestral graphs multivariate #' @examples #' #' H1<-matrix( c(0,100, 0, 0, #' 100, 0,100, 0, #' 0,100, 0,100, #' 0, 1,100, 0), 4, 4) #' H2<-matrix(c(0,0,0,0,1,0,100,0,0,100,0,100,0,1,100,0),4,4) #' H3<-matrix(c(0,0,0,0,1,0,0,0,0,1,0,100,0,1,100,0),4,4) #' MarkEqMag(H1,H2) #' MarkEqMag(H1,H3) #' MarkEqMag(H2,H3) #' `MarkEqMag` <- function(amat,bmat) { if(class(amat)[1] == "igraph"){ amat<-grMAT(amat)} if(class(amat)[1] == "graphNEL"){ amat<-grMAT(amat)} if(class(amat)[1] == "character"){ amat<-grMAT(amat)} if( length(rownames(amat))!=ncol(amat) | length(colnames(amat))!=ncol(amat)){ rownames(amat)<-1:ncol(amat) colnames(amat)<-1:ncol(amat)} if(class(bmat)[1] == "igraph"){ bmat<-grMAT(bmat)} if(class(bmat)[1] == "graphNEL"){ bmat<-grMAT(bmat)} if(class(bmat)[1] == "character"){ bmat<-grMAT(bmat)} if( length(rownames(bmat))!=ncol(bmat) | length(colnames(bmat))!=ncol(bmat)){ rownames(bmat)<-1:ncol(bmat) colnames(bmat)<-1:ncol(bmat)} bmat<-bmat[rownames(amat),colnames(amat),drop=FALSE] na<-ncol(amat) nb<-ncol(bmat) if(na != nb){ return(FALSE)} at<-which(amat+t(amat)+diag(na)==0,arr.ind=TRUE) bt<-which(bmat+t(bmat)+diag(na)==0,arr.ind=TRUE) if(identical(at,bt)==FALSE){ return(FALSE)} a<-c() b<-c() if(length(at)>0){ for(i in 1:dim(at)[1]){ for(j in 1:na){ if((amat[at[i,1],j]%%10==1 || amat[at[i,1],j]>99) &&(amat[at[i,2],j]%%10==1 || amat[at[i,2],j]>99) ){ a<-rbind(a,c(at[i,1],j,at[i,2]))} if((bmat[bt[i,1],j]%%10==1 || bmat[bt[i,1],j]>99) &&(bmat[bt[i,2],j]%%10==1 || bmat[bt[i,2],j]>99) ){ b<-rbind(b,c(bt[i,1],j,bt[i,2]))}}} if(identical(a,b)==FALSE){ return(FALSE)}} ar<-which(amat%%10==1,arr.ind=TRUE) br<-which(bmat%%10==1,arr.ind=TRUE) ap<-c() bp<-c() if(length(ar)>0){ for(i in 1:dim(ar)[1]){ for(j in 1:na){ if((amat[ar[i,1],j]>99) &&(amat[ar[i,2],j]>99)){ ap<-rbind(ap,c(ar[i,1],j,ar[i,2]))}}}} if(length(br)>0){ for(i in 1:dim(br)[1]){ for(j in 1:nb){ if((bmat[br[i,1],j]>99) &&(bmat[br[i,2],j]>99)){ bp<-rbind(bp,c(br[i,1],j,br[i,2]))}}}} if(length(ap)>0){ aptt<-ap apt<-c(ap,1) Qonen<-c() Qtwon<-c() while(length(apt)-length(aptt)>0){ apt<-aptt for(i in (1:dim(ap)[1])){ Qone<-ap[i,1] Qtwo<-ap[i,2] while(length(Qone)>0){ for(l in (1:length(Qone))){ J<-which(((amat+t(amat)+diag(na))[ap[i,3],]==0) & ((amat[Qone[l],]>99) | (amat[,Qone[l]]%%100==1))) for(j in J){ for(k in 1:dim(a)[1]){ if(min(a[k,]==c(j,Qone[l],Qtwo[l]))==1){ a<-rbind(a,ap[i,]) aptt<-aptt[(1:dim(aptt)[1])[-i],] break break break break}}} Q<-which((amat[,ap[i,3]]%%10==1) & (amat[Qone[l],]>99)) for(q in Q){ for(k in 1:dim(a)[1]){ if(min(a[k,]==c(q,Qone[l],Qtwo[l]))==1){ Qtwon<-c(Qtwon,Qone[l]) Qonen<-c(Qonen,q)}}}} Qtwo<-Qtwon Qone<-Qonen Qonen<-c() Qtwon<-c()}}}} if(length(bp)>0){ bptt<-bp bpt<-c(bp,1) Qbonen<-c() Qbtwon<-c() while(length(bpt)-length(bptt)>0){ bpt<-bptt for(i in (1:dim(bp)[1])){ Qbone<-bp[i,1] Qbtwo<-bp[i,2] while(length(Qbone)>0){ for(l in (1:length(Qbone))){ J<-which(((bmat+t(bmat)+diag(nb))[bp[i,3],]==0) & ((bmat[Qbone[l],]>99) | (bmat[,Qbone[l]]%%100==1))) for(j in J){ for(k in 1:dim(b)[1]){ if(min(b[k,]==c(j,Qbone[l],Qbtwo[l]))==1){ b<-rbind(b,bp[i,]) bptt<-bptt[(1:dim(bptt)[1])[-i],] break break break break}}} Qb<-which((bmat[,bp[i,3]]%%10==1) & (bmat[Qbone[l],]>99)) for(q in Qb){ for(k in 1:dim(b)[1]){ if(min(b[k,]==c(q,Qbone[l],Qbtwo[l]))==1){ Qbtwon<-c(Qbtwon,Qbone[l]) Qbonen<-c(Qbonen,q)}}}} Qbtwo<-Qbtwon Qbone<-Qbonen Qbonen<-c() Qbtwon<-c()}}}} if(length(a)!=length(b)){ return(FALSE)} f<-c() if((length(a)>0) && (length(b)>0)){ for(i in 1:dim(a)[1]){ for(j in 1:dim(b)[1]){ f<-c(f,min(a[i,]==b[j,]))} if(max(f)==0){ return(FALSE)}}} return(TRUE) } ########################################################################################## ########################################################################################### #' Representational Markov equivalence to undirected graphs. #' #' \code{RepMarUG} determines whether a given maximal ancestral graph can be #' Markov equivalent to an undirected graph, and if that is the case, it finds #' an undirected graph that is Markov equivalent to the given graph. #' #' \code{RepMarBG} looks for presence of an unshielded collider V-configuration #' in graph. #' #' @param amat An adjacency matrix, or a graph that can be a \code{graphNEL} or #' an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} #' is the number of edges of the graph, that is a sequence of triples (type, #' node1label, node2label). The type of edge can be \code{"a"} (arrows from #' node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). #' @return A list with two components: \code{verify} and \code{amat}. #' \code{verify} is a logical value, \code{TRUE} if there is a representational #' Markov equivalence and \code{FALSE} otherwise. \code{amat} is either #' \code{NA} if \code{verify == FALSE} or the adjacency matrix of the generated #' graph, if \code{verify == TRUE}. In this case it consists of 4 different #' integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and #' \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between #' \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and #' \eqn{j}. These numbers are added to be associated with multiple edges of #' different types. The matrix is symmetric w.r.t full lines and bi-directed #' arrows. #' @author Kayvan Sadeghi #' @seealso \code{\link{MarkEqMag}}, \code{\link{MarkEqRcg}}, #' \code{\link{RepMarBG}}, \code{\link{RepMarDAG}} #' @references Sadeghi, K. (2011). Markov equivalences for subclasses of #' loopless mixed graphs. \emph{Submitted}, 2011. #' @keywords graphs bidirected graph Markov equivalence maximal ancestral graph #' representational Markov equivalence #' @examples #' #' H<-matrix(c(0,10,0,0,10,0,0,0,0,1,0,100,0,0,100,0),4,4) #' RepMarUG(H) #' RepMarUG<-function(amat) { if(class(amat)[1] == "igraph"){ amat<-grMAT(amat)} if(class(amat)[1] == "graphNEL"){ amat<-grMAT(amat)} if(class(amat)[1] == "character"){ amat<-grMAT(amat)} if( length(rownames(amat))!=ncol(amat) | length(colnames(amat))!=ncol(amat)){ rownames(amat)<-1:ncol(amat) colnames(amat)<-1:ncol(amat)} na<-ncol(amat) at<-which(amat+t(amat)+diag(na)==0,arr.ind=TRUE) if(dim(at)[1]!=0){ for(i in 1:dim(at)[1]){ for(j in 1:na){ if((amat[at[i,1],j]%%10==1 || amat[at[i,1],j]>99) &&(amat[at[i,2],j]%%10==1 || amat[at[i,2],j]>99) ){ return(list(verify = FALSE, amat = NA))}}}} for(i in 1:na){ for(j in 1:na){ if(amat[i,j]==100){ amat[i,j]<-10} if(amat[i,j]==1){ amat[i,j]<-10 amat[j,i]<-10}}} return(list(verify = TRUE, amat = amat)) } ######################################################################################## ######################################################################################### #' Representational Markov equivalence to bidirected graphs. #' #' \code{RepMarBG} determines whether a given maximal ancestral graph can be #' Markov equivalent to a bidirected graph, and if that is the case, it finds a #' bidirected graph that is Markov equivalent to the given graph. #' #' \code{RepMarBG} looks for presence of an unshielded non-collider #' V-configuration in graph. #' #' @param amat An adjacency matrix, or a graph that can be a \code{graphNEL} or #' an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} #' is the number of edges of the graph, that is a sequence of triples (type, #' node1label, node2label). The type of edge can be \code{"a"} (arrows from #' node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). #' @return A list with two components: \code{verify} and \code{amat}. #' \code{verify} is a logical value, \code{TRUE} if there is a representational #' Markov equivalence and \code{FALSE} otherwise. \code{amat} is either #' \code{NA} if \code{verify == FALSE} or the adjacency matrix of the generated #' graph, if \code{verify == TRUE}. In this case it consists of 4 different #' integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and #' \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between #' \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and #' \eqn{j}. These numbers are added to be associated with multiple edges of #' different types. The matrix is symmetric w.r.t full lines and bi-directed #' arrows. #' @author Kayvan Sadeghi #' @seealso \code{\link{MarkEqMag}}, \code{\link{MarkEqRcg}}, #' \code{\link{RepMarDAG}}, \code{\link{RepMarUG}} #' @references Sadeghi, K. (2011). Markov equivalences for subclasses of #' loopless mixed graphs. \emph{Submitted}, 2011. #' @keywords graphs bidirected graph Markov equivalence maximal ancestral graph #' representational Markov equivalence #' @examples #' #' H<-matrix(c(0,10,0,0,10,0,0,0,0,1,0,100,0,0,100,0),4,4) #' RepMarBG(H) #' RepMarBG<-function(amat) { if(class(amat)[1] == "igraph"){ amat<-grMAT(amat)} if(class(amat)[1] == "graphNEL"){ amat<-grMAT(amat)} if(class(amat)[1] == "character"){ amat<-grMAT(amat)} if( length(rownames(amat))!=ncol(amat) | length(colnames(amat))!=ncol(amat)){ rownames(amat)<-1:ncol(amat) colnames(amat)<-1:ncol(amat)} na<-ncol(amat) at<-which(amat+t(amat)+diag(na)==0,arr.ind=TRUE) if(dim(at)[1]!=0){ for(i in 1:dim(at)[1]){ for(j in 1:na){ if(amat[at[i,1],j]%%100>9 || amat[j,at[i,1]]%%10==1 || amat[at[i,2],j]%%100>9 || amat[j,at[i,2]]%%10==1){ return(list(verify= FALSE, amat = NA))}}}} for(i in 1:na){ for(j in 1:na){ if(amat[i,j]==10){ amat[i,j]<-100} if(amat[i,j]==1){ amat[i,j]<-100 amat[j,i]<-100}}} return(list(verify = TRUE, amat = amat)) } ######################################################################################## ######################################################################################## #' Representational Markov equivalence to directed acyclic graphs. #' #' \code{RepMarDAG} determines whether a given maximal ancestral graph can be #' Markov equivalent to a directed acyclic graph, and if that is the case, it #' finds a directed acyclic graph that is Markov equivalent to the given graph. #' #' \code{RepMarDAG} first looks whether the subgraph induced by full lines is #' chordal and whether there is a minimal collider path or cycle of length 4 in #' graph. #' #' @param amat An adjacency matrix, or a graph that can be a \code{graphNEL} or #' an \code{\link{igraph}} object or a vector of length \eqn{3e}, where \eqn{e} #' is the number of edges of the graph, that is a sequence of triples (type, #' node1label, node2label). The type of edge can be \code{"a"} (arrows from #' node1 to node2), \code{"b"} (arcs), and \code{"l"} (lines). #' @return A list with two components: \code{verify} and \code{amat}. #' \code{verify} is a logical value, \code{TRUE} if there is a representational #' Markov equivalence and \code{FALSE} otherwise. \code{amat} is either #' \code{NA} if \code{verify == FALSE} or the adjacency matrix of the generated #' graph, if \code{verify == TRUE}. In this case it consists of 4 different #' integers as an \eqn{ij}-element: 0 for a missing edge between \eqn{i} and #' \eqn{j}, 1 for an arrow from \eqn{i} to \eqn{j}, 10 for a full line between #' \eqn{i} and \eqn{j}, and 100 for a bi-directed arrow between \eqn{i} and #' \eqn{j}. These numbers are added to be associated with multiple edges of #' different types. The matrix is symmetric w.r.t full lines and bi-directed #' arrows. #' @author Kayvan Sadeghi #' @seealso \code{\link{MarkEqMag}}, \code{\link{MarkEqRcg}}, #' \code{\link{RepMarBG}}, \code{\link{RepMarUG}} #' @references Sadeghi, K. (2011). Markov equivalences for subclasses of #' loopless mixed graphs. \emph{Submitted}, 2011. #' @keywords graphs bidirected graph Markov equivalence maximal ancestral graph #' representational Markov equivalence #' @examples #' #' H<-matrix(c(0,10,0,0,10,0,0,0,0,1,0,100,0,0,100,0),4,4) #' RepMarBG(H) #' RepMarDAG<-function(amat) { if(class(amat)[1] == "igraph"){ amat<-grMAT(amat)} if(class(amat)[1] == "graphNEL"){ amat<-grMAT(amat)} if(class(amat)[1] == "character"){ amat<-grMAT(amat)} if(length(rownames(amat))!=ncol(amat) | length(colnames(amat))!=ncol(amat)){ rownames(amat)<-1:ncol(amat) colnames(amat)<-1:ncol(amat)} na<-ncol(amat) full<-sort(unique(which(amat%%100>9,arr.ind=TRUE)[,1])) arc<-sort(unique(which(amat>99,arr.ind=TRUE)[,1])) arrow<-sort(unique(as.vector(which(amat%%10==1,arr.ind=TRUE)))) S<-full[full!=full[1]] Ma<-full[1] while(length(S)>0){ dim<-c() for(i in S){ dim<-c(dim,length(which(amat[i,Ma]%%100>9,arr.ind=TRUE)))} s<-S[which(dim==max(dim))[1]] ns<-which(amat[s,Ma]%%100>9,arr.ind=TRUE) if(min(amat[ns,ns]+diag(length(ns)))==0){ return(FALSE)} Ma<-c(Ma,s) S<-S[S!=s]} at<-which(amat+t(amat)==0,arr.ind=TRUE) ai<-c() if(length(at[,1])!=0){ for(i in (1:length(at[,1]))){ for(j in 1:na){ if((amat[at[i,1],j]%%10==1 || amat[at[i,1],j]>99) &&(amat[at[i,2],j]%%10!=1) && (amat[at[i,2],j]<100) ){ ai<-c(ai,j)}} if(length(ai)==0){ break} for(j in ((1:na)[-ai])){ if((max(amat[ai,j]>99)==1) &&(amat[at[i,2],j]%%10==1 || amat[at[i,2],j]>99) && (amat[at[i,1],j]%%10!=1) && (amat[at[i,1],j]<100)){ return(list(verify = FALSE, amat = NA))}} ai<-c()}} for(i in Ma){ v<-which(amat[i,]%%100>9) for(j in v){ amat[i,j]<-1 amat[j,i]<-0}} at<-which(amat+t(amat)+diag(na)==0,arr.ind=TRUE) if(length(at[,1])!=0){ for(i in (1:length(at[,1]))){ for(j in 1:na){ if((amat[at[i,1],j]%%10==1 || amat[at[i,1],j]>99) &&(amat[at[i,2],j]%%10==1 || amat[at[i,2],j]>99) ){ amat[at[i,1],j]<-1 amat[at[i,2],j]<-1}}}} O<-c() Oc<-arrow while(length(Oc)>0){ for(i in Oc){ if(max(amat[i,Oc]%%10)==0){ O<-c(O,i) break}} Oc<-Oc[Oc!=i]} for(i in arc){ if(length(which(arrow==i))==0){ O<-c(O,i)}} aarc<-which(amat>99,arr.ind=TRUE) if(length(aarc)[1]>0){ for(i in 1:(length(aarc[,1]))){ # if(length(O)==0){ # amat[aarc[i,1],aarc[i,2]]<-0 if(which(O==aarc[i,1])>which(O==aarc[i,2])){ amat[aarc[i,1],aarc[i,2]]<-1} else{ amat[aarc[i,1],aarc[i,2]]<-0}}} return(list(verify = TRUE, amat = amat)) } ################################################################################## ggm/MD50000644000176200001440000001034713622245342011344 0ustar liggesusersfb6274d3e7f4de4a8406721ad949f771 *DESCRIPTION 216249f73b60e9ade64c537b636ee3c1 *NAMESPACE f7215f286638493c5f992bb706ed6eac *NEWS bfd8cb253f14c7c24214479c064424fc *R/functions.R 34e24385d5e8662021c7b0a0f1519ae6 *R/ggm_latest-package.R 8e7390f076a0bb9f9d82ed8ac456a2da *R/kayvan.R fa242bd826362329beeb9f0e8f1094be *data/anger.rda 28055191f4a4aaf467145937e9e7814a *data/derived.rda 0b5c0cc117eae4754f82b412f36d263d *data/glucose.rda 5815935f611d23a4532a2653fdb543dd *data/marks.rda bc1a7337f079613cdeb4100ab7bb110f *data/stress.rda 39ba6b4b85ac4f7e5b64de066a249044 *data/surdata.rda b0c3b0ba0691e1f65b9ac1a2b8502576 *man/AG.Rd 3e3875b460787ede15be2a37fc80eeeb *man/DAG.Rd ab57c098b120085c48f1b68e00efbcfa *man/DG.Rd 5647109c451f5337ca60fedddba91c65 *man/In.Rd 3e8047930d6ba3705643e0b76446ad4b *man/InducedGraphs.Rd 9a2f23053286b74b23b8650e661918b2 *man/MAG.Rd ee7b8ac40226418bc89ac500378531d1 *man/MRG.Rd 0c05912dd7c4028ccd9e2aa66217fde0 *man/MSG.Rd 549e83558f3f649a214add241bee3e7c *man/MarkEqMag.Rd 90ebb2039e3f9cef7d284d30faecb11d *man/MarkEqRcg.Rd 5c584efe468088b3f71d08c97ab71025 *man/Max.Rd 2c9924fb619d55efb9225cb112f4bc1a *man/RG.Rd 6783ffb1254ac20fa9834e6e8ddae467 *man/RepMarBG.Rd b767a6f159d718179b6073a7d7561841 *man/RepMarDAG.Rd e8a1f4b3455aa2ea8fdeb2b25a42f35e *man/RepMarUG.Rd c732050e5440dac809e69835c26a7374 *man/SG.Rd 5b17551f282e9727458639931ab84442 *man/SimpleGraphOperations.Rd 9e2e1de9fa68f6849bd995fe55411e64 *man/UG.Rd 02230d71eb702e9d0307609c20b578c0 *man/UtilityFunctions.Rd 4c3a7c6fa2967e8da1d653fb262ad028 *man/adjMatrix.Rd 32d1cd1be09ae75e6077f1d892a798d6 *man/allEdges.Rd edc26fe5ceeed922bf5874fdb1ccd69a *man/anger.Rd aa38de160e9bbc1a9e2af97de7f6d495 *man/basiSet.Rd b96013731968c659184a5e6c9c7a481b *man/bfsearch.Rd 8eae8c570f6424f9d289cf015177b8ba *man/binve.Rd 3247edd6c95ab387d6dcb91188571a60 *man/blkdiag.Rd 176bcdde4d07fee79b1f348d911d23a1 *man/blodiag.Rd 1fa84d2c8604c9b5c841d48f45cf4004 *man/checkIdent.Rd ef2346d1bfb66fa2e5c13246dd09787f *man/cmpGraph.Rd 1417175d67c3bffa2adee8f6e787a1fe *man/conComp.Rd ed4ce550a6493e7022acdbe652e658ea *man/correlations.Rd be394f4a9504b22f20be2d670f5e4e05 *man/cycleMatrix.Rd 8e568787964330762c56de123e9da675 *man/dSep.Rd 27f3124a0dca111780a45d3ce3909556 *man/derived.Rd 47ddb442232ec170174f493e0e3b1e6b *man/diagv.Rd 5490bb72bdda7683256c157ae4d4d5d5 *man/drawGraph.Rd 8dae61c18089e96db2c0f580c1799e98 *man/edgematrix.Rd f4ac6a734a33f554879b0d549e5eb244 *man/essentialGraph.Rd dd690695936eba8f3fe07c65ba3e0441 *man/findPath.Rd 3e427863b28e56743107398bf0abafb7 *man/fitAncestralGraph.Rd 06a2e0ef8e7c8ba4ef17e02fc6de3159 *man/fitConGraph.Rd e32d72597370ea8c20acb06bd30a321c *man/fitCovGraph.Rd 3c208a475bc1775b3d3a124c2be3e85a *man/fitDag.Rd 43091da524f2411d5407b431d9b9e8f0 *man/fitDagLatent.Rd da49d5347cf477b8b91dc9097c85c9c3 *man/fitmlogit.Rd dcc2116bbcdfe2ae6625681b35216edc *man/fundCycles.Rd 9869a4578e566fd05d7b8e122edf7517 *man/ggm.Rd c16b19d9456248bdc393b78cc75e8312 *man/glucose.Rd 7afefdce7674c15144f4afec67163fdb *man/grMAT.Rd dda41553ffe6bc97bc9282815862fbcd *man/icf.Rd 99787f57fbd691e1e80b05b4ee52f62b *man/isADMG.Rd fd229ee6f02d8632869246599c443ba9 *man/isAG.Rd 5dff8e89ae1254a6111978bec2c81cd8 *man/isAcyclic.Rd fd314f77b723b0408ed6d37b8efbee3d *man/isGident.Rd 9960cbbda160415a2f6ed2dd79564a42 *man/makeMG.Rd 8ccdaeec90e4b803cfbf11924bc1593c *man/marg.param.Rd 457efbee8af98a450b26d7cc0915bf15 *man/marks.Rd 75c95278a28bf5a77292fbc55580e706 *man/mat.mlogit.Rd 6efc6c11f012178c0cf868c7d74f8fab *man/msep.Rd f676955ee999cedeba80400f8bc4caa7 *man/null.Rd d0c414e24ba960be521a0595d81bd7f6 *man/parcor.Rd 3c556de20fc2a810007ed8612677eed6 *man/pcor.Rd c556749e74922268f263ea40c58d20e0 *man/pcor.test.Rd 922da1ecf9e690da319181ad7aa5e2ed *man/plotGraph.Rd c7484eb69f5e0aef83ae0ad2608401ca *man/powerset.Rd 2b821005a5843b3d16c57462cdab8efb *man/rcorr.Rd 0932d501517fe700da435ec235823d7e *man/rnormDag.Rd af76a69c6a5f28a9a3d6c60fa6fd86e6 *man/rsphere.Rd 9f46a7b70cac4c4e1dedc8fffe6f6475 *man/shipley.test.Rd 7c1fe7a6fdd27ecf4aae296b0755c02e *man/stress.Rd b40e67f270cf73eda41e89b6d443f3b5 *man/surdata.Rd 9d329252c3e65acffb04863500e01f80 *man/swp.Rd c47624e0beb0e7b08e0e46cb7364f4e7 *man/topSort.Rd 141278dee8ac77975fe103deb0a4422a *man/transClos.Rd 82705a42917e86a09d59fab548c80c9d *man/triDec.Rd 2e6f9d9a2a0aa878b86efb3a0f1c605d *man/unmakeMG.Rd