sn/0000755000176000001440000000000012262756006010730 5ustar ripleyuserssn/inst/0000755000176000001440000000000012262741374011707 5ustar ripleyuserssn/inst/CITATION0000644000176000001440000000153112262554467013051 0ustar ripleyuserscitHeader("To cite the 'sn' package in publications use:") if(!exists("meta") || is.null(meta)) meta <- packageDescription("sn") citEntry(entry="manual", title = paste("The {R} \\texttt{sn} package : ", "The skew-normal and skew-$t$ distributions (version ", meta$Version, ")", sep=""), author = personList(as.person("A. Azzalini")), address = "Universit\\`a di Padova, Italia", year = substr(meta$Date, 1, 4), url = "http://azzalini.stat.unipd.it/SN", textVersion = paste("Azzalini, A. (", substr(meta$Date, 1, 4), "). ", "The R 'sn' package: The skew-normal and skew-t distributions", " (version ", meta$Version, "). ", "URL http://azzalini.stat.unipd.it/SN", sep="") ) sn/NAMESPACE0000644000176000001440000000172312245406246012151 0ustar ripleyusers # S3method(residuals, selm) # S3method(residuals, mselm) # S3method(fitted, selm) # S3method(fitted, mselm) import("methods") importFrom("graphics", plot) importFrom("stats", optim, nlminb) importFrom("stats", residuals) importFrom("stats", fitted) import("stats4") importFrom("mnormt", pmnorm, dmt, pmt, pd.solve) importFrom("numDeriv", grad, hessian) export(T.Owen, zeta, sn.cumulants, st.cumulants, dsn, psn, qsn, rsn, dst, pst, qst, rst, dsc, psc, qsc, rsc, dmsn, pmsn, rmsn, dmst, pmst, rmst, dmsc, pmsc, rmsc, makeSECdistr, marginalSECdistr, affineTransSECdistr, conditionalSECdistr, cp2dp, dp2cp, sn.infoUv, sn.infoMv, selm, MPpenalty, Qpenalty, selm.fit, sn.mple, st.mple, msn.mle, msn.mple, mst.mple ) exportMethods("show", "plot", "summary", "coef", "logLik", "vcov", "residuals", "fitted") exportClasses("SECdistrUv", "SECdistrMv", "summary.SECdistrUv", "summary.SECdistrMv", "selm", "mselm", "summary.selm", "summary.mselm") sn/data/0000755000176000001440000000000012262741374011643 5ustar ripleyuserssn/data/frontier.rda0000644000176000001440000000071712262741374014170 0ustar ripleyusers r0b```b`b&f H020pi" yμ/nQz}[ޱgJUݸof~ί/?dhIY{8B~Iċ~g+;2gG|w&-f Vԁf|s_bao%km[q;$tT+Ծmz& &y|Ou#Tˇݤb^ZgL.nn! Y׭ů6/I3A`?o/.hWoו#b^Y^iTlG{&UǞIQs5B+~֘N͋*&;Xrb7(߽^1H]U/$3N+޸_GS>|Skg,8*}'S閍 _t%GEu@Q-/墻:<'NK/\/pAj=U\'lT.|e?V.*;ڟU~*/b~_t ׯʕ/%S*}/]6c_6DK#{})U:㣸 Q٨X3SW n=P;c~z! 3%1A/ETv*K'—q(!N~J3w?A!DhwL/]K姥BޠMDvT~SI2+v~LwSpOWJer2)]O=NȥC2gE~V0kqEH~u8͖]f)?x~5K5@?ӕ?]4clT'wDu׉oCԃst?_}Sۧ9~IYU;Uwi<¾AG7}?I6h/h3_UзpΒ|s^갣o-~J|ßOj)NպYӨ1haq\ow=5.;rm#7~?"zpyOw&~E } g6&zBt$"a "wrM1]ʷxJ W'JxY UNo(q%)t)}VŒ1G"Kq_$9ID8ȯIRpJύ>r?cfɱDtY Wt z/~(,[(趎Ъi DoW畒Y]&|ezDr.Q+9oV9HX^bE6 '%¿\ hQY)~K)/zK~fJWVkʷJ%ߤ+.ٴ.TފCSTU8_?뿈[WuF%i=$^'?/J/{Gx_5^_+:xsNϯH sUzLϥ(Wy_4DuBrb˲˧ң Jh6\Ty=C+|4T\=~ =<$rU('P^e{}V:DOSN^zv׺0KT{9':![C {It+gGxO,&dǠ\e(GK#Wg^A?ڇȢ'Y?\Z();>vЕ_JHGj#$ z?Rrz 27KSeKviv'^v~xߠQB?P{_Lr?^k'3}sa+ቊdW{_7W%/'j7ߤs3< ]ɾjcg?%Dz1}'`O]Cpq]vI0Hڧ-]'鏆Je3.؍1B2Gy0ПS=ҡ?FK/hk7o{yT~Ӽ"*߭~G}g;n@8FIu](|w4F*˯*_(=0~}BV;#P~rR?QoeԮ~/<]B;7#"V/{訽'>?~'?i5P{P)/r z_?xGkt%Gj_G~v{RjϤ?ho'ʡ#?ǎ/Hrvvd_P$zB޿8B'|HGN{p?+?Gy/?9%?n=#~$:'ϩq\Wx>ymƑsjO\(ts8yO{@~~ z[7Gpf8/T_JMqY0YPqqQC_<q0xT.g4Nz>x5J 5C?׸6L]oj~P!EO{]s%=k^>>.؝zk'b,~az~{j>w; Bs-}h'~@vڧ.SWhANx{^|ٮ]N: !r7#߲??r~_~8>8\<.JΛ~}|4٩9R.}ްz~I<J|/AϟsNU~8?)|v><VgN;~;褝ި4NO>#Վ>)_W9QMZN٥v|Ýrtkgm8\5L/;_sF#o,ac9^zu䠞D/TPҋJI?">GkC&[S_W[9-:%K#w@d~K',#:sHtz~XW#c~*2NO qS'}C`̻5}Rʣ7G~r:d&^K# /'pOyR`s1K'{ JفQ{|[ik'6=Z|y;#[_2=R\XZK9/Pa?rpN8zZ G {Hd~W9O_YMNNiPڦN]};4OF/𳶄9%krxeMiK#|"jEgyž3^޷T?oev{\gV״yΏ r_ݖE?Z\J_|_~믭),q~z@gE~?K]\R_umvt,?ٻ tfgǖ~ϙ?49MNȹ&MBŎ_mԍɁ]\ǯW8 gnMoɍG/.]x8Ҕ5N?]pq(5{ϗ:y-s#8t_K| /u6yN>/>[تsz]Ԯ3Zglci=mz+~/Hm6r<[-ko9RB9}V8ځoރ?[藛ܸ: n49{ĭq FCm\7/xԭGuBH_*\9n5}Yi 3^g=Pn#-PM}݆|uת]<~[3lG%n \t߸w9bu7M-#9AsQz}~{70s C|.q OB$y 硹o'(|J¹a?qNs!:S\5UTP8_vKLŹpNR9pnTgC /ڙ2>ܝG9{݂K˸"~1z)W9}~{tO^HOs?ʽD!p!{|~)3/qWe _I+.%'r#'@_ƣ!yKyXLzg8NHB ڵΟK )=Ay_ys@=T}Q'p^^(/OS~J?G9`GG/xыBСxХ?tWByʃ_.I]`W?vKzDoG;xOH/E=N􉞼_8ܽό;!qkv7 Uj(⬱]C<1%.|Q9[.+!A]u~(Ϻ ėӽzلa_v=Ǻo=؅8O.yN _+"q"O_* ~ M=u$g`}ǼH.8{qλpC)Yo<=_qnĹc=K#gw p?8;z\2?xZ_8Z̿ڃ]\q>!%~?'qL! 2Ľql; 7 +?qo8 /q9\|, =C|hwDQ1ڧ{ޭx1AϿcU+*7Iau~ d?ջC|VUWHOѢC<)ޏވuXc4>ݩc/~wxpx3N=z>NxƉ?xþ7}jT v9쳄ċsqqHg-\>q#]<}İ?oۧvK_'3#^)UpQcƣBqa?q|k]w*w$#bU1dITOM{pL?NP~OxLpf|~]jS~"}OS{T~+"SgB7U'œcq3^}= 4q{ƙoe*{"NY-p1.xMg8Ľ fnϸO/q'gG\2 Ļ"O\.ROYr9>yW;羧 /8v=Ugoa] ||3t93nqG؇ͫO"b}}2Qz qOp#?qB1|T7uFJ*Fu͈ #z'.S87A)G '.w?!q\ܹ\w>}  ;O++岮w oK߉8*؏d6~?qBa+p΀VCo.QX3혗?(3r/~yG"~: ?#y`mXHo|WG ?ƥ}ߡxB\&~(veߌu1O8YWsx\Zžj9pˇ}/yrCO}};m0I`C+߱Y7СU'Ž+rGO~RR!}gKKf)rF85]B?S=GsHn?M2^fg`?s?w 9o~ LY_qs~'s8o̸@Ǿ _?Og?]ݹ,WD!pӊۮrw4U~}쟫;j3n]=Wx mpCe܈9s¹4?+*%}2 TIw5Y jDgʡ?O^ڟc}A9UϞ _=hc%_ƀ[|ؗw5&}I8' 7E)([cGRw sj<b+V{pMu]LgDH8oa~p 5iS_E_3I+Ɖ`$/1kv+ppNUf_)ؙv.~c/䘤fywOW?Whwؑ~qTN7|OJM_O3_ qtU[Ư:<E4^og?dq~˺Gۡ>['T6^ݫWsG}]W[8;oS~qE ɷS3+Nބo~ݧ#s_C<5YoS8w4X{/{o?ީpح8bec);oWa8`8{K{WUd??P}T;؃{OD>|hcʱ#c.ԓ=vFwJ{S |h~MkÒ0q3+?WCǛ^V[=m.B?._Ⱡnķ8 'l\g$g%TiJOk}jq_/ɵڅvkDz_̶}9\C0_C;Bl=}hv@#_gd~'GRoo3k_}dokz]^WEܫ!) !^껶Zʯ(xm;zͷ'/ՇܼcOWM;>o[UVOg#ݖg=Z2űÈVD'UJml\%9}9$xCk?nkGq<[3fcBܬ= M{v^tql\Jܥ,J~2 6Ṇ ??Ac/0{cHE?ʿ}⿛ݛju;a ]GBz&>y= h].n *]$B<<剻疸-]a>.>Kޕ. ="!!\ƅ"q_kI98_&M贸.ͥ>5qmB<׌o;w`?kLk#6Sd?=ZYu>q"˜}&Ǐ8V˝t` rwt;v}9^qkr| 2ו8jN:nQ:y*nsohWK|9QkD"ڗ!D`վaFze}Z;suwGw'6=ޭ^fmolojnh M]!3Oס j|=ood 6?յ=}=Aw MOڮ>e uon^'#pTOow{Ǝڻw8wuwg&1c\rx5[nʬC}Oun[7pզ]k׬Oo~jmW@yO:5wow\cGǺK]%nWeO;5iW=5^[]wm̞IR~;~Ԏ}}vgr*} ww_%>hwK ]ԅ]xח6*JMq {ԯS>'=w;#=ѧǎ .7~.w+ ^(}MwWvRR%[g^S?lϪW9nƝ釾.}Ӟ0N ģ*#|x!ݦq]=TM);d?vȏ'/\[G|mG4z\{g?wa7\۰WrPAہA; wR/9ۨW~yٷʷS.}NAyw9H0w =HqqX)v/wy'\?п'#t{(C?)~kT.^eOȻp`~__~y0'꯼ GI`@ ijƁƍ"IQx??(e/!g{K$)4~|[7 D`|({bkw{zՎ`Fsg_oxC^6=z5!;p0 ]/+Cᝮ~~~s|7]~y|vӟȚ39gþaǰpIg7z-7ދ>1i=?D3Tݑ7f:?La?=oOgrM]eS;ȎqG v~?.JO\Ò#KO=4(q94ÈxD~_d/zH(^=8zaϸt"lzZh?c˭tqWh8qH*?_'>'(#ćߑ?R}1ׄxм zlj#84<7!oBzFoTo,)M(~do) G_WKI^9_; IO}'hߧrjJyʵ8y 9'|䫟D~z?+~O>@d| ;Rɗ*?}xUrGy}:F "د sZO?E mW|?/ >U_;ę]W짾nϸ;M~ƥe`<E>-wgd[^qw'kx/̯xjKSz& By+s4~5xqY#n}vCFr'1_QvKi5f˗`! O.}"xGzO m7-p_L!S4OJg{)K>/ _&˽Sg=N3&'N=^g\ģ(O|%}*Ï'}VRC y.n?N%[]?7cW<<ؓ}^.o1=Zn_yΎ`Q7a_ƽNՈzD{#w}s=O87I~|k I3_z.?EŁf(^bI~O'[8]yB}rǁ3cWSN5"k(.];r؝gGWeO-ODʍ#N-Ϲ?~(1t|=dqX=ح0HBq3rsKpG +9񐗟Oxn@<<vg"nC'쌙E_{;X+g~>g]Hj̏ Kv?p+i\qWg=KK_BNtIkoϭz<0 sW]FzQ7r/PtN[åu1B4H?k~o~WٿW.=O}/]i8?ϵsL&LzWq?/>w׹z ?!OGk]&{FOX%2Oy?y h7seg?OGUkż3H Yk#wfwRb`=t7>Jk!O OC;{6oUq1 ~=QUXY)t} yGtOy#}p6{W|T}qķmc8.cל']1oHߚ&?9O *'|O}N xi]nardw-_v[ofVQ~dJy紾Ş cw~[ha1ЯpA gXX#o8w<ºC ң[#Bq?a ,@>ʺf}}cW?95z4ˎ!ߙߛ}v 8 ^_O~xۃv;:_`nApaAe]}8Ŏ;4م߅s yaAA&{em~}g؇^GNڍ^irvNF}6&٧8F>RSÏC K߁R .OߝGfyPxرW#Cڗʐ?:|??{>H^q|?ܸ:|t8R>k93ѧp ~8}&N8?MOS_^, x0ξ92s.ͱ-b]^p* #Ǧ?gg܀ß0C~ NH_᰾atÙd cM䬲/k܇ô瞑e8kb|7S~ }'6,HYܩ 弗/;}Z^np.%y |K@NuC#Wx6<פ/P|( N|rγ{ԎsΆ~!=~ XQ7prb!~RWOv\I K>)ٞ_-*b}DU-qՂU;~WqHj=]_jq:L;x?rUlV7o x--qd]g8q$?7Ʌ&V NR~{6WO× |J8|q,|>̓2ȯؽ<Ҽz#=k^W6o~zA˭R'k̓Ϩ0ϱ=x9kgأ*jU%~ԮƼVS;M}jq]*Zϱ_:qWG:x3̗ҿBYAW_ >`c{Z`Z$u9{Acr_q;gn9N{jj}v Yo;L߿g.^7Wװ_Ju*@z?ΟL?߶®*V๊}ZUznU͓'yP;݋1cAāƣGė}gCQɯ?ةL ;t?Vw<Nӟoz SjmNu! VFHqy!wYj~SEuCV sd?l7uH7nh>YAMe[:>tly~(RR>uw^c痱p]}pw|/G7{pÐ;9tOqq{/|qǡO̼7'k~ɇ41?;?qc%^U_orZ 0#004ϲT{Gz_~+g 32zx6?aD=σ~s| vr 0yQ6nc-L?l>~FW7FπFo?wScO@W9v-|O?;gz5q^ТcTgzGTM]L&^7^z4z< hV'EwGt{ԿǣַNQgyoaM'oJ>nuEΧA+գ{HC^ziV݇tMj>hZ"L1>ru=/@6SɍU5]K,:gߍ4/mKi!p*8|\2d4'sn/R/0000755000176000001440000000000012262547603011132 5ustar ripleyuserssn/R/sn-funct.R0000644000176000001440000047203112262547501013016 0ustar ripleyusers# file sn/R/sn-funct.R (various functions) # This file is a component of the package 'sn' for R # copyright (C) 1997-2014 Adelchi Azzalini # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 or 3 of the License # (at your option). # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # http://www.r-project.org/Licenses/ #--------- dsn <- function(x, xi=0, omega=1, alpha=0, tau=0, dp=NULL, log=FALSE) { if(!is.null(dp)) { if(!missing(alpha)) stop("You cannot set both 'dp' and component parameters") xi <- dp[1] omega <- dp[2] alpha <- dp[3] tau <- if(length(dp)>3) dp[4] else 0 } z <- (x-xi)/omega logN <- (-log(sqrt(2*pi)) -logb(omega) -z^2/2) if(abs(alpha) < Inf) logS <- pnorm(tau * sqrt(1+alpha^2) + alpha*z, log.p=TRUE) else logS <- log(as.numeric(sign(alpha)*z + tau > 0)) logPDF <- as.numeric(logN + logS - pnorm(tau, log.p=TRUE)) replace(logPDF, omega<= 0, NaN) if(log) logPDF else exp(logPDF) } psn <- function(x, xi=0, omega=1, alpha=0, tau=0, dp=NULL, engine, ...) { if(!is.null(dp)) { if(!missing(alpha)) stop("You cannot set both 'dp' and component parameters") xi <- dp[1] omega <- dp[2] alpha <- dp[3] tau <- if(length(dp)>3) dp[4] else 0L } z <- as.numeric((x-xi)/omega) nz <- length(z) na <- length(alpha) if(missing(engine)) engine <- if(na == 1 & nz > 3 & all(alpha*z > -5) & (tau == 0L)) "T.Owen" else "biv.nt.prob" if(engine == "T.Owen") { if(tau != 0 | na > 1) stop("engine='T.Owen' not compatible with other arguments") p <- pnorm(z) - 2 * T.Owen(z, alpha, ...) } else{ # engine="biv.nt.prob" p <- numeric(nz) alpha <- cbind(z, alpha)[,2] delta <- delta.etc(alpha) p.tau <- pnorm(tau) for(k in seq_len(nz)) { if(abs(alpha[k]) == Inf){ p[k] <- if(alpha[k] > 0) (pnorm(pmax(z[k],-tau)) - pnorm(-tau))/p.tau else 1- (pnorm(tau) - pnorm(pmin(z[k], tau)))/p.tau } else { # SNbook: formula (2.48), p.40 R <- matrix(c(1, -delta[k], -delta[k], 1), 2, 2) p[k] <- biv.nt.prob(0, rep(-Inf,2), c(z[k], tau), rep(0, 2), R)/p.tau } }} p <- pmin(1, pmax(0, as.numeric(p))) replace(p, omega <= 0, NaN) } qsn <- function (p, xi = 0, omega = 1, alpha = 0, tau=0, dp=NULL, tol = 1e-08, ...) { if(!is.null(dp)) { if(!missing(alpha)) stop("You cannot set both 'dp' and component parameters") xi <- dp[1] omega <- dp[2] alpha <- dp[3] tau <- if(length(dp)>3) dp[4] else 0 } max.q <- sqrt(qchisq(p,1)) + tau min.q <- -sqrt(qchisq(1-p,1)) + tau if(tau == 0) { if(alpha == Inf) return(as.numeric(xi + omega * max.q)) if(alpha == -Inf) return(as.numeric(xi + omega * min.q)) } na <- is.na(p) | (p < 0) | (p > 1) zero <- (p == 0) one <- (p == 1) p <- replace(p, (na | zero | one), 0.5) dp0 <- c(0, 1, alpha, tau) cum <- sn.cumulants(dp=dp0, n=4) g1 <- cum[3]/cum[2]^(3/2) g2 <- cum[4]/cum[2]^2 x <- qnorm(p) x <- (x + (x^2 - 1) * g1/6 + x * (x^2 - 3) * g2/24 - x * (2 * x^2 - 5) * g1^2/36) x <- cum[1] + sqrt(cum[2]) * x px <- psn(x, dp=dp0, ...) max.err <- 1 while (max.err > tol) { # cat("qsn:", x, "\n") # cat('x, px:', format(c(x,px)),"\n") # browser() x1 <- x - (px - p)/dsn(x, dp=dp0) # x1 <- pmin(x1,max.q) # x1 <- pmax(x1,min.q) x <- x1 px <- psn(x, dp=dp0, ...) max.err <- max(abs(px-p)) } x <- replace(x, na, NA) x <- replace(x, zero, -Inf) x <- replace(x, one, Inf) q <- as.numeric(xi + omega * x) # p0 <- psn(q, dp=dp) # cat("qsn check:\n"); print(cbind(p,p0,q)) q } # rsn <- function(n=1, xi=0, omega=1, alpha=0, tau=0, dp=NULL) { if(!is.null(dp)) { if(!missing(alpha)) stop("You cannot set both 'dp' and component parameters") xi <- dp[1] omega <- dp[2] alpha <- dp[3] tau <- if(length(dp)>3) dp[4] else 0 } if(tau == 0) { u1 <- rnorm(n) u2 <- rnorm(n) id <- (u2 > alpha*u1) u1[id] <- (-u1[id]) z <- u1 } else { # for ESN use transformation method delta <- alpha/sqrt(1+alpha^2) truncN <- qnorm(runif(n, min=pnorm(-tau), max=1)) z <- delta * truncN + sqrt(1-delta^2) * rnorm(n) } y <- xi+omega*z attr(y,"parameters") <- c(xi,omega,alpha,tau) return(y) } dmsn <- function(x, xi=rep(0,length(alpha)), Omega, alpha, tau=0, dp=NULL, log=FALSE) { if(!(missing(alpha) & missing(Omega)) && !is.null(dp)) stop("You cannot set both component parameters and dp") if(!is.null(dp)){ if(length(dp) < 3) stop("wrong length of non-null 'dp'") xi <- drop(dp[[1]]) Omega <- dp[[2]] alpha <- dp[[3]] tau <- if(length(dp) == 4) dp[[4]] else 0 } if(any(abs(alpha) == Inf)) stop("Inf's in alpha are not allowed") d <- length(alpha) Omega <- matrix(Omega,d,d) invOmega <- pd.solve(Omega, silent=TRUE, log.det=TRUE) if (is.null(invOmega)) stop("Omega matrix is not positive definite") logDet <- attr(invOmega, "log.det") x <- if(is.vector(x)) matrix(x, 1, d) else data.matrix(x) if(is.vector(xi)) xi <- outer(rep(1,nrow(x)), xi) if(tau == 0){ log.const <- logb(2) alpha0 <- 0 } else { log.const <- -pnorm(tau, log.p=TRUE) O.alpha <- cov2cor(Omega) %*% alpha alpha0 <- tau*sqrt(1+sum(alpha* O.alpha)) } X <- t(x - xi) Q <- apply((invOmega %*% X) * X, 2, sum) L <- alpha0 + as.vector(t(X/sqrt(diag(Omega))) %*% as.matrix(alpha)) logPDF <- (log.const - 0.5 * Q + pnorm(L, log.p = TRUE) - 0.5 * (d * logb(2 * pi) + logDet)) if (log) logPDF else exp(logPDF) } pmsn <- function(x, xi=rep(0,length(alpha)), Omega, alpha, tau=0, dp=NULL, ...) { if(!(missing(alpha) & missing(Omega)) && !is.null(dp)) stop("You cannot set both component parameters and dp") if(!is.null(dp)){ xi <- dp$xi Omega <- dp$Omega alpha <- dp$alpha tau <- if(is.null(dp$tau)) 0 else dp$tau } if(any(abs(alpha) == Inf)) stop("Inf's in alpha are not allowed") d <- length(alpha) Omega<- matrix(Omega,d,d) omega<- sqrt(diag(Omega)) delta_etc <- delta.etc(alpha, Omega) delta <- delta_etc$delta Ocor <- delta_etc$Omega.cor Obig <- matrix(rbind(c(1,-delta), cbind(-delta,Ocor)), d+1, d+1) nx <- if(is.matrix(x)) nrow(x) else 1 p.tau <- pnorm(tau) if(nx == 1) { if(!is.vector(x)) stop("x must be either a vector or a matrix") z0 <- c(tau,(x-xi)/omega) p <- pmnorm(z0, mean=rep(0,d+1), varcov=Obig, ...)/p.tau } else { p <- numeric(nx) z <- t(t(x - outer(rep(1,nx), xi))/omega) z0 <- cbind(tau, z) for(k in seq_len(nx)) p[k] <- pmnorm(z0[k,], mean=rep(0,d+1), varcov=Obig, ...)/p.tau } p } rmsn <- function(n=1, xi=rep(0,length(alpha)), Omega, alpha, tau=0, dp=NULL) {# generates SN_d(..) variates using transformation method # if(!(missing(alpha) & missing(Omega) & !is.null(dp))) # stop("You cannot set both component parameters and dp") if(!is.null(dp)) { dp0 <- dp if(is.null(dp0$tau)) dp0$tau <- 0 } else dp0 <- list(xi=xi, Omega=Omega, alpha=alpha, tau=tau) if(any(abs(dp0$alpha) == Inf)) stop("Inf's in alpha are not allowed") lot <- dp2cpMv(dp=dp0, family="SN", aux=TRUE) d <- length(dp0$alpha) y <- matrix(rnorm(n*d), n, d) %*% chol(lot$aux$Psi) # each row is N_d(0,Psi) if(dp0$tau == 0) truncN <- abs(rnorm(n)) else truncN <- qnorm(runif(n, min=pnorm(-dp0$tau), max=1)) truncN <- matrix(rep(truncN,d), ncol=d) delta <- lot$aux$delta z <- delta * t(truncN) + sqrt(1-delta^2) * t(y) y <- t(dp0$xi + lot$aux$omega * z) attr(y,"parameters") <- dp return(y) } # dst <- function (x, xi=0, omega=1, alpha=0, nu=Inf, dp=NULL, log=FALSE) { if(!is.null(dp)) { if(!missing(alpha)) stop("You cannot set both component parameters and dp") xi <- dp[1] omega <- dp[2] alpha <- dp[3] nu <- dp[4] } if (nu == Inf) return(dsn(x, xi, omega, alpha, log=log)) if (nu == 1) return(dsc(x, xi, omega, alpha, log=log)) z <- (x - xi)/omega pdf <- dt(z, df=nu, log=log) cdf <- pt(alpha*z*sqrt((nu+1)/(z^2+nu)), df=nu+1, log.p=log) if(log) logb(2) + pdf + cdf -logb(omega) else 2 * pdf * cdf / omega } rst <- function (n=1, xi = 0, omega = 1, alpha = 0, nu=Inf, dp=NULL) { if(!is.null(dp)) { if(!missing(alpha)) stop("You cannot set both component parameters and dp") xi <- dp[1] omega <- dp[2] alpha <- dp[3] nu <- dp[4] } z <- rsn(n, 0, omega, alpha) if(nu < Inf) { v <- rchisq(n,nu)/nu y <- z/sqrt(v) + xi } else y <- z+xi attr(y,"parameters") <- c(xi,omega,alpha,nu) return(y) } pst <- function (x, xi=0, omega=1, alpha=0, nu=Inf, dp=NULL, ...) { if(!is.null(dp)) { if(!missing(alpha)) stop("You cannot set both component parameters and dp") xi <- dp[1] omega <- dp[2] alpha <- dp[3] nu <- dp[4] } if(length(alpha) > 1) stop("'alpha' must be a single value") if(length(nu) > 1) stop("'nu' must be a single value") if (nu <= 0) stop("nu must be non-negative") if (nu == Inf) return(psn(x, xi, omega, alpha)) if (nu == 1) return(psc(x, xi, omega, alpha)) ok <- !(is.na(x) | (x==Inf) | (x==-Inf)) z <- ((x-xi)/omega)[ok] if(abs(alpha) == Inf) { z0 <- replace(z, alpha*z < 0, 0) p <- pf(z0^2, 1, nu) return(if(alpha>0) p else (1-p)) } fp <- function(v, alpha, nu, t.value) psn(sqrt(v) * t.value, 0, 1, alpha) * dchisq(v * nu, nu) * nu if(round(nu)==nu && (nu < (8.20 + 3.55* log(log(length(z)+1))))) p <- pst_int(z, 0, 1, alpha, nu) # "method 4" else { p <- numeric(length(z)) for (i in seq_len(length(z))) { if(abs(z[i]) == Inf) p[i] <- (1+sign(z[i]))/2 else { if(round(nu)==nu) p[i] <- pmst(z[i], 0, matrix(1,1,1), alpha, nu, ...) # method 1 else { # upper <- if(absalpha> 1) 5/absalpha + 25/(absalpha*nu) else 5+25/nu upper <- 10 + 50/nu if(z[i] < upper) # method 2 p[i] <- integrate(dst, -Inf, z[i], dp=c(0,1,alpha, nu), ...)$value else # method 3 p[i] <- integrate(fp, 0, Inf, alpha, nu, z[i], ...)$value }} }} pr <- rep(NA, length(x)) pr[x==Inf] <- 1 pr[x==-Inf] <- 0 pr[ok] <- p return(pmax(0,pmin(1,pr))) } pst_int <- function (x, xi=0, omega=1, alpha=0, nu=Inf) {# Jamalizadeh, A. and Khosravi, M. and Balakrishnan, N. (2009) if(nu != round(nu) | nu < 1) stop("nu not integer or not positive") z <- (x-xi)/omega if(nu == 1) atan(z)/pi + acos(alpha/sqrt((1+alpha^2)*(1+z^2)))/pi else { if(nu==2) 0.5 - atan(alpha)/pi + (0.5 + atan(z*alpha/sqrt(2+z^2))/pi)*z/sqrt(2+z^2) else (pst_int(sqrt((nu-2)/nu)*z, 0, 1, alpha, nu-2) + pst_int(sqrt(nu-1)*alpha*z/sqrt(nu+z^2), 0, 1, 0, nu-1) * z * exp(lgamma((nu-1)/2) +(nu/2-1)*log(nu)-0.5*log(pi)-lgamma(nu/2) -0.5*(nu-1)*log(nu+z^2))) } } qst <- function (p, xi = 0, omega = 1, alpha = 0, nu=Inf, tol = 1e-8, dp = NULL, ...) { if(!is.null(dp)) { if(!missing(alpha)) stop("You cannot set both component parameters and dp") xi <- dp[1] omega <- dp[2] alpha <- dp[3] nu <- dp[4] } if(length(alpha) > 1) stop("'alpha' must be a single value") if(length(nu) > 1) stop("'nu' must be a single value") if (nu <= 0) stop("nu must be non-negative") if (nu == Inf) return(qsn(p, xi, omega, alpha)) if (nu == 1) return(qsc(p, xi, omega, alpha)) if (alpha == Inf) return(xi + omega * sqrt(qf(p, 1, nu))) if (alpha == -Inf) return(xi - omega * sqrt(qf(1 - p, 1, nu))) na <- is.na(p) | (p < 0) | (p > 1) abs.alpha <- abs(alpha) if(alpha < 0) p <- (1-p) zero <- (p == 0) one <- (p == 1) x <- xa <- xb <- xc <- fa <- fb <- fc <- rep(NA, length(p)) nc <- rep(TRUE, length(p)) # not converged (yet) nc[(na| zero| one)] <- FALSE fc[!nc] <- 0 xa[nc] <- qt(p[nc], nu) xb[nc] <- sqrt(qf(p[nc], 1, nu)) fa[nc] <- pst(xa[nc], 0, 1, abs.alpha, nu) - p[nc] fb[nc] <- pst(xb[nc], 0, 1, abs.alpha, nu) - p[nc] regula.falsi <- FALSE while (sum(nc) > 0) { # alternate regula falsi/bisection xc[nc] <- if(regula.falsi) xb[nc] - fb[nc] * (xb[nc] - xa[nc])/(fb[nc] - fa[nc]) else (xb[nc] + xa[nc])/2 fc[nc] <- pst(xc[nc], 0, 1, abs.alpha, nu) - p[nc] pos <- (fc[nc] > 0) xa[nc][!pos] <- xc[nc][!pos] fa[nc][!pos] <- fc[nc][!pos] xb[nc][pos] <- xc[nc][pos] fb[nc][pos] <- fc[nc][pos] x[nc] <- xc[nc] nc[(abs(fc) < tol)] <- FALSE regula.falsi <- !regula.falsi } # x <- replace(x, na, NA) x <- replace(x, zero, -Inf) x <- replace(x, one, Inf) q <- as.numeric(xi + omega * sign(alpha)* x) names(q) <- names(p) return(q) } dmst <- function(x, xi=rep(0,length(alpha)), Omega, alpha, nu=Inf, dp=NULL, log = FALSE) { if(!(missing(alpha) & missing(Omega)) && !is.null(dp)) stop("You cannot set both component parameters and dp") if(!is.null(dp)) { if(length(dp) != 4) stop("wrong length of non-null 'dp'") xi <- drop(dp[[1]]) Omega <- dp[[2]] alpha <- dp[[3]] nu <- dp[[4]] } if(any(abs(alpha) == Inf)) stop("Inf's in alpha are not allowed") if (nu == Inf) return(dmsn(x, xi, Omega, alpha, log = log)) d <- length(alpha) Omega <- matrix(Omega, d, d) if(!all(Omega - t(Omega) == 0)) return(NA) # stop("Omega not a symmetric matrix") invOmega <- pd.solve(Omega, silent=TRUE, log.det=TRUE) if(is.null(invOmega)) return(NA) # stop("Omega matrix is not positive definite") logDet <- attr(invOmega, "log.det") x <- if(is.vector(x)) matrix(x, 1, d) else data.matrix(x) if(is.vector(xi)) xi <- outer(rep(1,nrow(x)), xi) X <- t(x - xi) Q <- apply((invOmega %*% X) * X, 2, sum) L <- as.vector(t(X/sqrt(diag(Omega))) %*% as.matrix(alpha)) if(nu < 1e4) { log.const <- lgamma((nu + d)/2)- lgamma(nu/2)-0.5*d*logb(nu) log1Q <- logb(1+Q/nu) } else { log.const <- (-0.5*d*logb(2)+ log1p((d/2)*(d/2-1)/nu)) log1Q <- log1p(Q/nu) } log.dmt <- log.const - 0.5*(d * logb(pi) + logDet + (nu + d)* log1Q) log.pt <- pt(L * sqrt((nu + d)/(Q + nu)), df = nu + d, log.p = TRUE) logPDF <- logb(2) + log.dmt + log.pt if (log) logPDF else exp(logPDF) } rmst <- function(n=1, xi=rep(0,length(alpha)), Omega, alpha, nu=Inf, dp=NULL) { if(!(missing(alpha) & missing(Omega)) && !is.null(dp)) stop("You cannot set both component parameters and dp") if(!is.null(dp)){ if(!is.null(dp$xi)) xi <- dp$xi else if(!is.null(dp$beta)) xi <- as.vector(dp$beta) Omega <- dp$Omega alpha <- dp$alpha nu <- dp$nu } if(any(abs(alpha) == Inf)) stop("Inf's in alpha are not allowed") d <- length(alpha) x <- if(nu==Inf) 1 else rchisq(n,nu)/nu z <- rmsn(n, rep(0,d), Omega, alpha) y <- t(xi+ t(z/sqrt(x))) attr(y,"parameters") <- list(xi=xi, Omega=Omega, alpha=alpha, nu=nu) return(y) } pmst <- function(x, xi=rep(0,length(alpha)), Omega, alpha, nu=Inf, dp=NULL, ...) { if(!(missing(alpha) & missing(Omega)) && !is.null(dp)) stop("You cannot set both component parameters and dp") if(!is.null(dp)){ if(!is.null(dp$xi)) xi <- dp$xi else if(!is.null(dp$beta)) xi <- as.vector(dp$beta) Omega <- dp$Omega alpha <- dp$alpha nu <- dp$nu } if(!is.vector(x)) stop("x must be a vector") if(any(abs(alpha) == Inf)) stop("Inf's in alpha are not allowed") d <- length(alpha) Omega<- matrix(Omega,d,d) omega<- sqrt(diag(Omega)) Ocor <- cov2cor(Omega) O.alpha <- as.vector(Ocor %*% alpha) delta <- O.alpha/sqrt(1+sum(alpha*O.alpha)) Obig <- matrix(rbind(c(1,-delta), cbind(-delta,Ocor)), d+1, d+1) if(nu == as.integer(nu)) { z0 <- c(0,(x-xi)/omega) if(nu < .Machine$integer.max) p <- 2 * pmt(z0, mean=rep(0,d+1), S=Obig, df=nu, ...) else p <- 2 * pmnorm(z0, mean=rep(0,d+1), varcov=Obig, ...) } else { # for fractional nu, use formula in the "extended SE paper" z <- (x-xi)/omega fp <- function(v, Ocor, alpha, nu, t.value) { pv <- numeric(length(v)) for(k in seq_len(length(v))) pv[k] <- (dchisq(v[k] * nu, nu) * nu * pmsn(sqrt(v[k]) * t.value, 0, Ocor, alpha) ) pv} p <- integrate(fp, 0, Inf, Ocor, alpha, nu, z, ...)$value } p } dmsc <- function(x, xi=rep(0,length(alpha)), Omega, alpha, dp=NULL, log = FALSE) { if(is.null(dp)) dp <- list(xi=xi, Omega=Omega, alpha=alpha, nu=1) else dp$nu <- 1 dmst(x, dp=dp, log = log) } pmsc <- function(x, xi=rep(0,length(alpha)), Omega, alpha, dp=NULL, ...) { if(is.null(dp)) dp <- list(xi=xi, Omega=Omega, alpha=alpha, nu=1) else dp$nu <- 1 pmst(x, dp=dp, ...) } rmsc <- function(n=1, xi=rep(0,length(alpha)), Omega, alpha, dp=NULL) { if(is.null(dp)) dp <- list(xi=xi, Omega=Omega, alpha=alpha, nu=1) else dp$nu <- 1 rmst(n, dp=dp) } dsc <- function(x, xi=0, omega=1, alpha=0, dp=NULL, log = FALSE) { # log.pt2 <- function(x) log1p(x/sqrt(2+x^2)) - log(2) if(!is.null(dp)){ if(!missing(alpha)) stop("You cannot set both 'dp' and component parameters") xi<- dp[1] omega <- dp[2] alpha <- dp[3] } z <- (x-xi)/omega logPDF <- (dcauchy(x, xi, omega, log=TRUE) + log1p(alpha*z/sqrt(1+z^2*(1+alpha^2)))) if(log) logPDF else exp(logPDF) } psc <- function(x, xi=0, omega=1, alpha=0, dp=NULL) {# Behboodian et al. / Stat. & Prob. Letters 76 (2006) p.1490, line 2 if(!is.null(dp)){ if(!missing(alpha)) stop("You cannot set both 'dp' and component parameters") xi<- dp[1] omega <- dp[2] alpha <- dp[3] } z <- (x-xi)/omega delta <- if(abs(alpha)==Inf) sign(alpha) else alpha/sqrt(1+alpha^2) atan(z)/pi + acos(delta/sqrt(1+z^2))/pi } qsc <- function(p, xi=0, omega=1, alpha=0, dp=NULL) {# Behboodian et al. / Stat. & Prob. Letters 76 (2006) 1488–1493, formula (4) if(!is.null(dp)){ if(!missing(alpha)) stop("You cannot set both 'dp' and component parameters") xi<- dp[1] omega <- dp[2] alpha <- dp[3] } na <- is.na(p) | (p < 0) | (p > 1) zero <- (p == 0) one <- (p == 1) p <- replace(p, (na | zero | one), 0.5) u <- (p-0.5)*pi delta <- if(abs(alpha)==Inf) sign(alpha) else alpha/sqrt(1+alpha^2) z <- delta/cos(u) + tan(u) z <- replace(z, na, NA) z <- replace(z, zero, -Inf) z <- replace(z, one, Inf) as.numeric(xi + omega*z) } rsc <- function(n=1, xi=0, omega=1, alpha=0, dp=NULL) { if(!is.null(dp)){ if(!missing(alpha)) stop("You cannot set both 'dp' and component parameters") xi <- dp[1] omega <- dp[2] alpha <- dp[3] } xi + rsn(n, 0, omega, alpha)/abs(rnorm(n)) } sn.cumulants <- function(xi = 0, omega = 1, alpha = 0, tau=0, dp=NULL, n=4) { cumulants.half.norm <- function(n=4){ n <- max(n,2) n <- as.integer(2*ceiling(n/2)) half.n <- as.integer(n/2) m <- 0:(half.n-1) a <- sqrt(2/pi)/(gamma(m+1)*2^m*(2*m+1)) signs <- rep(c(1, -1), half.n)[seq_len(half.n)] a <- as.vector(rbind(signs*a, rep(0,half.n))) coeff <- rep(a[1],n) for (k in 2:n) { ind <- seq_len(k-1) coeff[k] <- a[k] - sum(ind*coeff[ind]*a[rev(ind)]/k) } kappa <- coeff*gamma(seq_len(n)+1) kappa[2] <- 1 + kappa[2] return(kappa) } if(!is.null(dp)) { if(!missing(alpha)) stop("You cannot set both component parameters and dp") dp <- c(dp,0)[1:4] dp <- matrix(dp, 1, ncol=length(dp)) } else dp <- cbind(xi,omega,alpha,tau) delta <- ifelse(abs(dp[,3])n) kv <- kv[-(n+1)] kv[2] <- kv[2] - 1 kappa <- outer(delta,1:n,"^") * matrix(rep(kv,nrow(dp)),ncol=n,byrow=TRUE) } else{ # ESN if(n>4){ warning("n>4 not allowed with ESN distribution") n <- min(n, 4) } kappa <- matrix(0, nrow=length(delta), ncol=0) for (k in 1:n) kappa <- cbind(kappa, zeta(k,tau)*delta^k) } kappa[,2] <- kappa[,2] + 1 kappa <- kappa * outer(dp[,2],(1:n),"^") kappa[,1] <- kappa[,1] + dp[,1] kappa[,,drop=TRUE] } zeta <- function(k, x) { # k integer in (0,5) if(k<0 | k>5 | k != round(k)) return(NULL) na <- is.na(x) x <- replace(x,na,0) x2 <- x^2 z <- switch(k+1, pnorm(x, log.p=TRUE) + log(2), ifelse(x>(-50), exp(dnorm(x, log=TRUE) - pnorm(x, log.p=TRUE)), -x/(1 -1/(x2+2) +1/((x2+2)*(x2+4)) -5/((x2+2)*(x2+4)*(x2+6)) +9/((x2+2)*(x2+4)*(x2+6)*(x2+8)) -129/((x2+2)*(x2+4)*(x2+6)*(x2+8)*(x2+10)) )), (-zeta(1,x)*(x+zeta(1,x))), (-zeta(2,x)*(x+zeta(1,x)) - zeta(1,x)*(1+zeta(2,x))), (-zeta(3,x)*(x+2*zeta(1,x)) - 2*zeta(2,x)*(1+zeta(2,x))), (-zeta(4,x)*(x+2*zeta(1,x)) -zeta(3,x)*(3+4*zeta(2,x)) -2*zeta(2,x)*zeta(3,x)), NULL) neg.inf <- (x == -Inf) if(any(neg.inf)) z <- switch(k+1, z, replace(z, neg.inf, Inf), replace(z, neg.inf, -1), replace(z, neg.inf, 0), replace(z, neg.inf, 0), replace(z, neg.inf, 0), NULL) if(k>1) z<- replace(z, x==Inf, 0) replace(z, na, NA) } st.cumulants <- function(xi=0, omega=1, alpha=0, nu=Inf, dp=NULL, n=4) { if(!is.null(dp)) { if(!missing(alpha)) stop("You cannot set both component parameters and dp") xi <- dp[1] omega <- dp[2] alpha <- dp[3] nu <- dp[4] } if(nu == Inf) return(sn.cumulants(xi, omega, alpha, n=n)) n <- min(as.integer(n),4) # if(nu <= n) stop("need nu>n") par <- cbind(xi,omega,alpha) delta <- par[,3]/sqrt(1+par[,3]^2) cum<- matrix(NA, nrow=nrow(par), ncol=n) cum[,1]<- mu <- b(nu)*delta # if(n>1) cum[,2] <- nu/(nu-2) - mu^2 # if(n>2) cum[,3] <- mu*(nu*(3-delta^2)/(nu-3) - 3*nu/(nu-2)+2*mu^2) # if(n>3) cum[,4] <- (3*nu^2/((nu-2)*(nu-4)) - 4*mu^2*nu*(3-delta^2)/(nu-3) # + 6*mu^2*nu/(nu-2)-3*mu^4)- 3*cum[,2]^2 r <- function(nu, k1, k2) 1/(1-k2/nu) - k1/(nu-k2) # (nu-k1)/(nu-k2) if(n>1 & nu>2) cum[,2] <- r(nu,0,2) - mu^2 if(n>2 & nu>3) cum[,3] <- mu*((3-delta^2)*r(nu,0,3) - 3*r(nu,0,2) + 2*mu^2) if(n>3 & nu>4) cum[,4] <- (3*r(nu,0,2)*r(nu,0,4) - 4*mu^2*(3-delta^2)*r(nu,0,3) + 6*mu^2*r(nu,0,2)-3*mu^4) - 3*cum[,2]^2 cum <- cum*outer(par[,2],1:n,"^") cum[,1] <- cum[,1]+par[,1] cum[,,drop=TRUE] } T.Owen <- function(h, a, jmax=50, cut.point=8) { T.int <-function(h, a, jmax, cut.point) { fui <- function(h,i) (h^(2*i))/((2^i)*gamma(i+1)) seriesL <- seriesH <- NULL i <- 0:jmax low<- (h <= cut.point) hL <- h[low] hH <- h[!low] L <- length(hL) if (L > 0) { b <- outer(hL, i, fui) cumb <- apply(b, 1, cumsum) b1 <- exp(-0.5*hL^2) * t(cumb) matr <- matrix(1, jmax+1, L) - t(b1) jk <- rep(c(1,-1), jmax)[1:(jmax+1)]/(2*i+1) matr <- t(matr*jk) %*% a^(2*i+1) seriesL <- (atan(a) - as.vector(matr))/(2*pi) } if (length(hH) > 0) seriesH <- atan(a)*exp(-0.5*(hH^2)*a/atan(a)) * (1+0.00868*(hH*a)^4)/(2*pi) series <- c(seriesL, seriesH) id <- c((1:length(h))[low],(1:length(h))[!low]) series[id] <- series # re-sets in original order series } if(!is.vector(a) | length(a)>1) stop("'a' must be a vector of length 1") if(!is.vector(h)) stop("'h' must be a vector") aa <- abs(a) ah <- abs(h) if(is.na(aa)) stop("parameter 'a' is NA") if(aa==Inf) return(sign(a)*0.5*pnorm(-ah)) # sign(a): 16.07.2007 if(aa==0) return(rep(0,length(h))) na <- is.na(h) inf <- (ah == Inf) ah <- replace(ah,(na|inf),0) if(aa <= 1) owen <- T.int(ah,aa,jmax,cut.point) else owen<- (0.5*pnorm(ah) + pnorm(aa*ah)*(0.5-pnorm(ah)) - T.int(aa*ah,(1/aa),jmax,cut.point)) owen <- replace(owen,na,NA) owen <- replace(owen,inf,0) return(owen*sign(a)) } #========================================================================= # new probability functions: SECdistr() etc makeSECdistr <- function(dp, family, name, compNames) { if(!(toupper(family) %in% c("SN","ESN","SC","ST"))) stop("unknown family") family <- toupper(family) ndp <- if(family %in% c("SN", "SC") ) 3 else 4 if(length(dp) != ndp) stop("wrong number of dp components") if(family == "ST") { nu <- as.numeric(dp[4]) if(nu <= 0) stop("'nu' for ST family must be positive") if(nu == Inf) { warning("ST family with 'nu==Inf' is changed to SN family") family <- "SN" dp <- dp[-4] }} if(is.numeric(dp)){ # univariate distribution if(dp[2] <= 0) stop("omega parameter must be positive") fourth <- switch(family, "SN"=NULL, "ESN"="tau", "SC"=NULL, "ST"="nu") names(dp) <- c("xi","omega","alpha",fourth) name <- if(!missing(name)) as.character(name)[1] else paste("Unnamed-", toupper(family), sep="") obj <- new("SECdistrUv", dp=dp, family=family, name=name) } else {if(is.list(dp)) {# multivariate distribution names(dp) <- rep(NULL,ndp) d <- length(dp[[3]]) if(any(abs(dp[[3]]) == Inf)) stop("Inf in alpha not allowed") if(length(dp[[1]]) != d) stop("mismatch of parameters size") Omega <- matrix(dp[[2]],d,d) if(any(Omega != t(Omega))) stop("Omega matrix must be symmetric") if(min(eigen(Omega, symmetric=TRUE, only.values=TRUE)$values) <= 0) stop("Omega matrix must be positive definite") dp0 <- list(xi=as.vector(dp[[1]]), Omega=Omega, alpha=dp[[3]]) name <- if(!missing(name)) as.character(name)[1] else paste("Unnamed-", toupper(family), "[d=", as.character(d), "]", sep="") if(family=="ST") dp0$nu <- nu if(family=="ESN") dp0$tau <- dp[[4]] if(d == 1) warning(paste( "A multivariate distribution with dimension=1 is a near-oxymoron.", "\nConsider using a 'dp' vector to define a univariate distribution.", "\nHowever, I still build a multivariate distribution for you.")) if(missing(compNames)) { compNames <- if(length(colnames(dp[[1]])) == d) colnames(dp[[1]]) else as.vector(outer("V",as.character(1:d),paste,sep="")) } else { if(length(compNames) != d) stop("Wrong length of 'compNames'") compNames <- as.character(as.vector(compNames)) } obj <- new("SECdistrMv", dp=dp0, family=family, name=name, compNames=compNames) } else stop("'dp' must be either a numeric vector or a list")} obj } summary.SECdistrUv <- function(object, cp.type="auto", probs) { cp.type <- match.arg(tolower(cp.type), c("proper", "pseudo", "auto")) family <- slot(object,"family") lc.family <- tolower(family) name <- slot(object,"name") dp <- slot(object,"dp") if(family=="ST" || family=="SC") { if(cp.type=="auto") cp.type <- if(family == "SC" | dp[4] <= 4) "pseudo" else "proper" } if(family=="SN" || family=="ESN") cp.type <- "proper" cp <- dp2cpUv(dp, family, cp.type) if(is.null(cp)) stop('Stop. Consider using cp.type=="pseudo"') if(missing(probs)) probs <- c(0.05, 0.25, 0.50, 0.75, 0.95) if(lc.family == "esn") lc.family <- "sn" q.fn <- get(paste("q",lc.family, sep=""), inherits = TRUE) q <- q.fn(probs, dp=dp) names(q) <- format(probs) cum <- switch(lc.family, "sn" = sn.cumulants(dp=dp, n=4), "st" = st.cumulants(dp=dp, n=4), rep(NA,4) ) std.cum <- c(gamma1=cum[3]/cum[2]^1.5, gamma2=cum[4]/cum[2]^2) oct <- q.fn(p=(1:7)/8, dp=dp) mode <- modeSECdistrUv(dp, family) alpha<- as.numeric(dp[3]) delta <- delta.etc(alpha) q.measures <- c(bowley=(oct[6]-2*oct[4]+oct[2])/(oct[6]-oct[2]), moors=(oct[7]-oct[5]+oct[3]-oct[1])/(oct[6]-oct[2])) aux <- list(delta=delta, mode=mode, quantiles=q, std.cum=std.cum, q.measures=q.measures) new("summary.SECdistrUv", dp=dp, family=family, name=name, cp=cp, cp.type=cp.type, aux=aux) } modeSECdistr <- function(dp, family) if(is.list(dp)) modeSECdistrMv(dp, family) else modeSECdistrUv(dp, family) modeSECdistrUv <- function(dp, family) { if(abs(dp[3]) < .Machine$double.eps) return(as.numeric(dp[1])) cp <- dp2cpUv(dp, family, cp.type="auto", upto=1) lc.family <- tolower(family) if(lc.family == "esn") lc.family <- "sn" d.fn <- get(paste("d", lc.family, sep=""), inherits = TRUE) int <- c(dp[1], cp[1]) if(abs(diff(int)) < .Machine$double.eps) return(mean(int)) opt <- optimize(d.fn, lower=min(int), upper=max(int), maximum=TRUE, dp=dp) as.numeric(opt$maximum) } modeSECdistrMv <- function(dp, family) { Omega <- dp[[2]] alpha <- dp[[3]] delta_etc <- delta.etc(alpha, Omega) alpha.star <- delta_etc$alpha.star if(alpha.star < .Machine$double.eps) return(dp[[1]]) lc.family <- tolower(family) if(lc.family == "esn") lc.family <- "sn" direct <- sqrt(diag(Omega)) * (delta_etc$delta/delta_etc$delta.star) if(lc.family == "sn") {# case SN: book (5.49), +ESN dp1 <- c(xi=0, omega=1, alpha=alpha.star, dp$tau) mode.can <- modeSECdistrUv(dp1, family) mode <- as.numeric(dp[[1]] + mode.can * direct) } else { # ST, SC: book Prop. 6.2 d.fn <- get(paste("dm", lc.family, sep=""), inherits = TRUE) f <- function(u, dp, direct) -d.fn(dp[[1]]+ u*direct, dp=dp, log=TRUE) maxM <- max(dp2cpMv(dp, family, "auto", upto=1)[[1]] - dp[[1]]/direct) opt <- optimize(f, lower=0, upper=maxM, dp=dp, direct=direct) mode <- as.numeric(dp[[1]]+ opt$minimum * direct) } return(mode) } summary.SECdistrMv <- function(object, cp.type="auto") { cp.type <- match.arg(tolower(cp.type), c("proper", "pseudo", "auto")) family <- slot(object,"family") name <- slot(object,"name") dp <- slot(object,"dp") if(family == "SN" || family == "ESN") cp.type <- "proper" if(family=="ST" || family=="SC") { if(cp.type=="auto") cp.type <- if(family == "SC" || dp$nu <= 4) "pseudo" else "proper"} cp <- dp2cpMv(dp, family, cp.type, aux=TRUE) aux <- cp$aux if(family=="SN" | family=="SC") cp <- cp[1:3] cp[["aux"]] <- NULL mode <- modeSECdistrMv(dp, family) aux0 <- list(mode=mode, delta=aux$delta, alpha.star=aux$alpha.star, delta.star=aux$delta.star, mardia=aux$mardia) new("summary.SECdistrMv", dp=dp, family=family, name=object@name, compNames=object@compNames, cp=cp, cp.type=cp.type, aux=aux0) } dp2cp <- function(dp, family, obj=NULL, cp.type="proper", upto=NULL) { if(!is.null(obj)){ if(!missing(dp)) stop("you cannot set both arguments dp and obj") obj.class <- class(obj) if(!(obj.class %in% c("SECdistrUv", "SECdistrMv"))) stop("wrong type of object") family <- slot(obj,"family") dp <- slot(obj,"dp") multiv <- (obj.class == "SECdistrMv") } else{ if(missing(family)) stop("family required") family <- toupper(family) if(!(family %in% c("SN", "ESN", "ST","SC"))) stop(gettextf("family '%s' is not supported", family), domain = NA) multiv <- is.list(dp) } if(multiv) dp2cpMv(dp, family, cp.type, upto=upto) else dp2cpUv(dp, family, cp.type, upto=upto) } dp2cpUv <- function(dp, family, cp.type="proper", upto=NULL) { # internal function; works also with regression parameters included cp.type <- match.arg(tolower(cp.type), c("proper", "pseudo", "auto")) family <- toupper(family) if(!(family %in% c("SN", "ESN", "ST", "SC"))) stop(gettextf("family = '%s' is not supported", family), domain = NA) if(family %in% c("SN","ESN")){ if(cp.type == "pseudo") warning("'cp.type=pseudo' makes no sense for SN and ESN families") p <- length(dp)-2-as.numeric(family=="ESN") omega <- dp[p+1] if(omega <= 0) stop("scale parameter 'omega' must be positive") alpha <- dp[p+2] tau <- if(family=="ESN") as.numeric(dp[p+3]) else 0 delta <- if(abs(alpha) < Inf) alpha/sqrt(1+alpha^2) else sign(alpha) mu.Z <- zeta(1,tau)*delta s.Z <- sqrt(1+zeta(2,tau)*delta^2) gamma1 <- zeta(3,tau)*(delta/s.Z)^3 sigma <- omega*s.Z mu <- dp[1:p] mu[1] <- dp[1]+sigma*mu.Z/s.Z beta1 <- if(p>1) mu[2:p] else NULL cp <- c(mu, sigma, gamma1, if(family=="ESN") tau else NULL) names(cp) <- param.names("CP", family, p, x.names=names(beta1)) } if(family=="ST" || family=="SC") { if(cp.type=="auto") cp.type <- if(family == "SC" || dp[4] <= 4) "pseudo" else "proper" } if(family %in% c("SC", "ST")) { fixed.nu <- if(family=="SC") 1 else NULL cp <- st.dp2cp(dp, cp.type, fixed.nu, jacobian=FALSE, upto=upto) if(is.null(cp)) {cat("no CP could be found\n"); return(invisible())} # param.type <- switch(cp.type, proper="CP", pseudo="pseudo-CP") # names(cp) <- param.names(param.type, family) } return(cp) } dp2cpMv <- function(dp, family, cp.type="proper", fixed.nu=NULL, aux=FALSE, upto=NULL) {# internal. NB: name of cp[1] must change according to dp[1] cp.type <- match.arg(cp.type, c("proper", "pseudo", "auto")) family <- toupper(family) if(!(family %in% c("SN", "ESN", "ST","SC"))) stop(gettextf("family '%s' is not supported", family), domain = NA) if(family %in% c("SN","ESN")){ if(cp.type == "pseudo") warning("'cp.type=pseudo' makes no sense for SN and ESN families") cp <- msn.dp2cp(dp, aux=aux) } if(family %in% c("SC","ST")){ if(cp.type=="auto") cp.type <- if(family == "SC" || dp$nu <= 4) "pseudo" else "proper" if(family == "SC") fixed.nu <- 1 cp <- mst.dp2cp(dp, cp.type=cp.type, fixed.nu=fixed.nu, aux=aux, upto=upto) if(is.null(cp)) {message("no CP could be found"); return(invisible())} } return(cp) } msn.dp2cp <- function(dp, aux=FALSE) {# dp2cp for multivariate SN and ESN alpha <- dp$alpha d <- length(alpha) Omega <- matrix(dp$Omega, d, d) omega <- sqrt(diag(Omega)) lot <- delta.etc(alpha, Omega) delta <- lot$delta delta.star <- lot$delta.star alpha.star <- lot$alpha.star names(delta) <- names(dp$alpha) tau <- if(is.null(dp$tau)) 0 else dp$tau mu.z <- zeta(1, tau) * delta sd.z <- sqrt(1 + zeta(2, tau) * delta^2) Sigma <- Omega + zeta(2,tau) * outer(omega*delta, omega*delta) gamma1 <- zeta(3, tau) * (delta/sd.z)^3 if(is.vector(dp[[1]])) { cp <- list(mean=dp[[1]] + mu.z*omega, var.cov=Sigma, gamma1=gamma1) } else { beta <- dp[[1]] beta[1,] <- beta[1,] + mu.z*omega cp <- list(beta=beta, var.cov=Sigma, gamma1=gamma1) } if(!is.null(dp$tau)) cp$tau <- tau if(aux){ lambda <- delta/sqrt(1-delta^2) D <- diag(sqrt(1+lambda^2)) Ocor <- lot$Omega.cor Psi <- D %*% (Ocor-outer(delta,delta)) %*% D Psi <- (Psi + t(Psi))/2 O.inv <- pd.solve(Omega) O.pcor <- -cov2cor(O.inv) O.pcor[cbind(1:d, 1:d)] <- 1 R <- force.symmetry(Ocor + zeta(2,tau)*outer(delta,delta)) ratio2 <- delta.star^2/(1+zeta(2,tau)*delta.star^2) mardia <- c(gamma1M=zeta(3,tau)^2*ratio2^3, gamma2M=zeta(4,tau)*ratio2^2) # see book: (5.74), (5.75) on p.153 cp$aux <- list(omega=omega, cor=R, Omega.inv=O.inv, Omega.cor=Ocor, Omega.pcor=O.pcor, lambda=lambda, Psi=Psi, delta=delta, delta.star=delta.star, alpha.star=alpha.star, mardia=mardia) } return(cp) } mst.dp2cp <- function(dp, cp.type="proper", fixed.nu=NULL, aux=FALSE, upto=NULL) {# dp2cp for multivariate ST, returns NULL if CP not found (implicitly silent) nu <- if(is.null(fixed.nu)) dp[[4]] else fixed.nu if(is.null(upto)) upto <- 4L if((round(upto) != upto)||(upto < 1)) stop("'upto' must be positive integer") if(nu <= upto && (cp.type =="proper")) return(NULL) if(cp.type == "proper") { if(nu <= upto) # stop(gettextf("d.f. '%s' too small, CP is undefined", nu), domain = NA) return(NULL) a <- rep(0, upto) tilde <- NULL } else { a <- (1:upto) tilde <- rep("~", upto) } alpha <- dp$alpha d <- length(alpha) Omega <- matrix(dp$Omega, d, d) omega <- sqrt(diag(Omega)) lot <- delta.etc(alpha, Omega) delta <- lot$delta delta.star <- lot$delta.star alpha.star <- lot$alpha.star comp.names <- names(dp$alpha) names(delta) <- comp.names mu0 <- b(nu+a[1]) * delta * omega names(mu0) <- comp.names mu.2 <- b(nu+a[2]) * delta * omega if(is.vector(dp[[1]])) cp <- list(mean=dp[[1]] + mu0) else { beta <- dp[[1]] beta[1,] <- beta[1,] + mu0 cp <- list(beta=beta) } if(upto > 1) { Sigma <- Omega * (nu+a[2])/(nu+a[2]-2) - outer(mu.2, mu.2) dimnames(Sigma) <- list(comp.names, comp.names) cp$var.cov <- Sigma } if(upto > 2) cp$gamma1 <- st.gamma1(delta, nu+a[3]) if(upto > 3) cp$gamma2M <- mst.gamma2M(delta.star^2, nu+a[4], d) names(cp) <- paste(names(cp), tilde, sep="") # cp <- cp[1:length(dp)] if(aux){ mardia <- mst.mardia(delta.star^2, nu, d) cp$aux <- list(fixed.nu=fixed.nu, omega=omega, Omega.cor=lot$Omega.cor, delta=delta, delta.star=delta.star, alpha.star=alpha.star, mardia=mardia) } return(cp) } mst.gamma2M <- function(delta.sq, nu, d) { # Mardia's index of kurtosis gamma_2 for ST-d if(delta.sq < 0 | delta.sq >1 ) stop("delta.sq not in (0,1)") ifelse(nu>4, {R <- b(nu)^2 * delta.sq * (nu-2)/nu R1R <- R/(1-R) (2*d*(d+2)/(nu-4) + (R/(1-R)^2)*8/((nu-3)*(nu-4)) +2*R1R^2*(-(nu^2-4*nu+1)/((nu-3)*(nu-4))+2*(nu/((nu-3)*b(nu)^2)-1)) +4*d*R1R/((nu-3)*(nu-4))) }, Inf) } mst.mardia <- function(delta.sq, nu, d) {# Mardia's gamma1 and gamam2 for MST; book (6.31), (6.32), p.178 if(delta.sq < 0 | delta.sq > 1) stop("delta.sq not in (0,1)") if(d < 1) stop("d < 1") cum <- st.cumulants(0, 1, sqrt(delta.sq/(1-delta.sq)), nu) mu <- cum[1] sigma <- sqrt(cum[2]) gamma1 <- cum[3]/sigma^3 gamma2 <- cum[4]/sigma^4 gamma1M <- if(nu > 3) (gamma1^2 + 3*(d-1)*mu^2/((nu-3)*sigma^2)) else Inf r <- function(nu, k1, k2) 1/(1 - k2/nu) - k1/(nu - k2) # (nu-k1)/(nu-k2) gamma2M <- if(nu > 4) (gamma2 + 3 +(d^2-1)*r(nu,2,4) +2*(d-1)*(r(nu,0,4) -mu^2*r(nu,1,3))/sigma^2 - d*(d+2)) else Inf return(c(gamma1M=gamma1M, gamma2M=gamma2M)) } cp2dp <- function(cp, family){ family <- toupper(family) if(!(family %in% c("SN", "ESN", "ST","SC"))) stop(gettextf("family '%s' is not supported", family), domain = NA) if(is.list(cp)) cp2dpMv(cp, family) else cp2dpUv(cp, family) } cp2dpUv <- function(cp, family, silent=FALSE, tol=1e-8) { # internal function; works also with regression parameters included if(family %in% c("SN","ESN")) { if(family=="ESN") stop("cp2dp for ESN not yet implemented") p <- length(cp)-2-as.numeric(family=="ESN") beta1 <- if (p>1) cp[2:p] else NULL b <- sqrt(2/pi) sigma <- cp[p+1] if(sigma <= 0) stop("s.d. must be positive") gamma1 <- cp[p+2] tau <- if(family=="ESN") as.numeric(cp[p+3]) else 0 max.gamma1 <- 0.5*(4-pi)*(2/(pi-2))^1.5 if (abs(gamma1) >= max.gamma1) { if (silent) return(NULL) else {message("gamma1 outside admissible range"); return(invisible())}} r <- sign(gamma1)*(2*abs(gamma1)/(4-pi))^(1/3) delta <- r/(b*sqrt(1+r^2)) alpha <- delta/sqrt(1-delta^2) mu.z <- b*delta sd.z <- sqrt(1-mu.z^2) beta <- cp[1:p] omega <- cp[p+1]/sd.z beta[1] <- cp[1] - omega*mu.z dp <- as.numeric(c(beta, omega, alpha)) names(dp) <- param.names("DP", family, p, x.names=names(beta1)) return(dp) } if(family == "ST") return(st.cp2dp(cp, silent=silent, tol=tol)) if(family == "SC") stop("this makes no sense for SC family") warning(gettextf("family = '%s' is not supported", family), domain = NA) invisible(NULL) } cp2dpMv <- function(cp, family, silent=FALSE, tol=1e-8) { # internal function if(family == "SN") dp <- msn.cp2dp(cp, silent) else if(family == "ESN") stop("cp2dp for ESN not yet implemented") else if(family == "ST") dp <- mst.cp2dp(cp, silent, tol=tol) else if(family == "SC") stop("this makes no sense for SC family") else warning(gettextf("family = '%s' is not supported", family), domain = NA) return(dp) } msn.cp2dp <- function(cp, silent=FALSE) { beta <- cp[[1]] Sigma <- cp[[2]] gamma1 <- cp[[3]] d <- length(gamma1) b <- sqrt(2/pi) max.gamma1 <- 0.5*(4-pi)*(2/(pi-2))^1.5 if(any(abs(gamma1) >= max.gamma1)) {if(silent) return(NULL) else stop("non-admissible CP")} R <- sign(gamma1)*(2*abs(gamma1)/(4-pi))^(1/3) delta <- R/(b*sqrt(1+R^2)) mu.z <- b*delta omega <- sqrt(diag(Sigma)/(1-mu.z^2)) Omega <- Sigma + outer(mu.z*omega, mu.z*omega) Omega.bar <- cov2cor(Omega) Obar.inv <- pd.solve(Omega.bar, silent=silent) if(is.null(Obar.inv)) {if(silent) return(NULL) else stop("non-admissible CP")} Obar.inv.delta <- as.vector(Obar.inv %*% delta) delta.sq <- sum(delta * Obar.inv.delta) if(delta.sq >= 1) {if(silent) return(NULL) else stop("non-admissible CP")} alpha <- Obar.inv.delta/sqrt(1-delta.sq) if(is.vector(beta)) { beta <- beta - omega*mu.z dp <- list(beta=beta, Omega=Omega, alpha=alpha) } else { beta[1,] <- beta[1,] - omega*mu.z dp <- list(beta=beta, Omega=Omega, alpha=alpha) } attr(dp, "delta.star") <- sqrt(delta.sq) return(dp) } st.dp2cp <- function(dp, cp.type="proper", fixed.nu=NULL, jacobian=FALSE, upto=NULL) { if(any(is.na(dp))) stop("NA's in argument 'dp'") if(!(cp.type %in% c("proper", "pseudo"))) stop("invalid cp.type") nu <- if(is.null(fixed.nu)) dp[length(dp)] else fixed.nu if(is.null(upto)) upto <- 4L if((round(upto) != upto)||(upto < 1)) stop("'upto' must be positive integer") if(nu <= upto && (cp.type =="proper")) return(NULL) p <- length(dp) - 2 - is.null(fixed.nu) beta1 <- if(p>1) dp[2:p] else NULL dp <- c(dp[1], dp[p+1], dp[p+2], nu) a <- if(cp.type == "proper") rep(0,upto) else (1:upto) omega <- dp[2] alpha <- dp[3] delta <- delta.etc(alpha) mu.z <- function(delta, nu) delta*b(nu) mu <- dp[1] + dp[2]* mu.z(delta, nu+a[1]) cp <- c(mu, beta1) if(upto > 1) { kappa2 <- function(delta,nu) nu/(nu-2) - mu.z(delta,nu)^2 sigma <- omega * sqrt(kappa2(delta, nu+a[2])) cp <- c(cp, sigma) } if(upto > 2) { g1 <- st.gamma1(delta, nu+a[3]) cp <- c(cp, g1) } if(upto > 3) { g2 <- st.gamma2(delta, nu+a[4]) cp <- c(cp, g2)} rv.comp <- c(rep(TRUE,upto-1), rep(FALSE, 4-upto)) param.type <- switch(cp.type, proper="CP", pseudo="pseudo-CP") names(cp) <- param.names(param.type, "ST", p, x.names=names(beta1), rv.comp) if(!is.null(fixed.nu) && upto==4) cp <- cp[-length(cp)] if(jacobian && (nu+a[3] > 3)) { u <- function(nu) 0.5*(1/nu + digamma((nu-1)/2) - digamma(nu/2)) Ddelta <- 1/(1+alpha^2)^1.5 Dkappa2.nu <- function(delta,nu) (-2)*(1/(nu-2)^2 + mu.z(delta,nu)^2 * u(nu)) Dg1.delta <- function(delta,nu) { # derivative di gamma1 wrt delta k2 <- kappa2(delta,nu) tmp <- nu/(nu-2)-delta^2*(nu-2*b(nu)^2*(nu-2)) (3*b(nu) *nu *tmp)/(k2^2.5 * (nu-2)*(nu-3)) } Dg1.nu <- function(delta,nu) {# derivative di gamma1 wrt nu k1 <- mu.z(delta,nu) k2 <- kappa2(delta,nu) Dk2.nu <- Dkappa2.nu(delta,nu) (g1*u(nu) + k1/k2^1.5*(-3*(3-delta^2)/(nu-3)^2 + 6/(nu-2)^2 + 4*k1^2*u(nu)) -3*g1*Dk2.nu/(2*k2)) } Dg2.delta <- function(delta,nu) {# derivative di gamma2 wrt delta k1 <- mu.z(delta, nu) k2 <- kappa2(delta,nu) 4*b(nu)^2*delta/k2 * (g2 + 3 -(2*(3-2*delta^2)*nu/(nu-3) -3*nu/(nu-2)+3*k1^2)/k2) } Dg2.nu <- function (delta, nu) {# derivative di gamma2 wrt nu k1 <- mu.z(delta, nu) k2 <- kappa2(delta,nu) b. <- b(nu) u. <- u(nu) k4 <- (3 * nu^2/((nu - 2) * (nu - 4)) -6*(delta*b.)^2 * nu*(nu-1)/((nu-2)*(nu-3)) + delta^4 * b.^2* (4*nu/(nu-3)-3*b.^2)) Dk4.nu <- (-6*nu*(3*nu-8)/((nu-2)*(nu-4))^2 -4*k1^2*(3-delta^2)*((2*u.*nu+1)*(nu-3)-nu)/(nu-3)^2 +6*k1^2*((2*u(nu)*nu+1)*(nu-2)-nu)/(nu-2)^2 -12*k1^4*u.) Dk2.nu <- Dkappa2.nu(delta,nu) Dk4.nu/k2^2 - 2*k4*Dk2.nu/k2^3 } Dcp.dp <- if(is.null(fixed.nu)) diag(1, p+3) else diag(1, p+2) Dcp.dp[1, p+1] <- mu.z(delta, nu+a[1]) Dcp.dp[1, p+2] <- omega * Ddelta * b(nu+a[1]) sigma.z <- sqrt(kappa2(delta, nu+a[2])) Dcp.dp[p+1,p+1] <- sigma.z Dcp.dp[p+1,p+2] <- -omega *delta *b(nu+a[2])^2 *Ddelta/sigma.z Dcp.dp[p+2,p+2] <- Dg1.delta(delta, nu+a[3]) * Ddelta if(is.null(fixed.nu) && (nu+a[4] > 4)) { Dcp.dp[1, p+3] <- omega * mu.z(delta, nu+a[1]) * u(nu+a[1]) Dcp.dp[p+1,p+3] <- omega * Dkappa2.nu(delta, nu+a[2])/(2 * sigma.z) Dcp.dp[p+2,p+3] <- Dg1.nu(delta, nu+a[3]) Dcp.dp[p+3,p+2] <- Dg2.delta(delta, nu+a[4]) * Ddelta Dcp.dp[p+3,p+3] <- Dg2.nu(delta, nu+a[4]) } attr(cp, "jacobian") <- Dcp.dp } return(cp) } # b <- function (nu) ifelse(nu>1, ifelse(nu < 1e8, # sqrt(nu/pi)*exp(lgamma((nu-1)/2)-lgamma(nu/2)), sqrt(2/pi)), NA) b <- function(nu){ out <- rep(NA, length(nu)) big <- 1e3 ok <- (nu>1 & (nu < big)) & (!is.na(nu)) out[nu >= big] <- sqrt(2/pi) * (1 + 0.75/nu + 25/(32*nu^2)) out[ok] <- sqrt(nu[ok]/pi) * exp(lgamma((nu[ok]-1)/2) - lgamma(nu[ok]/2)) out} # st.gamma1 <- function(delta, nu) {# this function is vectorized for delta, works with a single value of nu if(nu > 1e10) { mu <- delta*sqrt(2/pi) return(0.5*(4-pi)*mu^3/(1-mu^2)^1.5) } if(nu > 3) { mu <- delta*b(nu) k2 <- nu/(nu-2)- mu^2 k3 <- mu * (nu * (3 - delta^2)/(nu-3) -3 * nu/(nu - 2) + 2 * mu^2) gamma1 <- k3/sqrt(k2)^3 } else gamma1<- Inf*sign(delta) gamma1 } # st.gamma2 <- function(delta, nu) {# this function is vectorized for delta, works a single value of nu # if(nu > 1e10) { mu <- delta*sqrt(2/pi) return(2*(pi-3)*mu^4/(1-mu^2)^2) } if(nu > 4) { mu <- delta*b(nu) k2 <- nu/(nu-2)- mu^2 k4 <- (3 * nu^2/((nu - 2) * (nu - 4)) - 4 * mu^2 * nu * (3 - delta^2)/(nu - 3) + 6 * mu^2 * nu/(nu - 2) -3*mu^4) gamma2 <- k4/k2^2 - 3 } else gamma2 <- Inf gamma2 } # st.cp2dp <- function(cp, silent=FALSE, tol=1e-8, trace=FALSE) { fn0 <- function(log.nu, g1) st.gamma1(1, exp(log.nu)) - g1 if(any(is.na(cp))) stop("NA's in argument 'cp'") p <- length(cp)-3 x.names <- if(p>1) names(cp[2:p]) else NULL gamma1 <- cp[p+2] abs.g1 <- abs(gamma1) gamma2 <- cp[p+3] if(abs.g1 <= 0.5*(4-pi)*(2/(pi-2))^1.5) feasible <- (gamma2 > 2*(pi-3)*(2*abs.g1/(4-pi))^4/3) else { if(abs.g1 >= 4) feasible <- FALSE else { r0 <- uniroot(fn0, interval=c(log(4),1000), tol=tol, g1=abs.g1) nu0 <- exp(r0$root) feasible <- (gamma2 >= st.gamma2(1,nu0)) } } if(!feasible) { if(silent) return(NULL) else stop("CP outside feasible region")} delta <- 0.75*sign(gamma1) old <- c(delta,Inf) step <- Inf fn1 <- function(delta, g1, nu) st.gamma1(delta, nu) - g1 fn2 <- function(log.nu, g2, delta) st.gamma2(delta, exp(log.nu)) - g2 while(step > tol){ fn21 <- fn2(log(4), gamma2, delta) fn22 <- fn2(log(100), gamma2, delta) if(any(is.na(c(fn21,fn22)))) stop("parameter inversion failed") # browser() if(fn21 * fn22 > 0) return(rep(NA, p+3)) r2 <- uniroot(fn2, interval=c(log(4),100), tol=tol, g2=gamma2, delta=delta) nu <- exp(r2$root) if(fn1(-1, gamma1, nu) * fn1(1, gamma1, nu)> 0) return(rep(NA, p+3)) r1 <- uniroot(fn1, interval=c(-1,1), tol=tol, g1=gamma1, nu=nu) delta <- r1$root new <- c(delta, nu) step <- max(abs(old-new)) if(trace) cat("delta, nu, log(step):", format(c(delta, nu, log(step))),"\n") old<- new } mu.z <- delta*b(nu) omega <- cp[p+1]/sqrt(nu/(nu-2) - mu.z^2) alpha <- delta/sqrt(1-delta^2) dp <- c(cp[1]-omega*mu.z, if(p>1) cp[2:p] else NULL, omega, alpha, nu) names(dp) <- param.names("DP", "ST", p, x.names=x.names) return(dp) } mst.cp2dp <- function(cp, silent=FALSE, tol=1e-8, trace=FALSE) { mu <- drop(cp[[1]]) Sigma <- cp[[2]] gamma1 <- cp[[3]] gamma2M <- cp[[4]] d <- length(gamma1) # fn1 <- function(delta, g1, nu) st.gamma1(delta, nu) - g1 # fn2 <- function(log.nu, g2, delta.sq, d) # mst.gamma2M(delta.sq, exp(log.nu), d) - g2 if(any(abs(gamma1) >= 4)) {if(silent) return(NULL) else stop("cp$gamma1 not admissible")} dp.marg <- matrix(NA, d, 4) for(j in 1:d) { dp <- st.cp2dp(c(0,1,gamma1[j], gamma2M), silent=silent) if(is.null(dp)) {if(silent) return(NULL) else stop("no CP could be found")} dp.marg[j,] <- dp } if(trace) {cat("starting dp:\n"); print(dp.marg)} fn <- function(par, Sigma, gamma1, gamma2M, trace=FALSE){ if(trace) cat("[mst.cp2dp[fn]] par:", format(par), "\n") nu <- exp(par[1])+4 delta <- par[-1]/sqrt(1+par[-1]^2) d <- length(delta) mu.z <- delta*b(nu) omega <- sqrt(diag(Sigma)/(nu/(nu-2)-mu.z^2)) Omega.bar <- (diag(1/omega, d, d) %*% Sigma %*% diag(1/omega, d, d) + outer(mu.z, mu.z)) * (nu-2)/nu Obar.inv <- pd.solve(force.symmetry(Omega.bar)) delta.sq <- sum(delta * as.vector(Obar.inv %*% delta)) if(delta.sq >= 1) return(delta.sq*10^10) L1 <- sum((st.gamma1(delta, nu) - gamma1)^2) L2 <- (mst.gamma2M(delta.sq, nu, d) - gamma2M)^2 # if(trace){ ecat(c(nu,delta,L1,L2))} # ; readline("")} L1 + L2 } nu <- min(dp.marg[,4]) par <- c(log(nu-4), dp.marg[,3]) if(trace) cat("[mst.cp2dp] par:", format(par), "\n") opt <- nlminb(par, fn, Sigma=Sigma, gamma1=gamma1, gamma2M=gamma2M, trace=trace) if(trace) cat("[mst.cp2dp]\nopt$convergence:", opt$convergence, "\nopt$message", opt$message, "\n") if(opt$convergence != 0) { if(silent) return(NULL) else stop ("no CP could be found") } par <- opt$par nu <- exp(par[1])+4 delta <- par[-1]/sqrt(1+par[-1]^2) if(trace) { cat("[mst.cp2dp]min opt$fn:", format(opt$obj),"\n") print(c(nu,delta)) } mu.z <- delta*b(nu) omega<- sqrt(diag(Sigma)/(nu/(nu-2)-mu.z^2)) Omega.bar <- (diag(1/omega, d, d) %*% Sigma %*% diag(1/omega, d, d) + outer(mu.z,mu.z)) * (nu-2)/nu Obar.inv <- pd.solve(Omega.bar) delta.sq <- sum(delta * as.vector(Obar.inv %*% delta)) alpha <- as.vector(Obar.inv %*% delta)/sqrt(1-delta.sq) if(is.matrix(mu)) { xi <- mu xi[1,] <- mu[1,] - omega*mu.z } else xi <- mu - omega*mu.z Omega <- diag(omega) %*% Omega.bar %*% diag(omega) return(list(xi=xi, Omega=Omega, alpha=alpha, nu=nu)) } affineTransSECdistr <- function(object, a, A, name, compNames, drop=TRUE) {# object is of class SECdistrMv # computes distribution of affine transformation of SEC variable T=a+t(A)Y if(class(object) != "SECdistrMv") stop("wrong class of object") dp <- slot(object, "dp") alpha <- dp$alpha d <- length(alpha) if(!is.matrix(A) || nrow(A) != d) stop("A is not a matrix or wrong nrow(A)") h <- ncol(A) if(length(a) != h) stop("size mismatch of arguments 'a' and 'A'") if(missing(name)) name<- paste(deparse(substitute(a)), " + t(", deparse(substitute(A)), ") %*% (", deparse(substitute(object)),")", sep="") else name <- as.character(name)[1] compNames <- if(missing(compNames)) as.vector(outer("V",as.character(1:h),paste,sep="")) else as.character(as.vector(compNames)[1:h]) family <- object@family xi.X <- as.vector(a + t(A) %*% matrix(dp$xi, ncol=1)) Omega <- dp$Omega omega <- sqrt(diag(Omega)) Omega.X <- as.matrix(t(A) %*% Omega %*% A) invOmega.X <- pd.solve(Omega.X, silent=TRUE) if (is.null(invOmega.X)) stop("not full-rank transformation") omega.X <- sqrt(diag(Omega.X)) omega.delta <- omega * delta.etc(alpha, Omega)$delta m <- as.vector(invOmega.X %*% t(A) %*% matrix(omega.delta, ncol=1)) u <- sum(omega.delta * as.vector(A %*% matrix(m, ncol=1))) alpha.X <- (omega.X * m)/sqrt(1 - u) dp.X <- list(xi=xi.X, Omega=Omega.X, alpha=alpha.X) if(family == "ESN") dp.X$tau <- dp$tau if(family == "ST") dp.X$nu <- dp$nu if(h==1 & drop) { dp1 <- unlist(dp.X) dp1[2] <- sqrt(dp1[2]) names(dp1) <- names(dp.X) names(dp1)[2] <- tolower(names(dp)[2]) # new.obj <- new("SECdistrUv", dp=dp1, family=family, name=name) #?? new.obj <- makeSECdistr(dp=dp1, family=family, name=name) } else new.obj <- makeSECdistr(dp.X, family, name, compNames) # new.obj <- new("SECdistrMv", dp.X, family, name, compNames) #?? return(new.obj) } marginalSECdistr <- function(object, comp, name, drop=TRUE) {# marginals of SECdistrMv obj; 2nd version, computing marginal delta's family <- slot(object,"family") if(missing(name)) { basename <- if(object@name != "") object@name else deparse(substitute(object)) name<- paste(basename, ".components=(", paste(as.character(comp),collapse=","), ")", sep="") } else name <- as.character(name)[1] dp <- slot(object,"dp") xi <- dp$xi Omega <- dp$Omega alpha <- dp$alpha compNames <- slot(object,"compNames") d <- length(alpha) comp <- as.integer(comp) Omega11 <- Omega[comp,comp,drop=FALSE] if(length(comp) < d){ if(any(comp>d | comp<1)) stop("comp makes no sense") delta_etc <- delta.etc(alpha, Omega) delta1 <- delta_etc$delta[comp] R11 <- delta_etc$Omega.cor[comp, comp, drop=FALSE] iR11.delta1 <- as.vector(pd.solve(R11, silent=TRUE) %*% delta1) diRd <- sum(delta1*iR11.delta1) alpha1_2 <- if(diRd < 1) iR11.delta1/sqrt(1 - diRd) else sign(delta1)*Inf dp0 <- list(xi=xi[comp], Omega=Omega11, alpha=alpha1_2) } else { if(any(sort(comp) != (1:d))) stop("comp makes no sense") dp0 <- list(xi=xi[comp], Omega=Omega11, alpha=alpha[comp]) } if(family=="ESN") dp0$tau <- dp$tau if(family=="ST") dp0$nu <- dp$nu new.obj <- new("SECdistrMv", dp=dp0, family=family, name=name, compNames=compNames[comp]) if(length(comp)==1 & drop) {# new.obj <- as(new.obj, "SECdistrUv") # non va.. dp <- unlist(dp0) names(dp) <- names(dp0) dp[2] <- sqrt(dp[2]) names(dp)[2] <- "omega" new.obj <- new("SECdistrUv", dp=dp, family=family, name=compNames[comp]) } new.obj } conditionalSECdistr <- function(object, fixed.comp, fixed.values, name, drop=TRUE) { # conditional distribution of SN/ESN object family <- slot(object,"family") if(!(family %in% c("SN", "ESN"))) stop("family must be either SN or ESN") dp <- slot(object,"dp") xi <- dp$xi Omega <- dp$Omega alpha <- dp$alpha tau <- if(family=="SN") 0 else dp$tau d <- length(alpha) fix <- fixed.comp h <- length(fix) if(any(fix != round(fix)) | !all(fix %in% 1:d) | h == d) stop("fixed.comp makes no sense") if(length(fixed.values) != h) stop("length(fixed.comp) != lenght(fixed.values)") compNames <- slot(object,"compNames") if(missing(name)) { basename <- if(object@name != "") object@name else deparse(substitute(object)) name<- paste(basename,"|(", paste(compNames[fix],collapse=","), ")=(", paste(format(fixed.values),collapse=","), ")", sep="") } else name <- as.character(name)[1] # free.fix <- setdiff(1:d, fix) omega <- sqrt(diag(Omega)) omega1 <- omega[fix] omega2 <- omega[-fix] R <- cov2cor(Omega) R11 <- R[fix,fix, drop=FALSE] R12 <- R[fix,-fix, drop=FALSE] R21 <- R[-fix,fix, drop=FALSE] R22 <- R[-fix,-fix, drop=FALSE] alpha1 <- matrix(alpha[fix], ncol=1) alpha2 <- matrix(alpha[-fix], ncol=1) iR11 <- pd.solve(R11) R22.1 <- R22 - R21 %*% iR11 %*% R12 a.sum <- as.vector(t(alpha2) %*% R22.1 %*% alpha2) alpha1_2 <- as.vector(alpha1 + iR11 %*% R12 %*% alpha2)/sqrt(1+a.sum) tau2.1 <- (tau * sqrt(1 + sum(alpha1_2 * as.vector(iR11 %*% alpha1_2))) + sum(alpha1_2 * (fixed.values-xi[fix])/omega1)) O11 <- Omega[fix,fix, drop=FALSE] O12 <- Omega[fix,-fix, drop=FALSE] O21 <- Omega[-fix,fix, drop=FALSE] O22 <- Omega[-fix,-fix, drop=FALSE] iO11<- (1/omega1) * iR11 * rep(1/omega1, each=h) # solve(O11) reg <- O21 %*% iO11 xi2.1 <- as.vector(xi[-fix]+ reg %*% (fixed.values - xi[fix])) O22.1 <- O22 - reg %*% O12 omega22.1 <- sqrt(diag(O22.1)) alpha2.1 <- as.vector((omega22.1/omega2)*alpha2) dp2.1 <- list(xi=xi2.1, Omega=O22.1, alpha=alpha2.1, tau=tau2.1) obj <- if((d-h)==1 & drop) { dp2.1 <- unlist(dp2.1) dp2.1[2] <- sqrt(dp2.1[2]) names(dp2.1) <- c("xi","omega","alpha","tau") new("SECdistrUv", dp=dp2.1, family="ESN", name=name) } else new("SECdistrMv", dp=dp2.1, family="ESN", name=name, compNames=compNames[-fix]) return(obj) } delta.etc <- function(alpha, Omega=NULL) { inf <- which(abs(alpha) == Inf) if(is.null(Omega)){ # case d=1 delta <- alpha/sqrt(1+alpha^2) delta[inf] <- sign(alpha[inf]) return(delta) } else { # d>1 if(any(dim(Omega) != rep(length(alpha),2))) stop("dimension mismatch") Ocor <- cov2cor(Omega) if(length(inf) == 0) { # d>1, standard case Ocor.alpha <- as.vector(Ocor %*% alpha) alpha.sq <- sum(alpha * Ocor.alpha) delta <- Ocor.alpha/sqrt(1+alpha.sq) alpha. <- sqrt(alpha.sq) delta. <- sqrt(alpha.sq/(1+alpha.sq)) } else { # d>1, case with some abs(alpha)=Inf if(length(inf) > 1) warning("Several abs(alpha)==Inf, I handle them as 'equal-rate Inf'") k <- rep(0,length(alpha)) k[inf] <- sign(alpha[inf]) Ocor.k <- as.vector(Ocor %*% k) delta <- Ocor.k/sqrt(sum(k * Ocor.k)) delta. <- 1 alpha. <- Inf } return(list(delta=delta, alpha.star=alpha., delta.star=delta., Omega.cor=Ocor)) } } selm <- function (formula, family="SN", data, weights, subset, na.action, start=NULL, fixed.param=list(), method="MLE", penalty=NULL, offset, model=TRUE, x = FALSE, y = FALSE, ...) { ret.x <- x ret.y <- y cl <- match.call() formula <- as.formula(formula) if (length(formula) < 3) stop("formula must be a two-sided formula") mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data", "subset", "weights", "na.action", "offset"), names(mf), 0L) mf <- mf[c(1L, m)] mf$drop.unused.levels <- TRUE mf[[1L]] <- as.name("model.frame") mf <- eval(mf, parent.frame()) if(!(method %in% c("MLE", "MPLE"))) { warning(gettextf("method = '%s' is not supported, replaced by 'MLE'", method), domain = NA) method <- "MLE"} penalty.name <- if(method == "MPLE") { if(is.null(penalty)) "Qpenalty" else penalty } else NULL contr <- list(penalty=penalty.name, trace=FALSE, info.type="observed", opt.method="nlminb", opt.control=list()) control <- list(...) contr[(namc <- names(control))] <- control if (length(noNms <- namc[!namc %in% names(contr)])) warning( "unknown names in control: ", paste(noNms, collapse = ", ")) mt <- attr(mf, "terms") y <- model.response(mf, "numeric") w <- as.vector(model.weights(mf)) if(is.null(w)) w <- rep(1, NROW(y)) if(any(w != round(w)) | all(w == 0)) stop("weights must be non-negative integers (=frequencies), not all 0") offset <- as.vector(model.offset(mf)) if (!is.null(offset)) { if (length(offset) == 1) offset <- rep(offset, NROW(y)) else if (length(offset) != NROW(y)) stop(gettextf( "number of offsets is %d, should equal %d (number of observations)", length(offset), NROW(y)), domain = NA) } if(length(fixed.param) > 0) { if(any(names(fixed.param) != "nu")) stop("Wrong 'fixed.param': currently only 'nu' can be fixed") } if (is.empty.model(mt)) stop("empty model") else { x <- model.matrix(mt, mf, contrasts) xt <- pd.solve(t(x) %*% (w*x), silent=TRUE) if(is.null(xt)) stop("design matrix appears to be of non-full rank") z <- selm.fit(x, y, family=family, start, w=w, fixed.param=fixed.param, offset=offset, selm.control=contr) } class(z) <- c(if (is.matrix(y)) "mselm", "selm") z$na.action <- attr(mf, "na.action") z$offset <- offset z$contrasts <- attr(x, "contrasts") z$xlevels <- .getXlevels(mt, mf) z$call <- cl z$terms <- mt input <- list() if (model) input$model <- mf if (ret.x) input$x <- x if (ret.y) input$y <- y input$weights <- as.vector(model.weights(mf)) input$offset <- as.vector(model.offset(mf)) cl.obj <- if(is.matrix(y)) "mselm" else "selm" obj <- new(class(z), call=cl, family=toupper(family), logL=z$logL, method=c(method, contr$penalty), param=z$param, param.var=z$param.var, size=z$size, residuals.dp=z$resid.dp, fitted.values.dp=z$fitted.dp, control=control, input=input, opt.method=z$opt.method) return(obj) } # #selm.control <- function(method="MLE", info.type="observed", # trace=FALSE, algorithm="nlminb", opt.control=list()) #{ # if(algorithm !="nlminb") stop("only algorithm='nlminb' handled so far") # if(info.type !="observed") stop("only info.type='observed' handled so far") # list(method=method, info.type=info.type, trace=trace, # algorithm=algorithm, opt.control=opt.control) #} #------------------------------------------------------ selm.fit <- function (x, y, family="SN", start=NULL, w, fixed.param=list(), offset = NULL, selm.control) { if (!(toupper(family) %in% c("SN", "ST", "SC"))) stop(gettextf("I do not know family '%s'", family), domain = NA) family <- toupper(family) if (is.null(n <- nrow(x))) stop("'x' must be a matrix") if (n == 0L) stop("0 (non-NA) cases") p <- ncol(x) if ((p == 0L) || !(all(data.matrix(x)[,1] == 1))) stop("first column of model matrix is not all 1's") y <- drop(y) d <- NCOL(y) if(d>1 && is.null(colnames(y))) colnames(y) <- paste("V", 1:d, sep="") if(is.null(colnames(x))) colnames(x) <- paste("x", 0L:(p-1), sep=".") if (!is.null(offset)) y <- (y - offset) if (NROW(y) != n) stop("incompatible dimensions") if (missing(w) || is.null(w)) w <- rep(1, n) if(missing(selm.control)) selm.control <- list(penalty=NULL, trace=FALSE, info.type="observed", opt.method="nlminb", opt.control=list()) zero.weights <- any(w == 0) if(zero.weights) { save.r <- y save.f <- y save.w <- w ok <- (w != 0) nok <- !ok w <- w[ok] x0 <- x[!ok, , drop = FALSE] x <- x[ok, , drop = FALSE] n <- nrow(x) y0 <- if (d > 1L) y[!ok, , drop = FALSE] else y[!ok] y <- if (d > 1L) y[ok, , drop = FALSE] else y[ok] } storage.mode(x) <- "double" storage.mode(y) <- "double" contr <- selm.control info.type <- contr$info.type # so far, only "observed" y0 <- if(contr$info.type == "observed") y else NULL penalty <- if(is.null(contr$penalty)) NULL else get(contr$penalty, inherits=TRUE) trace <- contr$trace if(d == 1) { y <- as.vector(y) if(family == "SN") { npar <- p+2 cp <- if(is.null(start)) NULL else dp2cpUv(start, "SN") fit <- sn.mple(x, y, cp, w, penalty, trace) fit$opt.method <- fit$opt.method fit$opt.method$called.by <- "sn.mple" fit$dp <- cp2dpUv(cp=fit$cp, family="SN") boundary <- fit$boundary mu0 <- fit$cp[1] - fit$dp[1] info <- if(boundary) NULL else sn.infoUv(dp=fit$dp, x=x, y=y0, w=w, penalty=penalty) } if(family == "ST") { fixed.nu <- fixed.param$nu npar <- p + 2 + as.numeric(is.null(fixed.nu)) fit <- st.mple(x, y, dp=start, fixed.nu, w, penalty, trace) fit$opt.method <- fit$opt.method fit$opt.method$called.by <- "st.mple" dp <- fit$dp cp <- st.dp2cp(dp, cp.type="proper", fixed.nu=fixed.nu, upto=4-as.numeric(!is.null(fixed.nu))) p_cp<- st.dp2cp(dp, cp.type="pseudo", fixed.nu=fixed.nu, jacobian=TRUE) fit$cp <- cp[1:npar] fit$p_cp <- p_cp[1:npar] Dpseudocp.dp <- attr(p_cp, "jacobian")[1:npar, 1:npar] attr(p_cp, "jacobian") <- NULL boundary <- fit$boundary nu <- if(is.null(fixed.nu)) dp[npar] else fixed.nu mu0 <- if(nu <= 1) NA else st.dp2cp(dp, fixed.nu=fixed.nu, upto=1)[1] - dp[1] info <- if(boundary) NULL else st.infoUv(dp=fit$dp, NULL, x, y0, fixed.nu, w=w) } if(family == "SC") { npar <- p + 2 fit <- st.mple(x, y, dp=start, fixed.nu=1, w=w, penalty, trace) fit$opt.method <- fit$opt.method fit$opt.method$called.by <- "st.mple" fit$cp <- NULL p_cp0 <- st.dp2cp(fit$dp, cp.type="pseudo", fixed.nu=1, jacobian=TRUE) fit$p_cp <- p_cp0[1:npar] Dpseudocp.dp <- attr(p_cp0, "jacobian")[1:npar, 1:npar] attr(p_cp0, "jacobian") <- NULL boundary <- fit$boundary mu0 <- NA info <- if(boundary) NULL else st.infoUv(dp=fit$dp, x=x, y=y0, fixed.nu=1, w=w) } if(!boundary && family %in% c("ST","SC")) info$asyvar.p_cp <- Dpseudocp.dp %*% info$asyvar.dp %*% t(Dpseudocp.dp) beta.dp <- fit$dp[1:p] dp <- fit$dp cp <- fit$cp } else { # d>1 if(family == "SN") { npar <- p*d+d*(d+1)/2+d if(is.null(penalty)) { fit <- msn.mle(x, y, start, w, trace=trace, opt.method=contr$opt.method, control=contr$opt.control) fit$opt.method$called.by <- "msn.mle" boundary <- ((1 - fit$aux$delta.star) < .Machine$double.eps^(1/4)) if(!boundary) info <- sn.infoMv(fit$dp, x=x, y=y0, w=w) } else { fit <- msn.mple(x, y, start, w, penalty, trace=trace, opt.method=contr$opt.method, control=contr$opt.control) fit$opt.method$called.by <- "msn.mple" boundary <- FALSE info <- sn.infoMv(fit$dp, x=x, w=w) } fit$cp <- msn.dp2cp(fit$dp) mu0 <- as.vector(fit$cp[[1]][1,] - fit$dp[[1]][1,]) } if(family == "ST"){ fixed.nu <- fixed.param$nu fit <- mst.mple(x, y, start, w, penalty=penalty, fixed.nu=fixed.nu, trace=trace, opt.method=contr$opt.method, control=contr$opt.control) fit$opt.method$called.by <- "mst.mple" npar <- p*d + d*(d+1)/2 + d+ as.numeric(is.null(fixed.nu)) boundary <- fit$boundary dp <- fit$dp nu <- if(is.null(fixed.nu)) dp$nu else fixed.nu mu0 <- if(nu <= 1) NA else as.vector( mst.dp2cp(dp, fixed.nu=fixed.nu, upto=1)[[1]][1,] - dp[[1]][1,]) fit$cp <- mst.dp2cp(dp, cp.type="proper", fixed.nu=fixed.nu) fit$p_cp <- mst.dp2cp(dp, cp.type="pseudo", fixed.nu=fixed.nu) if(!boundary) info <- st.infoMv(fit$dp, x=x, y=y0, fixed.nu) } if(family == "SC") { if(is.null(start)) { fit.sn <- msn.mle(x, y, NULL, w, control=list(rel.tol=1e-4)) start <- fit.sn$dp } fit <- mst.mple(x, y, start, w, penalty=penalty, fixed.nu=1, trace=trace, opt.method=contr$opt.method, control=contr$opt.control) fit$opt.method$called.by <- "mst.mple" npar <- p*d + d*(d+1)/2 + d boundary <- fit$boundary mu0 <- NA fit$cp <- NULL fit$p_cp <- mst.dp2cp(fit$dp, "pseudo", fixed.nu=1) if(!boundary) info <- st.infoMv(fit$dp, x=x, y=y0, fixed.nu=1) } beta.dp <- fit$dp[[1]] } param <- list(dp=fit$dp, cp=fit$cp, "pseudo-cp"=fit$p_cp, boundary=boundary, mu0=mu0) if(!boundary && !is.null(info)) { asyvar.dp <- info$asyvar.dp[1:npar, 1:npar] asyvar.cp <- info$asyvar.cp[1:npar, 1:npar] asyvar.p_cp <- info$asyvar.p_cp[1:npar, 1:npar] param.var <- list(info.type=info.type, dp=asyvar.dp, cp=asyvar.cp, "pseudo-cp"=asyvar.p_cp) } else param.var <- list() dn <- colnames(x) fv <- drop(x %*% beta.dp) if(is.matrix(fv)) colnames(fv) <- colnames(y) size <- c(d=d, p=p, n.param=npar, n.obs=NROW(y), nw.obs=sum(w)) z <- list(logL=fit$logL, param=param, param.var=param.var, fitted.dp=fv, resid.dp=y-fv, size=size, opt.method=fit$opt.method) r1 <- y - z$resid.dp z$weights <- w if (zero.weights) { coef[is.na(coef)] <- 0 f0 <- x0 %*% coef if (d > 1) { save.r[ok, ] <- z$residuals save.r[nok, ] <- y0 - f0 save.f[ok, ] <- z$fitted.values save.f[nok, ] <- f0 } else { save.r[ok] <- z$residuals save.r[nok] <- y0 - f0 save.f[ok] <- z$fitted.values save.f[nok] <- f0 } z$residuals <- save.r z$fitted.values <- save.f z$weights <- save.w } if (!is.null(offset)) z$fitted.values <- z$fitted.values + offset if (!is.null(offset)) r1 <- r1 + offset z$fitted.dp <- r1 z$param$fixed <- if(is.null(fixed.param)) list() else fixed.param return(z) } #--------------------------------------------------- summary.selm <- function(object, param.type="CP", cov=FALSE, cor=FALSE) { lc.param.type <- tolower(param.type) if(!(lc.param.type %in% c("cp", "dp", "pseudo-cp"))) stop(gettextf("unknown param.type '%s'", param.type), domain = NA) param.type <- switch(lc.param.type, "dp"="DP", "cp"="CP", "pseudo-cp"="pseudo-CP") family <- slot(object,"family") if(param.type=="pseudo-CP" && !(family %in% c("ST", "SC"))) stop("pseudo-CP makes sense only for ST and SC families") if (!(family %in% c("SN","ST","SC"))) stop(gettextf("family '%s' not (yet) handled", family), domain = NA) param <- slot(object, "param")[[lc.param.type]] if(param.type=="CP" && is.null(param)) { if(family %in% c("ST", "SC")) { {message("CP does not esist. Consider param.type='DP' or 'pseudo-CP'") return(invisible())}}} param.var <- slot(object, "param.var")[[lc.param.type]] if(is.null(param.var)) param.var <- diag(NA, length(param)) se <- sqrt(diag(param.var)) z <- param/se param.table <- cbind(param, se, z, 2*pnorm(-abs(z))) dimnames(param.table) <- list(names(param), c("estimate","std.err","z-ratio", "Pr{>|z|}")) resid <- residuals(object, lc.param.type) aux <- list() aux$param.cov <- if(cov) param.var else NULL aux$param.cor <- if(cor) cov2cor(param.var) else NULL out <- new("summary.selm", call=slot(object,"call"), family = slot(object, "family"), logL = slot(object, "logL"), method=slot(object, "method"), resid = resid, param.type = param.type, param.table = param.table, param.fixed = slot(object, "param")$fixed, control = slot(object, "control"), aux = aux, boundary=slot(object, "param")$boundary, size=object@size) out } residuals.selm <- function(object, param.type="CP"){ param.type <- tolower(param.type) if(!(param.type %in% c("cp", "dp", "pseudo-cp"))) stop("param.type must be either 'CP' or 'DP' or 'pseudo-CP'") # param <- slot(object, "param")[[param.type]] p <- object@size["p"] n <- object@size["n.obs"] r <- slot(object, "residuals.dp") dp <- slot(object, "param")$dp pseudo.mu0 <- (slot(object, "param")$"pseudo-cp"[1] - dp[1]) resid <- switch(param.type, 'dp' = r, 'cp' = r - rep(slot(object,"param")$mu0, n), 'pseudo-cp' = r - rep(pseudo.mu0, n)) # resid <- resid/param[p+1] # AA: standardize resid? w <- slot(object,"input")$weights if(!is.null(w)) attr(resid,"weights") <- w return(resid) } fitted.selm <- function(object, param.type="CP") { param.type <- tolower(param.type) if(!(param.type %in% c("cp", "dp", "pseudo-cp"))) stop("param.type must be either 'CP' or 'DP' or 'pseudo-CP'") # param <- slot(object, "param")[[param.type]] n <- object@size["n.obs"] dp <- slot(object, "param")$dp fit.dp <- slot(object,"fitted.values.dp") pseudo.mu0 <- (slot(object, "param")$"pseudo-cp"[1] - dp[1]) fitted <- switch(param.type, 'dp' = fit.dp, 'cp' = fit.dp + rep(slot(object,"param")$mu0, n), 'pseudo-cp' = fit.dp + rep(pseudo.mu0, n)) w <- slot(object, "input")$weights if(!is.null(w)) attr(fitted,"weights") <- w return(fitted) } weights.selm <- function(object) slot(object, "input")$weights summary.mselm <- function(object, param.type="CP", cov=FALSE, cor=FALSE) { lc.param.type <- tolower(param.type) if(!(lc.param.type %in% c("cp", "dp", "pseudo-cp"))) stop(gettextf("unknown param.type '%s'", param.type), domain = NA) param.type <- switch(lc.param.type, "dp"="DP", "cp"="CP", "pseudo-cp"="pseudo-CP") family <- slot(object,"family") method <- slot(object, "method") if(param.type=="pseudo-CP" & !(family %in% c("ST","SC"))) stop("pseudo-CP makes sense only for ST and SC families") # if (family != "SN") stop("this family is not yet implemented") p <- object@size["p"] d <- object@size["d"] npar <- object@size["n.param"] param <- object@param[[lc.param.type]] if(is.null(param) && family %in% c("ST", "SC")) { message("CP does not esist. Consider param.type='DP' or 'pseudo-CP'") return(invisible())} beta <- param[[1]] param.var <- slot(object, "param.var")[[lc.param.type]] if(object@param$boundary | is.null(param.var)) param.var <- matrix(NA, npar, npar) coef.tables <- list() par.names <- param.names(param.type, family, p, x.names=rownames(beta)[-1]) for(j in 1:d) { beta.j <- beta[,j] var.j <- param.var[((j-1)*p+1):(j*p), ((j-1)*p+1):(j*p), drop=FALSE] se.j <- sqrt(diag(var.j)) z <- beta.j/se.j coef.table <- cbind(beta.j, se.j, z, 2*pnorm(-abs(z))) dimnames(coef.table) <- list(par.names[1:p], c("estimate","std.err","z-ratio", "Pr{>|z|}")) coef.tables[[j]] <- coef.table } scatter <- list(matrix=param[[2]], name=names(param)[2]) resid <- residuals.mselm(object, param.type) # resid <- t(t(resid)/sqrt(diag(scatter$matrix))) # for normalized/std resid se.slant <- sqrt(diag(param.var)[(p*d+d*(d+1)/2+1):(p*d+d*(d+1)/2+d)]) slant <- list(param=param[[3]], se=se.slant, name=names(param)[3]) tail <- if(length(param) == 3 ) list() else list(param=param[[4]], se=sqrt(diag(param.var)[npar]), name=names(param)[4]) aux <- list() aux$param.cov <- if(cov) param.var else NULL aux$param.cor <- if(cor) cov2cor(param.var) else NULL out <- new("summary.mselm", call=slot(object,"call"), family = family, logL = slot(object, "logL"), method=slot(object, "method"), resid = resid, param.type=param.type, coef.tables = coef.tables, param.fixed = slot(object, "param")$fixed, scatter = scatter, slant = slant, tail = tail, control = slot(object, "control"), aux = aux, boundary=slot(object, "param")$boundary, size=slot(object, "size")) out } residuals.mselm <- function(object, param.type="CP"){ param.type <- tolower(param.type) if(!(param.type %in% c("cp", "dp", "pseudo-cp"))) stop("param.type must be either 'CP' or 'DP' or 'pseudo-CP'") # param <- slot(object, "param")[[param.type]] # beta <- param[[1]] n <- object@size["n.obs"] r <- slot(object,"residuals.dp") param <- slot(object, "param") pseudo.mu0 <- as.vector(param$"pseudo-cp"[[1]][1,] - param$dp[[1]][1, ]) resid <- switch(param.type, 'dp' = r, 'cp' = r - outer(rep(1,n), param$mu0), 'pseudo-cp' = r - outer(rep(1,n), pseudo.mu0)) w <- slot(object, "input")$weights if(!is.null(w)) attr(resid,"weights") <- w return(resid) } fitted.mselm <- function(object, param.type="CP") { param.type <- tolower(param.type) if(!(param.type %in% c("cp", "dp", "pseudo-cp"))) stop("param.type must be either 'CP' or 'DP' or 'pseudo-CP'") n <- object@size["n.obs"] fit.dp <- slot(object, "fitted.values.dp") param <- slot(object, "param") pseudo.mu0 <- as.vector(param$"pseudo-cp"[[1]][1,] - param$dp[[1]][1, ]) fitted <- switch(param.type, 'dp' = fit.dp, 'cp' = fit.dp + outer(rep(1,n), param$mu0), 'pseudo-cp' = fit.dp + outer(rep(1,n), pseudo.mu0)) w <- slot(object, "input")$weights if(!is.null(w)) attr(fitted,"weights") <- w return(fitted) } weights.mselm <- function(object) slot(object, "input")$weights #------------------------------------------------------------ # # sn.info<- function(dp=NULL, cp=NULL, x=NULL, y=NULL, w, penalty=NULL, # type="observed", norm2.tol=1e-6) { # if(any(is.list(dp), is.list(cp))) { # if(is.null(dp)) stop("in the multivariate case, 'dp' must be non-NULL") # info <- sn.infoMv(dp=dp, x=x, y=y, w=w, type=type, norm2.tol=norm2.tol) # } else { # if(any(is.numeric(dp), is.numeric(cp))) # info <- sn.infoUv(dp=dp, cp=cp, x=x, y=y, w=w, penalty=penalty, # type=type, norm2.tol = norm2.tol) # else stop("invalid input") # } # return(info) # } sn.infoUv <- function(dp=NULL, cp=NULL, x=NULL, y, w, penalty=NULL, norm2.tol=1e-6) {# computes observed/expected Fisher information for univariate SN variates if(missing(y)) {y <- NULL; type <- "expected"} else type <- "observed" if(type == "observed") {if(!is.numeric(y)) stop("y is non-numeric")} if(is.null(dp) & is.null(cp)) stop("either dp or cp must be set") if(!is.null(dp) & !is.null(cp)) stop("cannot set both dp and cp") if(is.null(cp)) { cp <- dp2cpUv(dp, "SN") x0.names <- names(dp) } if(is.null(dp)) { dp <- cp2dpUv(cp, "SN") x0.names <- names(cp) } if(missing(w)) w <- rep(1, max(NROW(cbind(x,y)),1)) if(any(w != round(w)) | any(w<0)) stop("weights must be non-negative integers") n <- length(w) nw <- sum(w) if(is.null(x)) { p <- 1 wx <- w xx <- sum.x <- nw x <- matrix(1, nrow=n, ncol=1) x.names <- x0.names } else { p <- NCOL(x) # x <- matrix(x, n, p) wx <- w*x xx <- t(x) %*% (wx) sum.x <- matrix(apply(wx,2,sum)) if(is.null(x0.names)) x0.names <- colnames(x) if(length(x0.names) < (p+2)) x0.names<- paste("x", 0L:(p-1), sep=".") x.names <- x0.names[2:p] } if(length(cp) != (p+2)| length(dp) != (p+2)) stop("length(dp|cp) must be equal to ncol(x)+2") omega <- dp[p+1] alpha <- dp[p+2] mu.z <- sqrt(2/pi)*alpha/sqrt(1+alpha^2) sd.z <- sqrt(1-mu.z^2) sigma <- cp[p+1] gamma1 <- cp[p+2] R <- mu.z/sd.z T <- sqrt(2/pi-(1-2/pi)*R^2) Da.Dg <- 2*(T/(T*R)^2+(1-2/pi)/T^3)/(3*(4-pi)) Dmu.z <- sqrt(2/pi)/(1+alpha^2)^1.5 Dsd.z <- (-mu.z/sd.z)*Dmu.z Ddp.cp <- diag(p+2) Ddp.cp[1,p+1] <- (-R) Ddp.cp[1,p+2] <- (-sigma*R)/(3*gamma1) Ddp.cp[p+1,p+1] <- 1/sd.z Ddp.cp[p+1,p+2] <- (-sigma)* Dsd.z* Da.Dg/sd.z^2 Ddp.cp[p+2,p+2] <- Da.Dg I.dp <- I.cp <- matrix(NA,p+2,p+2) if(type == "observed"){ score <- sn.pdev.gh(cp, x, y, w, penalty, trace=FALSE, hessian=TRUE)/(-2) I.cp <- attr(score, "hessian")/2 attr(score,"hessian") <- NULL Dcp.dp <- solve(Ddp.cp) I.dp <- force.symmetry(t(Dcp.dp) %*% I.cp %*% Dcp.dp) a.coef <- NULL asyvar.cp <- pd.solve(I.cp, silent=TRUE) if(is.null(asyvar.cp)) { asyvar.dp <- NULL not.mle <- TRUE} else { not.mle <- (abs(sum(score * as.vector(asyvar.cp %*% score))) > norm2.tol) asyvar.dp <- pd.solve(I.dp, silent=TRUE) } if(not.mle) warning("parameters do not seem at MLE") #--Iinfo.dp 2nd form I2 <- matrix(NA,p+2,p+2) z <- (y - as.vector(x%*% dp[1:p]))/omega z1 <- zeta(1, alpha*z) z2 <- zeta(2, alpha*z) I2[1:p,1:p] <- t(wx) %*% ((1 - alpha^2*z2)*x)/omega^2 I2[1:p,p+1] <- t(wx) %*% (2*z - alpha*z1 - alpha^2*z2*z)/omega^2 I2[p+1,1:p] <- t(I2[1:p,p+1]) I2[1:p,p+2] <- t(wx) %*% (z1 + alpha*z2*z)/omega I2[p+2,1:p] <- t(I2[1:p,p+2]) I2[p+1,p+1] <- (-nw + 3*sum(w*z^2) -2*alpha*sum(w*z1*z) -alpha^2*sum(w*z2*z^2))/omega^2 I2[p+1,p+2] <- I2[p+2,p+1] <- (sum(w*z*z1) + alpha*sum(w*z2*z^2))/omega I2[p+2,p+2] <- sum(-w*z2*z^2) } else { # type == "expected" I2 <- NULL if(abs(alpha) < 200) { f.a <- function(x, alpha, k) x^k * dsn(x,0,1,alpha) * zeta(1,alpha*x)^2 err <- .Machine$double.eps^0.5 a0 <- integrate(f.a, -Inf, Inf, alpha=alpha, k=0, rel.tol=err)$value a1 <- integrate(f.a, -Inf, Inf, alpha=alpha, k=1, rel.tol=err)$value a2 <- integrate(f.a, -Inf, Inf, alpha=alpha, k=2, rel.tol=err)$value } else {# approx of Bayes & Branco (2007) with multiplicative adjustment u <- 1 + 8*(alpha/pi)^2 b <- sqrt(2/pi) a0 <- 1.019149098 * b^2/sqrt(u) a1 <- 1.020466516 * (-alpha * b^3/sqrt(u^3*(1+alpha^2/u))) a2 <- 1.009258704 * b^2/sqrt(u)^3 } a.coef <- c(a0, a1, a2) I.dp[1:p,1:p] <- xx * (1+alpha^2*a0)/omega^2 I.dp[p+1,p+1] <- nw * (2+alpha^2*a2)/omega^2 I.dp[p+2,p+2] <- nw * a2 I.dp[1:p,p+1] <- sum.x * (mu.z*(1+mu.z^2*pi/2)+alpha^2*a1)/omega^2 I.dp[p+1,1:p] <- t(I.dp[1:p,p+1]) I.dp[1:p,p+2] <- sum.x * (sqrt(2/pi)/(1+alpha^2)^1.5-alpha*a1)/omega I.dp[p+2,1:p] <- t(I.dp[1:p,p+2]) I.dp[p+1,p+2] <- I.dp[p+2,p+1] <- nw*(-alpha*a2)/omega eps <- 0.005 if(abs(alpha) > eps) I.cp <- force.symmetry(t(Ddp.cp) %*% I.dp %*% Ddp.cp) else{ if(alpha == 0) I.cp <- diag(c(1/omega^2, 2/omega^2, 1/6)) else { add <- c(rep(0,p+1), 3*eps) i1 <- sn.infoUv(dp=dp+add, x=x, w=w) i2 <- sn.infoUv(dp=dp-add, x=x, w=w) I.cp <- (i1$info.cp + i2$info.cp)/2 } } score <- NULL asyvar.dp <- pd.solve(I.dp, silent=TRUE) asyvar.cp <- pd.solve(I.cp, silent=TRUE) } if(is.null(names(dp))) names(dp) <- param.names("DP", "SN", p, x.names) if(is.null(names(cp))) names(cp) <- param.names("DP", "SN", p, x.names) dimnames(I.dp) <- list(names(dp), names(dp)) if(!is.null(I.cp)) dimnames(I.cp) <- list(names(cp), names(cp)) aux <- list(Ddp.cp=Ddp.cp, a.coef=a.coef, score.cp=score) list(dp=dp, cp=cp, type=type, info.dp=I.dp, info.cp=I.cp, asyvar.dp=asyvar.dp, asyvar.cp=asyvar.cp, aux=aux, I2=I2) } sn.infoMv <- function(dp, x=NULL, y, w, norm2.tol=1e-6) {# computes observed/expected Fisher information matrix for multiv.SN variates # using results in Arellano-Valle & Azzalini (JMVA, 2008+erratum) type <- if(missing(y)) "expected" else "observed" if(type == "observed") {if(!is.matrix(y)) stop("y is not a matrix")} cp <- dp2cpMv(dp, "SN") d <- length(dp$alpha) d2 <- d*(d+1)/2 if(!is.null(x)) {if(nrow(x) != nrow(y)) stop("non-conformable x and y")} if(missing(w)) w <- rep(1, max(NROW(cbind(x,y)),1)) if(any(w != round(w)) | any(w<0)) stop("weights must be non-negative integers") n <- length(w) nw <- sum(w) if(is.null(x)) { p <- 1 xx <- sum.x <- nw x <- matrix(1, nrow=n, ncol=1) } else { p <- NCOL(x) # x <- matrix(x, n, p) xx <- drop(t(x) %*% (w*x)) sum.x <- drop(matrix(apply(w*x,2,sum))) } beta <- as.matrix(dp[[1]],p,d) Omega <- dp$Omega omega <- sqrt(diag(Omega)) alpha <- dp$alpha eta <- alpha/omega # vOmega <- Omega[lower.tri(Omega,TRUE)] Obar <- cov2cor(Omega) Obar.alpha <- as.vector(Obar %*% alpha) alpha.star <- sqrt(sum(alpha * Obar.alpha)) if(alpha.star < 1e-4) { warning("information matrix of multivariate SN not computed near alpha=0") return(NULL) } # delta.star <- alpha.star/sqrt(1+alpha.star^2) c1 <- sqrt(2/pi)/sqrt(1+alpha.star^2) c2 <- 1/(pi*sqrt(1+2*alpha.star^2)) # theta <- c(beta,vOmega,eta) D <- duplication_matrix(d) # i1 <- 1:prod(dim(beta)) # i2 <- max(i1) + 1:(d*(d+1)/2) # i3 <- max(i2) + 1:d # ind <- list(i1=i1, i2=i2, i3=i3) O.inv <- pd.solve(Omega, silent=TRUE) if(type == "observed"){ y0 <- y - x %*% beta S0 <- t(y0) %*% (w*y0) / nw y0.eta <- as.vector(y0 %*% eta) z1 <- zeta(1, y0.eta) * w z2 <- (-zeta(2, y0.eta) * w) # Z2 <- diag(z2, n) S1 <- (O.inv %x% t(x)) %*% as.vector(w*y0)- (eta %x% t(x)) %*% z1 S2 <- (nw/2) * t(D) %*% ((O.inv %x% O.inv) %*% as.vector(S0-Omega)) S3 <- t(y0) %*% z1 score <- c(S1,S2,S3) u <- t(x) %*% z1 U <- t(x) %*% (z2 * y0) V <- O.inv %*% (2*S0-Omega) %*% O.inv # terms as given in the last but one matrix of p.16 j11 <- O.inv %x% xx + outer(eta,eta) %x% (t(x) %*% (z2 *x) ) j12 <- (O.inv %x% (t(x) %*% (w*y0) %*% O.inv)) %*% D j13 <- diag(d) %x% u - eta %x% U j22 <- (nw/2) * t(D) %*% (O.inv %x% V) %*% D j23 <- matrix(0, d*(d+1)/2, d) j33 <- t(y0) %*% (z2 * y0) uaA.coef <- NULL } else { # expected information Omega.eta <- omega * Obar.alpha mu.c <- Omega.eta/alpha.star^2 Omega.c <- Omega - outer(Omega.eta, Omega.eta)/alpha.star^2 alpha.bar <- alpha.star/sqrt(1+2*alpha.star^2) ginvMills <- function(x, m=0, s=1) # generalized inverse Mills ratio: \phi(x; m, s^2)/\Phi(x) exp(-0.5*((x-m)^2/s^2-x^2)+log(zeta(1,x))-log(s)) fn.u <- function(x, sd, k) x^k * ginvMills(x,0,sd) if(alpha.bar > 0) { err<- .Machine$double.eps^0.5 u0 <- integrate(fn.u, -Inf, Inf, sd=alpha.bar, k=0, rel.tol=err)$value u1 <- integrate(fn.u, -Inf, Inf, sd=alpha.bar, k=1, rel.tol=err)$value u2 <- integrate(fn.u, -Inf, Inf, sd=alpha.bar, k=2, rel.tol=err)$value } else {u0 <- 2; u1<- u2 <- 0} a0 <- u0 a1 <- u1 * mu.c A2 <- u2 * outer(mu.c, mu.c) + u0 * Omega.c # cfr (19) A1 <- (c1*(diag(d)-outer(eta,eta) %*% Omega/(1+alpha.star^2)) - c2*outer(eta, a1)) # cfr line after (12) # terms as given in the last matrix of p.16 j11 <- (O.inv + c2*a0*outer(eta,eta)) %x% xx j12 <- c1*(O.inv %x% outer(sum.x, eta)) %*% D j13 <- A1 %x% sum.x j22 <- 0.5*nw *t(D) %*% (O.inv %x% O.inv) %*% D j23 <- matrix(0, d*(d+1)/2, d) j33 <- nw *c2 * A2 uaA.coef <- list(u0=u0, u1=u1, u2=u2, a1=a1, A1=A1, A2=A2) score <- NULL } I.theta <-rbind(cbind( j11, j12, j13), cbind(t(j12), j22, j23), cbind(t(j13), t(j23), j33)) I.theta <- force.symmetry(I.theta, tol=1e3) if(type == "observed") { score.norm2 <- sum(score * as.vector(pd.solve(I.theta) %*% score)) if(score.norm2/d > norm2.tol) stop("'dp' does not seem to be at MLE") } D32 <- matrix(0,d, d2) tmp32 <- matrix(0,d^2,d^2) for(i in 1:d){ Eii <- matrix(0,d,d) Eii[i,i] <- 1 tmp32 <- tmp32 + Eii %x% Eii } D32 <- (-0.5)* (t(eta) %x% diag(1/omega^2, d,d)) %*% tmp32 %*% D # here we use the expression given in the notes, not in the paper Dlow <- cbind(matrix(0,d,d*p), D32, diag(1/omega,d,d)) Dtheta.dp <- rbind(cbind(diag(d*p+d2), matrix(0,d*p+d2,d)), Dlow) I.dp <- t(Dtheta.dp) %*% I.theta %*% Dtheta.dp # cfr (14) I.dp <- force.symmetry(I.dp, tol=1e3) # # psi<- c(mu, vSigma, mu0) Sigma <- cp$var.cov sigma <- sqrt(diag(Sigma)) Sigma.inv <- pd.solve(Sigma) mu0 <- c1* omega * Obar.alpha beta0.sq <- as.vector(t(mu0) %*% Sigma.inv %*% mu0) beta0 <- sqrt(beta0.sq) q1 <- 1/(c1*(1+beta0.sq)) q2 <- 0.5*q1*(2*c1-q1) Dplus <- pd.solve(t(D) %*% D) %*% t(D) D23 <- Dplus %*% (diag(d) %x% mu0 + mu0 %x% diag(d)) a <- as.vector(Sigma.inv %*% mu0) D32 <- t(-a) %x% (q1 * Sigma.inv - q1*q2*outer(a,a)) %*% D D33 <- q1 * Sigma.inv - 2*q1*q2*outer(a,a) one00 <- c(1,rep(0,p-1)) Dtheta.psi <- rbind( cbind(diag(p*d), matrix(0,p*d,d2), -diag(d) %x% one00), cbind(matrix(0,d2,p*d), diag(d2), D23), cbind(matrix(0,d,p*d), D32, D33)) # cfr (22a) mu0. <- mu0/(sigma*beta0) # \bar{\mu}_0 D32. <- matrix(0, d, d2) # \tilde{D}_{32} for(i in 1:d) { Eii <- matrix(0,d,d) Eii[i,i] <- 1 D32. <- D32. + (1/sigma[i])*((t(mu0.) %*% Eii) %x% Eii) %*% D } D32. <- 0.5* beta0 * D32. D33. <- (2/(4-pi)) * diag(sigma/mu0.^2, d, d)/(3*beta0.sq) Dpsi.cp <- rbind(cbind(diag(p*d+d2), matrix(0,p*d+d2,d)), cbind(matrix(0,d,p*d), D32., D33.)) # cfr (22b) jacob <- Dtheta.psi %*% Dpsi.cp I.cp <- t(jacob) %*% I.theta %*% jacob # cfr (17) I.cp <- if(any(is.na(I.cp))) NULL else force.symmetry(I.cp) asyvar.dp <- pd.solve(I.dp, silent=TRUE) if(is.null(asyvar.dp)) se.dp <- list(NULL) else { diags.dp <- sqrt(diag(asyvar.dp)) se.beta <- matrix(diags.dp[1:(p*d)], p, d) se.diagOmega <- diags.dp[p*d + d2 +1 -rev(cumsum(1:d))] # se.omega <- se.Omega/(2*omega) se.alpha <- diags.dp[p*d +d2 +(1:d)] se.dp <- list(beta=se.beta, diagOmega=se.diagOmega, alpha=se.alpha) } asyvar.cp <- pd.solve(I.cp, silent=TRUE) if(is.null(asyvar.cp)) se.cp <- list(NULL) else { diags.cp <- sqrt(diag(asyvar.cp)) se.beta <- matrix(diags.cp[1:(p*d)], p, d) se.diagSigma <- diags.cp[p*d + d2 +1 -rev(cumsum(1:d))] # se.sigma <- se.Sigma/(2*sigma) se.gamma1 <- diags.cp[p*d + d2 +(1:d)] se.cp <- list(beta=se.beta, var=se.diagSigma, gamma1=se.gamma1) } aux <- list(info.theta=I.theta, score.theta=score, Dtheta.dp=Dtheta.dp, Dpsi.cp=Dpsi.cp, Dtheta.psi=Dtheta.psi, uaA.coef=uaA.coef) list(dp=dp, cp=cp, type=type, info.dp=I.dp, info.cp=I.cp, asyvar.dp=asyvar.dp, asyvar.cp=asyvar.cp, se.dp=se.dp, se.cp=se.cp, aux=aux) } msn.mle <- function(x, y, start=NULL, w, trace=FALSE, opt.method=c("nlminb", "Nelder-Mead", "BFGS", "CG", "SANN"), control=list() ) { y <- data.matrix(y) if(missing(x)) x <- rep(1,nrow(y)) else {if(!is.numeric(x)) stop("x must be numeric")} if(missing(w)) w <- rep(1,nrow(y)) opt.method <- match.arg(opt.method) x <- data.matrix(x) d <- ncol(y) n <- sum(w) p <- ncol(x) y.names <- dimnames(y)[[2]] x.names <- dimnames(x)[[2]] if(is.null(start)) { fit0 <- lm.wfit(x, y, w, method="qr") beta <- as.matrix(coef(fit0)) res <- resid(fit0) a <- msn.moment.fit(res) Omega <- a$Omega omega <- a$omega alpha <- a$alpha if(!a$admissible) alpha<-alpha/(1+max(abs(alpha))) beta[1,] <- beta[1,]-omega*a$delta*sqrt(2/pi) } else{ beta <- start[[1]] # start$beta Omega <- start$Omega alpha <- start$alpha omega <- sqrt(diag(Omega)) } eta <-alpha/omega if(trace){ cat("Initial parameters:\n") print(cbind(t(beta),eta,Omega)) } param <- c(beta,eta) dev <- msn.dev(param, x, y, w) if(opt.method == "nlminb") { opt <- nlminb(param, msn.dev, msn.dev.grad, control=control, x=x, y=y, w=w, trace=trace) opt$value <- opt$objective } else opt <- optim(param, fn=msn.dev, gr=msn.dev.grad, method=opt.method, control=control, x=x, y=y, w=w, trace=trace) if(trace) cat(paste("Message from optimization routine:", opt$message,"\n")) logL <- opt$value/(-2) beta <- matrix(opt$par[1:(p*d)],p,d) dimnames(beta)[2] <- list(y.names) dimnames(beta)[1] <- list(x.names) eta <- opt$par[(p*d+1):(p*d+d)] xi <- x %*% beta Omega <- t(y-xi) %*% (w*(y-xi))/n omega <- sqrt(diag(Omega)) alpha <- eta*omega # param <- cbind(omega,alpha) dimnames(Omega) <- list(y.names,y.names) names(alpha) <- y.names alpha2 <- sum(eta * as.vector(Omega %*% eta)) delta.star <- sqrt(alpha2/(1+alpha2)) # dimnames(param)[1] <- list(y.names) dp <- list(beta=beta, Omega=Omega, alpha=alpha) opt$opt.method <- opt.method aux <- list(alpha.star=sqrt(alpha2), delta.star=delta.star) list(call=match.call(), dp=dp, logL=logL, aux=aux, opt.method=opt) } msn.dev <- function(param, x, y, w, trace=FALSE) { d <- ncol(y) if(missing(w)) w <- rep(1,nrow(y)) n <- sum(w) p <- ncol(x) beta <- matrix(param[1:(p*d)],p,d) eta <- param[(p*d+1):(p*d+d)] y0 <- y-x %*% beta Omega <- (t(y0) %*% (y0*w))/n D <- diag(qr(2*pi*Omega)[[1]]) logDet <- sum(log(abs(D))) dev <- n*logDet - 2*sum(zeta(0, y0 %*% eta) * w) + n*d if(trace) { cat("\nmsn.dev:",dev,"\n","parameters:"); print(rbind(beta,eta)) } dev } msn.dev.grad <- function(param, x, y, w, trace=FALSE) { d <- ncol(y) if(missing(w)) w <- rep(1,nrow(y)) n <- sum(w) p <- ncol(x) beta <- matrix(param[1:(p*d)],p,d) eta <- param[(p*d+1):(p*d+d)] y0 <- y-x %*% beta Omega <- (t(y0) %*% (w*y0))/n p1 <- zeta(1,as.vector(y0 %*% eta)) * w Omega.inv <- pd.solve(Omega, silent=TRUE) if(is.null(Omega.inv)) return(rep(NA, p*d+d)) Dbeta <- (t(x) %*% (y0*w) %*% Omega.inv - outer(as.vector(t(x) %*% p1), eta)) Deta <- as.vector(t(y0) %*% p1) if(trace){ cat("gradient:\n") print(rbind(Dbeta,Deta))} -2*c(Dbeta,Deta) } msn.moment.fit <- function(y) {# 31-12-1997: simple fit of MSN distribution usign moments y <- as.matrix(y) k <- ncol(y) m.y <- apply(y, 2, mean) var.y <- var(y) y0 <- (t(y) - m.y)/sqrt(diag(var.y)) gamma1<- apply(y0^3, 1, mean) out <- (abs(gamma1) > 0.99527) gamma1[out] <- sign(gamma1[out])*0.995 a <- sign(gamma1)*(2*abs(gamma1)/(4-pi))^0.33333 delta <- sqrt(pi/2)*a/sqrt(1+a^2) m.z <- delta * sqrt(2/pi) omega <- sqrt(diag(var.y)/(1-m.z^2)) Omega <- var.y + outer(omega*m.z, omega*m.z) xi <- m.y-omega*m.z O.cor <- cov2cor(Omega) O.inv <- pd.solve(O.cor) tmp <- as.vector(1 - t(delta) %*% O.inv %*% delta) if(tmp<=0) {tmp <- 0.0001; admissible <- FALSE} else admissible <- TRUE alpha <- as.vector(O.inv %*% delta)/sqrt(tmp) list(xi=xi, Omega=Omega, alpha=alpha, Omega.cor=O.cor, omega=omega, delta=delta, skewness=gamma1, admissible=admissible) } st.mple <- function(x, y, dp=NULL, fixed.nu=NULL, w, penalty=NULL, trace=FALSE) { # MLE of DP for univariate ST distribution if(!is.vector(y)) stop("parameter y must be a vector") if(!is.matrix(x)) stop("parameter x must be a matrix") y.name <- deparse(substitute(y)) x.name <- deparse(substitute(x)) if(any(x[,1] != 1)) stop("first column of x must have all 1's") n <- length(y) p <- ncol(x) if(missing(w)) w <- rep(1,n) nw <- sum(w) if(is.null(dp)) { ls <- lm.wfit(x, y, w) res <- ls$residuals s <- sqrt(sum(w*res^2)/nw) gamma1 <- sum(w*res^3)/(nw*s^3) gamma2 <- sum(res^4)/(nw*s^4) - 3 cp <- c(ls$coef, s, gamma1, gamma2) dp <- st.cp2dp(cp, silent=TRUE) if(is.null(dp)) dp <- rep(NA,length(cp)) if(any(is.na(dp))) dp <- c(cp[1:(p+1)], 0, 10) if(!is.null(fixed.nu)) dp <- dp[-length(dp)] } else{ if(length(dp) != (p+2+as.numeric(is.null(fixed.nu)))) stop("arg 'dp' has wrong length")} if(trace) cat("dp (starting values) =", format(dp), "\n") tiny <- sqrt(.Machine$double.eps) opt <- nlminb(dp, objective=st.pdev, gradient=st.pdev.gh, # do NOT set: hessian=st.dev.hessian, lower=c(-rep(Inf,p), tiny, -Inf, tiny), upper=c(rep(Inf,p+3)), x=x, y=y, fixed.nu=fixed.nu, w=w, penalty=penalty, trace=trace) dp <- opt$par rv.comp <- c(TRUE, TRUE, is.null(fixed.nu)) names(dp) <- param.names("DP", "ST", p=p, x.names=colnames(x)[-1], rv.comp) logL <- (-opt$objective)/2 boundary <- as.logical(abs(dp[p+2]) > 1000) # AA, must improve this rule if(is.null(fixed.nu)) boundary <- (boundary | dp[p+3] > 10^3) if(trace) { cat("Message from optimization routine (nlminb):", opt$message, "\n") cat("estimates (dp):", dp, "\n") cat("log-likelihood:", logL, "\n") } list(call=match.call(), dp=dp, fixed.nu=fixed.nu, logL=logL, boundary=boundary, opt.method=opt) } st.pdev <- function(dp, x, y, fixed.nu=NULL, w=1, penalty=NULL, trace=FALSE) { # computes "penalized deviance"=-2*(logL-Q) for ST p <- ncol(x) xi <- as.vector(x %*% matrix(dp[1:p],p,1)) nu <- if(is.null(fixed.nu)) dp[p+3] else fixed.nu logL <- sum(w * dst(y, xi, dp[p+1], dp[p+2], nu, log=TRUE)) Q <- if(is.null(penalty)) 0 else penalty(dp[p+2], nu, der=0) if(trace) cat("st.pdev: (dp,pdev) =", format(c(dp, -2*(logL-Q))),"\n") return(-2 * (logL - Q)) } st.pdev.gh <- function(dp, x, y, fixed.nu=NULL, w=1, penalty=NULL, trace=FALSE, hessian=FALSE) { # computes gradient and hessian of (penalized) deviance for ST p <- ncol(x) n <- nrow(x) beta <- dp[1:p] omega <- dp[p+1] alpha <- dp[p+2] nu <- if(is.null(fixed.nu)) dp[p+3] else fixed.nu npar <- p + 2 + as.numeric(is.null(fixed.nu)) score <- numeric(npar) xi <- as.vector(x %*% beta) z <- (y-xi)/omega nuz2 <- (nu+z^2) loro.tau <- sqrt((nu+1)/nuz2) zt <- z * loro.tau log.pdf <- dt(alpha*zt, nu+1, log=TRUE) log.cdf <- pt(alpha*zt, nu+1, log.p=TRUE) cdf <- exp(log.cdf) loro.w <- exp(log.pdf - log.cdf) tw <- loro.tau * loro.w zwz2 <- z*(z^2-1)*loro.w/loro.tau wi.beta <- z*loro.tau^2 - nu*alpha*tw/(nu+z^2) score[1:p] <- apply(w*x*wi.beta, 2, sum)/omega score[p+1] <- sum(w * (-1 + zt^2 -alpha*nu*z*tw/(nu+z^2)))/omega score[p+2] <- sum(w*z*tw) if(is.null(fixed.nu)){ fun.g <- function(x, nu1) dt(x,nu1) * (((nu1+1)*x^2)/(nu1*(nu1+x^2)) - log1p(x^2/nu1)) int.g <- numeric(n) for (i in 1:n) int.g[i] <- integrate(fun.g, -Inf, alpha*zt[i], nu1=nu+1)$value score[p+3] <- 0.5 * sum(w * (digamma(1+nu/2) -digamma(nu/2) - (2*nu+1)/(nu*(nu+1)) -log1p(z^2/nu) + zt^2/nu + alpha*zwz2/(nu+z^2)^2 + int.g/cdf)) } if(is.null(penalty)) { Q<- 0 attr(Q, "der1") <- rep(0,2) attr(Q, "der2") <- matrix(rep(0,4), 2, 2) } else Q <- penalty(alpha, nu, der=1+as.numeric(hessian)) score[(p+2):(p+3)] <- score[(p+2):(p+3)] - attr(Q, "der1") score <- score[1:npar] gradient <- (-2)*score if(hessian){ info <- matrix(NA, npar, npar) w.z <- (-nu*(nu+2)*alpha^2*z*loro.w/((nu+z^2*(1+alpha^2))*nuz2) -nu*alpha*loro.tau*loro.w^2/nuz2) w.alpha <- (-(nu+2)* alpha*z^2*loro.w/(nu+z^2*(1+alpha^2)) -zt*loro.w^2) S.z <- (-z*loro.tau^2 + alpha*nu*tw/nuz2) S.zz <- (2*zt^2/nuz2 - loro.tau^2 -3*alpha*nu*z*tw/nuz2^2 +alpha*nu*loro.tau*w.z/nuz2) info[1:p,1:p] <- t(-S.zz *x) %*% (w*x)/omega^2 info[1:p,p+1] <- info[p+1,1:p] <- apply(-w*(S.zz*z + S.z)*x, 2,sum)/omega^2 info[p+1,p+1] <- -sum(w*(1 + z^2*S.zz + 2*z*S.z))/omega^2 S.za <- nu*loro.tau*(loro.w +alpha*w.alpha)/nuz2 info[1:p,p+2] <- info[p+2,1:p] <- apply(w*S.za*x, 2, sum)/omega info[p+1,p+2] <- info[p+2,p+1] <- sum(w*z*S.za)/omega info[p+2,p+2] <- sum(-w*zt*w.alpha) + attr(Q,"der2")[1,1] if(is.null(fixed.nu)) { w.nu <- (0.5*loro.w*((nu+2)*(alpha*z)^2/((nu+z^2*(1+alpha^2))*nuz2) - log1p((alpha*z)^2/nuz2) - int.g/cdf) - 0.5*alpha*zwz2*loro.w/nuz2^2) S.znu <- (z*(1-z^2)/nuz2^2 + alpha*nu*loro.tau*w.nu/nuz2 + alpha*(nu*(3*z^2-1)+2*z^2)*loro.w/(2*loro.tau*nuz2^3)) info[1:p,p+3] <- info[p+3,1:p] <- apply(w* S.znu*x, 2, sum)/omega info[p+1,p+3] <- info[p+3,p+1] <- sum(w*z*S.znu)/omega info[p+2,p+3] <- info[p+3,p+2] <- -sum(w*(0.5*zwz2/nuz2^2 + zt*w.nu)) fun.b <- function(x, nu1) dt(x,nu1) * (((nu1+1)*x^2)/(nu1*(nu1+x^2)) - log1p(x^2/nu1))^2 fun.d <- function(x, nu1) dt(x,nu1) * x^2*((nu1-1)*x^2-2*nu1)/(nu1^2*(nu1+x^2)^2) int.b <- int.d <- numeric(n) for (i in 1:n) { int.b[i] <- integrate(fun.b, -Inf, alpha*zt[i], nu1=nu+1)$value int.d[i] <- integrate(fun.d, -Inf, alpha*zt[i], nu1=nu+1)$value } info[p+3,p+3] <- -sum(w*( (trigamma(nu/2+1) - trigamma(nu/2))/4 + (2*nu^2+2*nu+1)/(2*(nu*(nu+1))^2) + z^2/(2*nu*nuz2) - z^2*(nu^2+2*nu+z^2)/(2*nu^2*nuz2^2) - alpha*zwz2*(z^2+4*nu+3)/(4*(nu+1)*nuz2^3) + alpha*z*(1-loro.tau^2)*w.nu/(2*loro.tau*nuz2) - (int.g/(2*cdf))^2 - alpha*zwz2*int.g/(4*cdf*nuz2^2) + (2*int.d + int.b)/(4*cdf) + (alpha*zwz2/(4*nuz2^2))* ((nu+2)*alpha^2*z^2/((nu+1)*(nu+z^2*(1+alpha^2))) - log1p((alpha*z)^2/nuz2)) )) info[p+2,p+3] <- info[p+2,p+3] + attr(Q,"der2")[1,2] info[p+3,p+2] <- info[p+3,p+2] + attr(Q,"der2")[2,1] info[p+3,p+3] <- info[p+3,p+3] + attr(Q,"der2")[2,2] } attr(gradient,"hessian") <- force.symmetry(2*info) if(trace) cat("Hessian matrix has been computed\n") } if(trace) cat("st.pdev.gh: gradient = ", format(gradient),"\n") return(gradient) } st.pdev.hessian <- function(dp, x, y, fixed.nu=NULL, w=1, trace=FALSE) attr(st.pdev.gh(dp, x, y, fixed.nu, w, trace=trace, hessian=TRUE), "hessian") st.infoUv <- function(dp=NULL, cp=NULL, x=NULL, y, fixed.nu=NULL, w, penalty=NULL, norm2.tol=1e-06) {# computes observed Fisher information matrix for univariatate ST variates if(missing(y)) stop("y is missing") if(!is.numeric(y)) stop("y is non-numeric") type <- "observed" if(is.null(dp) & is.null(cp)) stop("either dp or cp must be set") if(!is.null(dp) & !is.null(cp)) stop("cannot set both dp and cp") # if(is.null(cp)) cp <- st.dp2cp(c(dp, fixed.nu)) # completa DP se necessario if(is.null(dp)) dp <- st.cp2dp(cp) # AA, CP deve essere comunque completo if(missing(w)) w <- rep(1, max(NROW(cbind(x,y)),1)) if(any(w != round(w)) | any(w<0)) stop("weights must be non-negative integers") npar <- length(dp) n <- length(w) nw <- sum(w) nu <- if(is.null(fixed.nu)) dp[npar] else fixed.nu if(is.null(x)) { n <- if(is.null(y)) 1 else NROW(y) p <- 1 xx <- sum.x <- nw x <- matrix(1, nrow=n, ncol=1) } else { p <- NCOL(x) # x <- matrix(x, n, p) xx <- t(x) %*% (w * x) sum.x <- matrix(apply(x,2,sum)) } score <- st.pdev.gh(dp, x, y, fixed.nu, w, trace=FALSE, hessian=TRUE) I.dp <- attr(score, "hessian")/2 if(sum(score * as.vector(solve(I.dp) %*% score)) > norm2.tol*npar) { warning("'dp' does not seem to be at MLE; score not quite 0") cat("score(dp): ", score, "\n") cat("norm(score)^2:", sum(score * as.vector(solve(I.dp) %*% score)),"\n") } attr(score, "hessian") <- NULL dimnames(I.dp) <- list(names(dp), names(dp)) asyvar.dp <- pd.solve(I.dp, silent=TRUE) aux <- list(score.dp=score) if(nu > 3) { cp <- st.dp2cp(dp=c(dp,fixed.nu), cp.type="proper", fixed.nu=fixed.nu, upto=if(is.null(fixed.nu)) 4 else 3, jacobian=TRUE) Dcp.dp <- attr(cp, "jacobian") attr(cp, "deriv") <- NULL if(!is.null(fixed.nu)) { Dcp.dp <- Dcp.dp[1:npar, 1:npar] cp <- cp[1:npar] } Ddp.cp <- solve(Dcp.dp) I.cp <- force.symmetry(t(Ddp.cp) %*% I.dp %*% Ddp.cp) dimnames(I.cp) <- list(names(cp), names(cp)) asyvar.cp <- pd.solve(I.cp) aux$Dcp.dp <- Dcp.dp aux$Ddp.cp <- Ddp.cp } else { I.cp <- NULL asyvar.cp <- NULL aux <- NULL } list(dp=dp, cp=cp, type=type, info.dp=I.dp, info.cp=I.cp, asyvar.dp=asyvar.dp, asyvar.cp=asyvar.cp, aux=aux) } param.names <- function(param.type, family="SN", p=1, x.names=NULL, rv.comp) {# NB: x.names= names of covariates except intercept; # rv.comp=random variable components (those not part of the regression model) if(!(param.type %in% c("DP","CP","pseudo-CP"))) stop("invalid param.type") if(!(family %in% c("SN", "ESN", "ST", "SC"))) stop("unknown family") if(p > 1 && (length(x.names) < (p-1))) x.names <- outer("x", as.character(1L:(p-1)), paste, sep=".") if(param.type == "DP"){ name0 <- if(p > 1) "(Intercept.DP)" else "xi" par.names <- c(name0, x.names, "omega", "alpha") if(family == "ESN") par.names <- c(par.names, "tau") if(family == "ST") par.names <- c(par.names, "nu") } if(param.type == "CP"){ name0 <- if(p > 1) "(Intercept.CP)" else "mean" par.names <- c(name0, x.names, "s.d.", "gamma1") if(family == "ESN") par.names <- c(par.names, "tau") if(family == "ST") par.names <- c(par.names, "gamma2") } if(param.type == "pseudo-CP"){ if(!(family %in% c("ST", "SC"))) stop("pseudo-CP makes sense only for ST and SC families") name0 <- if(p > 1) "(Intercept.CP~)" else "mean~" par.names <- c(name0, x.names, "s.d.~", "gamma1~") if(family == "ST") par.names <- c(par.names, "gamma2~") } if(missing(rv.comp)) rv.comp <- rep(TRUE, length(par.names)-p) par.names[c(rep(TRUE,p), rv.comp)] } mst.mple <- function (x, y, start=NULL, w, penalty, fixed.nu = NULL, trace = FALSE, opt.method = c("nlminb", "Nelder-Mead", "BFGS", "CG", "SANN"), control = list()) { opt.method <- match.arg(opt.method) y.name <- deparse(substitute(y)) y.names <- dimnames(y)[[2]] y <- data.matrix(y) x <- if (missing(x)) matrix(rep(1, nrow(y)), ncol = 1) else data.matrix(x) if (missing(w)) w <- rep(1, nrow(y)) x.names <- dimnames(x)[[2]] d <- ncol(y) n <- sum(w) p <- ncol(x) if (is.null(start)) { ls <- lm.wfit(x, y, w, singular.ok=FALSE) beta <- coef(ls) Omega <- var(resid(ls)) omega <- sqrt(diag(Omega)) alpha <- rep(0, d) nu <- if(is.null(fixed.nu)) 8 else fixed.nu if (trace) cat("mst.mple: starting dp=", c(beta, Omega[!upper.tri(Omega)], alpha, nu), "\n") } else { if (!is.null(fixed.nu)) start$nu <- fixed.nu if (all(names(start)[2:4] == c("Omega", "alpha", "nu"))) { beta <- start[[1]] # was start$beta Omega <- start$Omega alpha <- start$alpha nu <- start$nu } else stop("argument 'start' is not in the form that I expected") } param <- dplist2optpar(list(beta=beta, Omega=Omega, alpha=alpha)) if(is.null(fixed.nu)) param <- c(param, log(nu)) if(opt.method == "nlminb") { opt <- nlminb(param, objective = mst.pdev, gradient = mst.pdev.grad, control = control, x = x, y = y, w = w, penalty=penalty, fixed.nu = fixed.nu, trace = trace) # info <- num.deriv2(opt$par, FUN="mst.dev.grad", X=X, y=y, # w=w, fixed.nu = fixed.nu)/2 opt$value <- opt$objective } else { opt <- optim(param, fn = mst.pdev, gr = mst.pdev.grad, method = opt.method, control = control, hessian = TRUE, x = x, y = y, w = w, penalty=penalty, fixed.nu = fixed.nu, trace = trace) info <- opt$hessian/2 } dev <- opt$value param <- opt$par opt$opt.method <- opt.method if (trace) { cat("Message from optimization routine:", opt$message, "\n") cat("deviance:", dev, "\n") } dp.list <- optpar2dplist(opt$par, d, p, x.names, y.names) dp <- dp.list$dp alpha2 <- sum(dp$alpha * as.vector(cov2cor(dp$Omega) %*% dp$alpha)) delta.star <- sqrt(alpha2/(1+alpha2)) aux <- list(fixed.nu=fixed.nu, alpha.star=sqrt(alpha2), delta.star=delta.star) boundary <- ((1 - delta.star) < .Machine$double.eps^(1/4)) if(is.null(fixed.nu)) boundary <- (boundary | dp[[4]] > 10^3) list(call=match.call(), dp=dp, logL = -dev/2, boundary=boundary, aux=aux, opt.method = opt) } mst.pdev <- function(param, x, y, w, fixed.nu=NULL, penalty=NULL, trace=FALSE) { if(missing(w)) w <- rep(1,nrow(y)) dp.list <- optpar2dplist(param, ncol(y), ncol(x)) dp <- dp.list$dp nu <- if(is.null(fixed.nu)) dp$nu else fixed.nu logL <- sum(w * dmst(y, x %*% dp$beta, dp$Omega, dp$alpha, nu, log=TRUE)) Q <- if(is.null(penalty)) 0 else penalty(list(alpha=dp$alpha, Omega.bar=cov2cor(dp$Omega)), nu, der=0) pdev <- (-2) * (logL - Q) if(trace) cat("mst.pdev: ", pdev, "\nparam:", format(param), "\n") pdev } mst.pdev.grad <- function(param, x, y, w, fixed.nu=NULL, penalty=NULL, trace=FALSE) { d <- ncol(y) p <- ncol(x) beta<- matrix(param[1:(p*d)],p,d) D <- exp(-2*param[(p*d+1):(p*d+d)]) A <- diag(d) i0 <- p*d+d*(d+1)/2 if(d>1) A[!lower.tri(A,diag=TRUE)] <- param[(p*d+d+1):i0] eta <- param[(i0+1):(i0+d)] nu <- if(is.null(fixed.nu)) exp(param[i0+d+1]) else fixed.nu Oinv <- t(A) %*% diag(D,d,d) %*% A u <- y - x %*% beta Q <- as.vector(apply((u %*% Oinv)*u,1,sum)) L <- as.vector(u %*% eta) sf <- if(nu<10000) sqrt((nu+d)/(Q+nu)) else sqrt((1+d/nu)/(1+Q/nu)) t. <- L*sf dlogft<- (-0.5)*(1+d/nu)/(1+Q/nu) dt.dL <- sf dt.dQ <- (-0.5)*L*sf/(Q+nu) logT. <- pt(t., nu+d, log.p=TRUE) dlogT.<- exp(dt(t., nu+d, log=TRUE) - logT.) u.w<- u*w Dbeta <- (-2* t(x) %*% (u.w*dlogft) %*% Oinv - outer(as.vector(t(x) %*% (dlogT. * dt.dL* w)), eta) - 2* t(x) %*% (dlogT.* dt.dQ * u.w) %*% Oinv ) Deta <- apply(dlogT.*sf*u.w, 2, sum) if(d>1) { M <- 2*( diag(D,d,d) %*% A %*% t(u * dlogft + u * dlogT. * dt.dQ) %*% u.w) DA <- M[!lower.tri(M,diag=TRUE)] } else DA<- NULL M <- ( A %*% t(u*dlogft + u*dlogT.*dt.dQ) %*% u.w %*% t(A)) if(d>1) DD <- diag(M) + 0.5*sum(w)/D else DD <- as.vector(M + 0.5*sum(w)/D) grad <- (-2)*c(Dbeta,DD*(-2*D),DA,Deta) if(is.null(fixed.nu)) { df0 <- min(nu, 1e8) if(df0 < 10000){ diff.digamma <- digamma((df0+d)/2) - digamma(df0/2) log1Q<- log(1+Q/df0) } else { diff.digamma <- log1p(d/df0) log1Q <- log1p(Q/df0) } dlogft.ddf <- 0.5 * (diff.digamma - d/df0 + (1+d/df0)*Q/((1+Q/df0)*df0) - log1Q) eps <- 1.0e-4 df1 <- df0 + eps sf1 <- if(df0 < 1e4) sqrt((df1+d)/(Q+df1)) else sqrt((1+d/df1)/(1+Q/df1)) logT.eps <- pt(L*sf1, df1+d, log.p=TRUE) dlogT.ddf <- (logT.eps-logT.)/eps Ddf <- sum((dlogft.ddf + dlogT.ddf)*w) grad <- c(grad, -2*Ddf*df0) } if(!is.null(penalty)) { Ainv <- backsolve(A, diag(d)) Omega <- Ainv %*% diag(1/D,d,d) %*% t(Ainv) omega <- diag(Omega) alpha <- eta*omega Q <- Qpenalty(list(alpha, cov2cor(Omega)), nu, der=1) comp <- 1:(length(alpha)+is.null(fixed.nu)) Qder <- attr(Q, "der1") * c(1/omega, 1)[comp] # gradient for transformed variable (alpha --> eta) grad <- grad + 2*c(rep(0, p*d + d*(d+1)/2), Qder) } if(trace) cat("mst.pdev.grad: norm is ", format(sqrt(sum(grad^2))), "\n") return(grad) } mst.theta.jacobian <- function(theta, p, d, cp.type="proper") { # jacobian matrices associated to transformations from # theta=c(beta, vech(Omega), eta, nu) to DP, CP and other parameterizations cp.type <- match.arg(cp.type, c("proper", "pseudo")) k1 <- p * d k2 <- k1 + d*(d+1)/2 k3 <- k2 + d k4 <- k3 + 1 if(length(theta) != k4) stop("mismatch in the arguments") block1 <- 1:k1 block2 <- (k1+1):k2 block3 <- (k2+1):k3 block4 <- k4 beta <- matrix(theta[block1], p, d) Omega <- vech2mat(theta[block2]) Omega.inv <- pd.solve(Omega) eta <- theta[block3] nu <- theta[block4] a.incr <- if(cp.type=="proper") rep(0,4) else 1:4 omega <- sqrt(diag(Omega)) alpha <- eta*omega # delta <- delta.etc(alpha, Omega)$delta D <- duplication_matrix(d) P <- matrix(0, d^2, d^2) for (i in 1:d) { Eii <- matrix(0,d,d) Eii[i,i] <- 1 P <- P + Eii %x% Eii } omega <- sqrt(diag(Omega)) d <- length(omega) delta.plus <- delta.etc(alpha, Omega) delta <- delta.plus$delta delta.sq <- (delta.plus$delta.star)^2 alpha.sq <- (delta.plus$alpha.star)^2 a <- function(nu) nu/(nu-2) u <- function(nu) 0.5*(1/nu + digamma((nu-1)/2) - digamma(nu/2)) c1 <- function(nu) b(nu)/sqrt(1 + alpha.sq) q1 <- function(nu) a(nu)/(c1(nu)*(1 + beta0.sq(nu))) q2 <- function(nu) q1(nu)*(2*c1(nu) - q1(nu))/(2*a(nu)) beta0.sq <- function(nu) # beta0.sq = sum(mu0 * Sigma.inv_mu0) = b(nu)^2 * alpha.sq/(a(nu)+(a(nu)-b(nu)^2)*alpha.sq) #-- Dtheta.dp = D_{DP}\theta Dtheta.dp <- diag(k4) diag(Dtheta.dp)[block3] <- 1/omega Deta.vOmega <- (-0.5)* (t(eta) %x% diag(1/omega^2, d, d)) %*% P %*% D Dtheta.dp[block3, block2] <- Deta.vOmega # mu0 <- function(nu) omega * b(nu) * delta Sigma.etc <- function(nu) { mu0. <- mu0(nu) Omega.inv_mu0 <- as.vector(Omega.inv %*% mu0.) Sigma <- a(nu)*Omega - outer(mu0., mu0.) sigma <- sqrt(diag(Sigma)) tmp <- a(nu) - sum(mu0. *Omega.inv_mu0) Sigma.inv_mu0 <- Omega.inv_mu0/tmp Sigma.inv <- (Omega.inv + outer(Omega.inv_mu0, Omega.inv_mu0)/tmp)/a(nu) list(Sigma=Sigma, Sigma.inv=Sigma.inv, Sigma.inv_mu0=Sigma.inv_mu0, sigma=sigma) } Dq1.nu <- function(nu){ beta0_sq <- beta0.sq(nu) (-2/(nu-2)^2 -a(nu)*(b(nu)^2*u(nu)+beta0_sq/((nu-2)^2*(1+beta0_sq))) /c1(nu)^2)/(c1(nu)*(1+beta0_sq)) } # blocks for D_{\Psi}\theta Dplus <- solve(t(D)%*% D) %*% t(D) DvOmega.vSigma <- function(nu) diag(d*(d+1)/2)/a(nu) DvOmega.mu0 <- function(nu) Dplus %*% (diag(d) %x% mu0(nu) + mu0(nu) %x% diag(d))/a(nu) DvOmega.nu <- function(nu){ s <- Sigma.etc(nu) 2*vech(s$Sigma + outer(mu0(nu), mu0(nu)))/nu^2 } Deta.vSigma <- function(nu) { S <- Sigma.etc(nu) t(-S$Sigma.inv_mu0) %x% (q1(nu)* S$Sigma.inv - q1(nu) * q2(nu) *outer(S$Sigma.inv_mu0, S$Sigma.inv_mu0)) %*% D } Deta.mu0 <- function(nu) { S <- Sigma.etc(nu) q1(nu) * (S$Sigma.inv - 2*q2(nu)*outer(S$Sigma.inv_mu0, S$Sigma.inv_mu0)) } Deta.nu <- function(nu) Dq1.nu(nu) * Sigma.etc(nu)$Sigma.inv_mu0 #-- Dtheta.phi(phi)= D_{\Psi}\theta one00 <- c(1,rep(0,p-1)) Dtheta.phi <- diag(k4) Dtheta.phi[block1, block3] <- -diag(d) %x% one00 Dtheta.phi[block2, block2] <- DvOmega.vSigma(nu+a.incr[2]) Dtheta.phi[block2, block3] <- DvOmega.mu0(nu+a.incr[2]) Dtheta.phi[block2, block4] <- DvOmega.nu(nu+a.incr[2]) Dtheta.phi[block3, block2] <- Deta.vSigma(nu+a.incr[2]) Dtheta.phi[block3, block3] <- Deta.mu0(nu+a.incr[2]) Dtheta.phi[block3, block4] <- Deta.nu(nu +a.incr[2]) # # blocks for D_{\Psi}CP Dgamma2M.misc <- function(nu){ beta0_sq <- beta0.sq(nu) s <- Sigma.etc(nu) nu.34 <- (nu-3)*(nu-4) tmp2 <- ( (d+2)/nu.34 + beta0_sq * (2*nu/((nu-3)*b(nu)^2) - (3*(nu-3)^2-6)/nu.34 )) Dgamma2M.mu0 <- as.vector(8 * tmp2 * t(s$Sigma.inv_mu0)) Dgamma2M.vSigma <- (-4 * tmp2) * as.vector(( t(s$Sigma.inv_mu0) %x% t(s$Sigma.inv_mu0)) %*% D) R <- b(nu)^2*delta.sq*(nu-2)/nu R1R <- R/(1-R) PDgamma2.nu <- (-2*d*(d+2)/(nu-4)^2 -4*((2*nu-7)/nu.34^2) *R1R*(2/(1-R)+d) +2*(2*((nu-3)-nu*(1+2*(nu-3)*u(nu)))/((nu-3)*b(nu))^2 +(3*nu^2-22*nu+41)/nu.34^2)*R1R^2) #\ref{f:partial_gamma2.nu} list(Dgamma2M.vSigma=Dgamma2M.vSigma, Dgamma2M.mu0=Dgamma2M.mu0, PDgamma2.nu=PDgamma2.nu) } Dgamma1.misc <- function(nu) { sigma <- Sigma.etc(nu)$sigma lambda <- mu0(nu)/sigma g.nu <- 3/(nu-3) h.nu <- 1 + nu*(1-1/b(nu)^2)/(nu-3) Q <- g.nu*diag(d) + 3*h.nu*diag(lambda^2) Dgamma1.vOmega <- (t(-lambda/2) %x% (Q %*% diag(1/sigma^2,d))) %*% P %*% D Dgamma1.mu0 <- Q %*% diag(1/sigma,d) # K_{33} Dgamma1.nu <- (-3*lambda/(nu-3)^2 + (-3*(1-1/b(nu)^2)/(nu-3)^2 + 2*nu*u(nu)/((nu-3)*b(nu)^2))*lambda^3) # K_{34} list(Dgamma1.vOmega=Dgamma1.vOmega, Dgamma1.mu0=Dgamma1.mu0, Dgamma1.nu=Dgamma1.nu) } # #-- # Dcp.phi(phi) = D_{\Psi}(CP) [in the notes] = D_{\phi}\bar\rho [paper] # Dcp.phi <- diag(k4) K3 <- Dgamma1.misc(nu+a.incr[3]) K4 <- Dgamma2M.misc(nu+a.incr[4]) Dcp.phi[block3,block2] <- K3$Dgamma1.vOmega Dcp.phi[block3,block3] <- K3$Dgamma1.mu0 Dcp.phi[block3,block4] <- K3$Dgamma1.nu Dcp.phi[block4,block2] <- K4$Dgamma2M.vSigma Dcp.phi[block4,block3] <- K4$Dgamma2M.mu0 Dcp.phi[block4,block4] <- K4$PDgamma2.nu # # Dtheta.cp <- Dtheta.phi %*% solve(Dcp.phi) list(Dtheta.dp=Dtheta.dp, Dtheta.cp= Dtheta.phi %*% solve(Dcp.phi), Dtheta.phi=Dtheta.phi, Dcp.phi=Dcp.phi) } # mst.vdp2vcp <- function(vdp, p, d, cp.type="proper") { # vdp = c(betaDP, vech(Omega), alpha, nu), # vcp=(betaCP, vech(Sigma), gamma1, gamma2M) # d=ncol(y), p=ncol(x) beta <- matrix(vdp[1:(p*d)], p, d) vOmega <- vdp[(p*d+1):(p*d+d*(d+1)/2)] Omega <- vech2mat(vOmega) # omega <- sqrt(diag(Omega)) alpha <- vdp[(p*d+d*(d+1)/2+1):(p*d+d*(d+1)/2+d)] nu <- vdp[p*d+d*(d+1)/2+d+1] dp <- list(beta=beta, Omega=Omega, alpha=alpha, nu=nu) cp <- mst.dp2cp(dp, cp.type=cp.type) c(cp[[1]], vech(cp[[2]]), cp[[3]], cp[[4]]) } # mst.logL <- function(vdp, X, y, dp=TRUE) { # calcola logL rispetto a DP (se dp=TRUE) oppure a theta (se dp=FALSE) n <- nrow(y) d <- ncol(y) if(missing(X)) X <- matrix(1,n,1) p <- ncol(X) beta <- matrix(vdp[1:(p*d)], p, d) vOmega <- vdp[(p*d+1):(p*d+d*(d+1)/2)] Omega <- vech2mat(vOmega) # if(any(eigen(Omega)$values <= 0)) return(NA) if(any(diag(Omega) <= 0)) return(-Inf) omega <- sqrt(diag(Omega)) tmp <- vdp[(p*d+d*(d+1)/2+1):(p*d+d*(d+1)/2+d)] alpha <- if(dp) tmp else tmp*omega nu <- vdp[p*d+d*(d+1)/2+d+1] if(nu <= 0) return(-Inf) y0 <- (y - X %*% beta) sum(dmst(y0, rep(0,d), Omega, alpha, nu, log=TRUE)) } st.infoMv <- function(dp, x=NULL, y, fixed.nu=NULL, w, penalty=NULL, norm2.tol=1e-06) {# Computes observed Fisher information matrices for multiv.ST distribution # using expressions of score function of Arellano-Valle (2010, Metron), # followed by numerical differentiation. Expected info not implemented. # Info matrices are computed for DP, CP and pseudo-CP if(missing(y)) stop("missing y") if(!is.matrix(y)) stop("y is not matrix") type <- "observed" d <- length(dp$alpha) d2 <- d*(d+1)/2 if(missing(w)) w <- rep(1, nrow(cbind(x,y))) if(any(w != round(w)) || any(w<0)) stop("weights must be non-negative integers") n <- length(w) nw <- sum(w) if(is.null(x)) { p <- 1 xx <- sum.x <- nw x <- matrix(1, nrow=n, ncol=1) } else { p <- NCOL(x) # x <- matrix(x, n, p) xx <- drop(t(x) %*% (w*x)) sum.x <- drop(matrix(apply(w*x,2,sum))) } beta <- as.matrix(dp[[1]], p, d) Omega <- dp$Omega omega <- sqrt(diag(Omega)) alpha <- dp$alpha eta <- alpha/omega nu <- if(is.null(fixed.nu)) dp[[4]] else fixed.nu Obar <- cov2cor(Omega) Obar.alpha <- as.vector(Obar %*% alpha) alpha.star <- sqrt(sum(alpha * Obar.alpha)) # =\sqrt{\eta\T\Omega\eta} theta <- as.numeric(c(beta, vech(Omega), eta, nu)) # H <- force.symmetry(-hessian(mst.logL, theta, X=x, y=y, dp=FALSE)) ? H <- (-hessian(mst.logL, theta, X=x, y=y, dp=FALSE)) J <- mst.theta.jacobian(theta, p=NCOL(x), d=NCOL(y)) s <- 1:(length(theta) - as.numeric(!is.null(fixed.nu))) # I.dp <- force.symmetry(t(J$Dtheta.dp[s,s]) %*% H[s,s] %*% J$Dtheta.dp[s,s]) I.dp <- t(J$Dtheta.dp[s,s]) %*% H[s,s] %*% J$Dtheta.dp[s,s] asyvar.dp <- pd.solve(I.dp, silent=TRUE) if(is.null(asyvar.dp)) { warning("Condition 'information_matrix > 0' fails, DP seems not at MLE") se.dp <- list(NULL) } else { diags.dp <- sqrt(diag(asyvar.dp)) se.beta <- matrix(diags.dp[1:(p*d)], p, d) se.diagOmega <- diags.dp[p*d + d2 +1 - rev(cumsum(1:d))] se.alpha <- diags.dp[p*d +d2 +(1:d)] se.dp <- list(beta=se.beta, diagOmega=se.diagOmega, alpha=se.alpha) if(is.null(fixed.nu)) se.dp$nu<- diags.dp[p*d +d2 + d +1] } if(nu>4) { cp <- mst.dp2cp(dp, cp.type="proper", fixed.nu=fixed.nu) I.cp <- force.symmetry(t(J$Dtheta.cp[s,s]) %*% H[s,s] %*% J$Dtheta.cp[s,s]) asyvar.cp <- pd.solve(I.cp, silent=TRUE) if(is.null(asyvar.cp)) { se.cp <- list(NULL) } else { diags.cp <- sqrt(diag(asyvar.cp)) se.beta <- matrix(diags.cp[1:(p*d)], p, d) se.diagSigma <- diags.cp[p*d + d2 +1 - rev(cumsum(1:d))] # se.sigma <- se.Sigma/(2*sigma) se.gamma1 <- diags.cp[p*d + d2 +(1:d)] se.cp <- list(beta=se.beta, var=se.diagSigma, gamma1=se.gamma1) if(is.null(fixed.nu)) se.cp$gamma2 <- diags.cp[p*d +d2 + d +1] }} else I.cp <- asyvar.cp <- se.cp <- cp <- NULL if(is.null(asyvar.dp)) { asyvar.pcp <- NULL se.pcp <- list(NULL) Jp <- NULL } else { dp1 <- dp if(length(dp1) < 4) dp1$nu <- fixed.nu vdp1 <- as.numeric(c(dp1[[1]], vech(dp1[[2]]), dp1[[3]], dp1[[4]])) Jp <- jacobian(mst.vdp2vcp, vdp1, p=ncol(x), d=ncol(y), cp.type="pseudo") asyvar.pcp <- (Jp[s,s]) %*% asyvar.dp %*% t(Jp[s,s]) diags.pcp <- sqrt(diag(asyvar.pcp)) se.beta <- matrix(diags.pcp[1:(p*d)], p, d) se.diagSigma <- diags.pcp[p*d + d2 +1 - rev(cumsum(1:d))] # se.sigma <- se.Sigma/(2*sigma) se.gamma1 <- diags.pcp[p*d + d2 +(1:d)] se.pcp <- list(beta=se.beta, var=se.diagSigma, gamma1=se.gamma1) if(is.null(fixed.nu)) se.pcp$gamma2 <- diags.pcp[p*d +d2 + d +1] } aux <- list(Dpseudocp.dp=Jp[s,s]) list(dp=dp, cp=cp, type=type, info.dp=I.dp, info.cp=I.cp, asyvar.dp=asyvar.dp, asyvar.cp=asyvar.cp, asyvar.p_cp=asyvar.pcp, se.dp=se.dp, se.cp=se.cp, se.p_cp=se.pcp, aux=aux) } complete.dp <- function(obj) {# fills 'dp' with fixed dp components of selm/mselm object if(length(obj@param$fixed) == 0) return(slot(obj, "param")$dp) if(obj@family %in% c("SN", "SC")) stop("this should not happen, please report") if(obj@family == "ST") { nu <- as.numeric(slot(obj, "param")$fixed['nu']) dp <- slot(obj, "param")$dp if(slot(obj, "size")["d"] == 1) dp <- c(dp, nu=nu) else dp$nu <- nu return(dp) } stop("invalid object") } sn.mple <- function(x, y, cp=NULL, w, penalty=NULL, trace=FALSE) {# MPLE for CP of univariate SN (not intendend for ESN) y <- drop(y) n <- length(y) if (missing(x)) x <- matrix(rep(1,n), nrow=n, ncol=1) else if (is.null(n <- nrow(x))) stop("'x' must be a matrix") if (n == 0) stop("0-row design matrix cases") if (missing(w)) w <- rep(1,n) if(length(w) != n) stop("incompatible dimensions") y.name <- deparse(substitute(y)) x.name <- deparse(substitute(x)) p <- ncol(x) max.gamma1 <- 0.5*(4-pi)*(2/(pi-2))^1.5 - (.Machine$double.eps)^(1/4) if(is.null(cp)) { qr.x <- qr(x) s <- sqrt(sum(qr.resid(qr.x, y)^2)/n) gamma1 <- sum(qr.resid(qr.x, y)^3)/(n*s^3) if(abs(gamma1) > max.gamma1) gamma1 <- sign(gamma1)*0.9*max.gamma1 cp <- as.numeric(c(qr.coef(qr.x, y), s, gamma1)) } else{ if(length(cp)!= (p+2)) stop("ncol(x)+2 != length(cp)")} opt <- nlminb(cp, objective=sn.pdev, gradient=sn.pdev.gh, hessian=sn.pdev.hessian, lower=c(-rep(Inf,p), sqrt(.Machine$double.eps), -max.gamma1), upper=c(rep(Inf,p), Inf, max.gamma1), x=x, y=y, w=w, penalty=penalty, trace=trace) cp <- opt$par names(cp) <- param.names("CP", "SN", p, colnames(x)[-1]) logL <- (-opt$objective)/2 boundary <- as.logical(abs(cp[p+2]) >= max.gamma1) if(trace) { cat("Message from optimization routine (nlminb):", opt$message, "\n") cat("estimates (cp):", cp, "\n") cat("(penalized) log-likelihood:", logL, "\n") } list(call=match.call(), cp=cp, logL=logL, boundary=boundary, opt.method=opt) } sn.pdev <- function(cp, x, y, w, penalty=NULL, trace=FALSE) { # "penalized deviance"=-2*(logL-Q) for centred parameters of SN distribution p <- ncol(x) if(abs(cp[p+2])> 0.9952717) return(Inf) if(missing(w)) w <- rep(1, length(y)) if(any(w < 0)) stop("weights must be non-negative") dp <- cp2dpUv(cp, "SN") xi <- as.vector(x %*% as.matrix(dp[1:p])) logL <- sum(w * dsn(y, xi, dp[p+1], dp[p+2], log=TRUE)) Q <- if(is.null(penalty)) 0 else penalty(dp[p+2], der=0) if(trace) cat("sn.pdev: (cp,pdev) =", format(c(cp, -2*(logL-Q))),"\n") return(-2 * (logL - Q)) } sn.pdev.gh <- function(cp, x, y, w, penalty=NULL, trace=FALSE, hessian=FALSE) { # computes gradient and hessian of pdev=-2*(logL-Q) for centred parameters p <- ncol(x) n <- nrow(x) if(abs(cp[p+2]) > 0.9952717) return(rep(NA,p+2)) if(missing(w)) w <- rep(1,n) if(any(w < 0)) stop("weights must be non-negative") score <- rep(NA,p+2) info <- matrix(NA,p+2,p+2) beta <- cp[1:p] sigma <- cp[p+1] gamma1 <- cp[p+2] nw <- sum(w) dp <- cp2dpUv(cp, "SN") lambda <- dp[p+2] mu <- as.vector(x %*% as.matrix(beta)) d <- y-mu r <- d/sigma mu.z<- lambda*sqrt(2/(pi*(1+lambda^2))) sd.z<- sqrt(1-mu.z^2) z <- mu.z+sd.z*r p1 <- as.vector(zeta(1,lambda*z)) p2 <- as.vector(zeta(2,lambda*z)) omega<- sigma/sd.z af <- lambda*p1-mu.z Dmu.z <- sqrt(2/pi)/(1+lambda^2)^1.5 Dsd.z <- (-mu.z/sd.z)*Dmu.z Dz <- Dmu.z + r*Dsd.z DDmu.z<- (-3)*mu.z/(1+lambda^2)^2 DDsd.z<- -((Dmu.z*sd.z-mu.z*Dsd.z)*Dmu.z/sd.z^2+mu.z*DDmu.z/sd.z) DDz <- DDmu.z + r*DDsd.z score[1:p] <- omega^(-2) * t(x) %*% as.matrix(w*(y-mu-omega*af)) score[p+1] <- (-nw)/sigma + sd.z*sum(w*d*(z-p1*lambda))/sigma^2 score.l <- nw*Dsd.z/sd.z - sum(w*z*Dz) + sum(w*p1*(z+lambda*Dz)) if(!is.null(penalty)) { Q <- penalty(lambda, der=2) score.l <- (score.l - attr(Q, "der1")) } Dg.Dl <- 1.5*(4-pi)*mu.z^2 * (Dmu.z*sd.z - mu.z*Dsd.z)/sd.z^4 R <- mu.z/sd.z T <- sqrt(2/pi-(1-2/pi)*R^2) Dl.Dg <- 2*(T/(T*R)^2+(1-2/pi)/T^3)/(3*(4-pi)) R. <- 2/(3*R^2 * (4-pi)) T. <- (-R)*R.*(1-2/pi)/T DDl.Dg <- (-2/(3*(4-pi))) * (T./(R*T)^2+2*R./(T*R^3)+3*(1-2/pi)*T./T^4) score[p+2] <- score.l/Dg.Dl # convert deriv wrt lamda to gamma1 gradient <- (-2)*score if(hessian){ # info = -(second deriv of logL) info[1:p,1:p] <- omega^(-2) * t(x) %*% (w*(1-lambda^2*p2)*x) info[1:p,p+1] <- info[p+1,1:p] <- sd.z* t(x) %*% as.matrix(w*(z-lambda*p1)+ w*d*(1-lambda^2*p2)* sd.z/sigma)/sigma^2 info[p+1,p+1] <- (-nw)/sigma^2 + 2*sd.z*sum(w*d*(z-lambda*p1))/sigma^3 + sd.z^2*sum(w*d*(1-lambda^2*p2)*d)/sigma^4 info[1:p,p+2] <- info[p+2,1:p] <- t(x) %*% (w* (-2*Dsd.z*d/omega+Dsd.z*af+sd.z*(p1+lambda*p2*(z+lambda*Dz) -Dmu.z)))/sigma info[p+1,p+2] <- info[p+2,p+1] <- -sum(w*d*(Dsd.z*(z-lambda*p1)+sd.z*(Dz-p1-p2*lambda*(z+lambda*Dz)) ))/sigma^2 info[p+2,p+2] <- (nw*(-DDsd.z*sd.z+Dsd.z^2)/sd.z^2+sum(w*(Dz^2+z*DDz)) - sum(w*p2*(z+lambda*Dz)^2)- sum(w*p1*(2*Dz+lambda*DDz))) if(!is.null(penalty)) info[p+2,p+2] <- info[p+2,p+2] + attr(Q, "der2") info[p+2,] <- info[p+2,]/Dg.Dl # convert info wrt lambda to gamma1 info[,p+2] <- info[,p+2]*Dl.Dg # an equivalent form of the above info[p+2,p+2] <- info[p+2,p+2] - score.l*DDl.Dg attr(gradient,"hessian") <- force.symmetry(2*info) } if(trace) cat("sn.pdev.gh: gradient = ", format(gradient),"\n") return(gradient) } sn.pdev.hessian <- function(cp, x, y, w, penalty=NULL, trace=FALSE) { gh <- sn.pdev.gh(cp, x, y, w, penalty=penalty, trace=trace, hessian=TRUE) attr(gh, "hessian") } Qpenalty <- function(alpha_etc, nu=NULL, der=0) {# 'standard' penalty function of logL, possibly with derivatives e1 <- e1. <- 1/3 e2 <- e2. <- 0.2854166 if(!is.null(nu)) { g <- 0.57721 e1 <- e1. * (nu+2)*(nu+3)/(nu+1)^2 e2 <- e2. * (1 + 4/(nu+g)) } c1 <- 1/(4*e2) c2 <- e2/e1 if(is.vector(alpha_etc) && length(alpha_etc)==1) { alpha<- alpha_etc Obar.alpha <- alpha alpha2 <- alpha^2 } else { if(!is.list(alpha_etc)) stop("wrong argument alpha_etc") alpha <- alpha_etc[[1]] Omega.bar <- alpha_etc[[2]] if(any(dim(Omega.bar) != length(alpha))) stop("dimension mismatch") Obar.alpha <- as.vector(Omega.bar %*% alpha) alpha2 <- sum(alpha* Obar.alpha) } Q <- c1 * log(1 + c2* alpha2) if(der==0) return(Q) der1 <- 2*c1*c2*Obar.alpha/(1+ c2*alpha2) if(!is.null(nu)) { h <- (nu+g)*(nu+2)*(nu+3) dc1.dnu <- 1/(e2.*(nu+g+4)^2) tmp <- ((nu+1)^2 + 2*(nu+1)*(nu+g+4)) * h - (nu+1)^2*(nu+g+4)*( (nu+2)*(nu+3)+ (nu+2)*(nu+g)+(nu+3)*(nu+g)) dc2.dnu <- 3*e2.*tmp/h^2 der1 <- c(der1, Q*dc1.dnu/c1+ c1*alpha2*dc2.dnu/(1+c2*alpha2)) } attr(Q, "der1") <- der1 if(der==2) { attr(Q, "der2") <- if(is.null(nu)) 2*c1*c2*(1-c2*alpha^2)/(1+c2*alpha^2)^2 else { # Qdash <- function(x) attr(Qpenalty(x[1], x[2], der=1), "der1") # H <- jacobian(Qdash, c(alpha,nu)) Q.fn <- function(x) Qpenalty(x[1], x[2], der=0) hessian(Q.fn, c(alpha, nu)) } } return(Q) } MPpenalty <- function(alpha, der=0) {# penalty function associated to "matching prior" of Cabras et al.(SJS, 2012) a <- sn.infoUv(dp=c(0,1,alpha))$aux$a.coef a0 <- a[1] a1 <- a[2] a2 <- a[3] A <- 1+alpha^2 num <- (a2*A^2*(pi*(1+a0*alpha^4) + alpha^2*(pi*(1+a0)-4)) +2*sqrt(2*pi)*a1*alpha*A^1.5 - pi*a1^2*alpha^2*A^3 -2) den <- (pi*A^3*(2+alpha^2*(2*a0+a2)+ alpha^4*(a0*a2-a1^2)) -2*(alpha+2*alpha^3)^2 -2*sqrt(2*pi)*a1*alpha^3*sqrt(A)*(1+3*alpha^2+2*alpha^4)) prior <- sqrt(num/den) penalty <- -log(prior) if(der > 0) attr(penalty,"der1") <- grad(MPpenalty, alpha) if(der > 1) attr(penalty,"der2") <- hessian(MPpenalty, alpha) return(penalty) } msn.mple <- function(x, y, start=NULL, w, trace=FALSE, penalty=NULL, opt.method=c("nlminb", "Nelder-Mead", "BFGS", "CG", "SANN"), control=list() ) { y <- data.matrix(y) if(missing(x)) x <- rep(1,nrow(y)) else {if(!is.numeric(x)) stop("x must be numeric")} if(missing(w)) w <- rep(1,nrow(y)) opt.method <- match.arg(opt.method) x <- data.matrix(x) d <- ncol(y) n <- sum(w) p <- ncol(x) y.names <- dimnames(y)[[2]] x.names <- dimnames(x)[[2]] if(is.null(start)) start <- msn.mle(x, y, NULL, w)$dp if(trace){ cat("msn.mple initial parameters:\n") print(cbind(t(start[[1]]), start$Omega, start$alpha)) } param <- dplist2optpar(start) if(opt.method == "nlminb"){ opt <- nlminb(param, msn.pdev, # msn.pdev.grad, control=control, x=x, y=y, w=w, penalty=penalty, trace=trace) opt$value<- opt$objective } else{ opt <- optim(param, fn=msn.pdev, method=opt.method, control=control, x=x, y=y, w=w, penalty=penalty, trace=trace) } if(trace) cat(paste("Message from optimization routine:", opt$message,"\n")) logL <- opt$value/(-2) dp.list <- optpar2dplist(opt$par, d, p) beta <- dp.list$beta dimnames(beta)[2] <- list(y.names) dimnames(beta)[1] <- list(x.names) Omega <- dp.list$Omega alpha <- dp.list$alpha dimnames(Omega) <- list(y.names,y.names) names(alpha) <- y.names alpha2 <- sum(alpha * as.vector(cov2cor(Omega) %*% alpha)) delta.star <- sqrt(alpha2/(1+alpha2)) dp <- list(beta=beta, Omega=Omega, alpha=alpha) opt$opt.method <- opt.method aux <- list(penalty=penalty, alpha.star=sqrt(alpha2), delta.star=delta.star) list(call=match.call(), dp=dp, logL=logL, aux=aux, opt.method=opt) } msn.pdev <- function(param, x, y, w, penalty=NULL, trace=FALSE) { # -2*(profile.logL - Q) d <- ncol(y) if(missing(w)) w <- rep(1, nrow(y)) n <- sum(w) p <- ncol(x) dp. <- optpar2dplist(param, d=ncol(y), p=ncol(x)) logL <- sum(w * dmsn(y, x %*% dp.$beta, dp.$Omega, dp.$alpha, log=TRUE)) Q <- if(is.null(penalty)) 0 else penalty(list(dp.$alpha,dp.$Omega), der=0) pdev <- (-2)*(logL-Q) if(trace) cat("msn.pdev:", pdev, "\n", "opt param:", format(param),"\n") return(pdev) } optpar2dplist <- function(param, d, p, x.names=NULL, y.names=NULL) {# convert vector form of optimization parameters to DP list; # output includes inverse(Omega) and its log determinant beta <- matrix(param[1:(p * d)], p, d) D <- exp(-2 * param[(p * d + 1):(p * d + d)]) A <- diag(d) i0 <- p*d + d*(d+1)/2 if(d>1) A[!lower.tri(A,diag=TRUE)] <- param[(p*d+d+1):i0] eta <- param[(i0 + 1):(i0 + d)] nu <- if(length(param) == (i0 + d + 1)) exp(param[i0 + d + 1]) else NULL Oinv <- t(A) %*% diag(D,d,d) %*% A # Omega <- pd.solve(Oinv) Ainv <- backsolve(A, diag(d)) Omega <- Ainv %*% diag(1/D,d,d) %*% t(Ainv) Omega <- (Omega + t(Omega))/2 omega <- sqrt(diag(Omega)) alpha <- eta * omega dimnames(beta) <- list(x.names, y.names) dimnames(Omega) <- list(y.names, y.names) if (length(y.names) > 0) names(alpha) <- y.names dp <- list(beta=beta, Omega=Omega, alpha=alpha) if(!is.null(nu)) dp$nu <- nu list(dp=dp, beta=beta, Omega=Omega, alpha=alpha, nu=nu, Omega.inv=Oinv, log.det=sum(log(D))) } dplist2optpar <- function(dp, Omega.inv=NULL) {# convert DP list to vector form of optimization parameters beta <- dp[[1]] Omega <- dp[[2]] alpha <- dp[[3]] d <- length(alpha) nu <- if(is.null(dp$nu)) NULL else dp$null eta <- alpha/sqrt(diag(Omega)) Oinv <- if(is.null(Omega.inv)) pd.solve(Omega) else Omega.inv if(is.null(Oinv)) stop("matrix Omega not symmetric positive definite") upper <- chol(Oinv) D <- diag(upper) A <- upper/D D <- D^2 param <- if(d > 1) c(beta, -log(D)/2, A[!lower.tri(A, diag = TRUE)], eta) else c(beta, -log(D)/2, eta) if(!is.null(dp$nu)) param <- c(param, log(dp$nu)) param <- as.numeric(param) attr(param, 'ind') <- cumsum(c(length(beta), d, d*(d-1)/2, d, length(dp$nu))) return(param) } force.symmetry <- function(x, tol=10*sqrt(.Machine$double.eps)) { if(!is.matrix(x)) stop("x must be a matrix") # err <- abs(x-t(x)) err <- abs(x-t(x))/(1+abs(x)) max.err <- max(err/(1+err)) if(max.err > tol) warning("matrix seems not symmetric") if(max.err > 100*tol) stop("this matrix really seems not symmetric") return((x + t(x))/2) } duplication_matrix <- function (n=1) {# translated by AA from Octave code written of if ( (n<1) | (round (n) != n) ) stop ("n must be a positive integer") d <- matrix (0, n * n, n * (n + 1) / 2) ## KH: It is clearly possible to make this a LOT faster! count = 0 for (j in 1 : n){ d [(j - 1) * n + j, count + j] = 1 if(j= 1)) stop("probs must be within (0,1)") if(sum(probs > 0 && probs < 1) == 0) stop("invalid probs") if(missing(npt)) npt <- rep(101, d) if(missing(main)) { main <- if(d==2) paste("Density function of", slot(obj, "name")) else paste("Bivariate densities of", slot(obj, "name")) } if(missing(comp)) comp <- seq(1,d) if(missing(compLabs)) compLabs <- compNames if(length(compLabs) != d) stop("wrong length of 'compLabs' or 'comp' vector") family <- toupper(obj@family) lc.family <- tolower(family) if(lc.family == "esn") lc.family <- "sn" dp <- slot(obj, "dp") if(missing(range)) { range <- matrix(NA,2,d) q.fn <- get(paste("q", lc.family, sep=""), inherits=TRUE) for(j in 1:d) { marg <- marginalSECdistr(obj, comp=j, drop=TRUE) q <- q.fn(c(0.05, 0.25, 0.75, 0.95), dp=marg@dp) dq <- diff(q) range[,j] <- c(q[1] - 1.5*dq[1], q[length(q)] + 1.5*dq[length(dq)]) if(!is.null(data)) { range[1,j] <- min(range[1,j], min(data[,j])) range[2,j] <- max(range[2,j], max(data[,j])) }} } dots <- list(...) nmdots <- names(dots) if(d == 1) { message("Since dimension=1, plot as a univariate distribution") objUv <- marginalSECdistr(obj, comp=1, drop=TRUE) out <- plot(objUv, data=data, ...) } if(d == 2) out <- plot.SECdistrBv(x, range, probs, npt, compNames, compLabs, landmarks, data, data.par, main, ...) if(d > 2) { textPanel <- function(x = 0.5, y = 0.5, txt, cex, font) text(x, y, txt, cex = cex, font = font) localAxis <- function(side, x, y, xpd, bg, main, oma, ...) { if (side%%2 == 1) Axis(x, side = side, xpd = NA, ...) else Axis(y, side = side, xpd = NA, ...) } localPlot <- function(..., oma, font.main, cex.main) plot.SECdistrBv(...) text.diag.panel <- compLabs oma <- if ("oma" %in% nmdots) dots$oma else NULL if (is.null(oma)) { oma <- c(4, 4, 4, 4) if (!is.null(main)) oma[3L] <- 6 } opar <- par(mfrow = c(length(comp), length(comp)), mar = rep(c(gap,gap/2), each=2), oma=oma) on.exit(par(opar)) out <- list() count <- 0 for (i in comp) for (j in comp) { count <- count + 1 if(i == j) { plot(1, type="n", xlab="", ylab="", axes=FALSE) text(1, 1, text.diag.panel[i], cex=2) box() out[[count]] <- paste("diagonal component", compNames[i]) } else { ji <- c(j,i) marg <- marginalSECdistr(obj, comp=ji) out[[count]] <- localPlot(x=marg, range=range[,ji], probs=probs, npt=npt[ji], compNames= compNames[ji], compLabs=compLabs[ji], landmarks=landmarks, data=data[,ji], data.par=data.par, main="", yaxt="n", xaxt="n", ...) # if(i==comp[1]) {axis(3); if(j==length(comp)) axis(4)} # if(j==comp[1]) {axis(2); if(i==length(comp)) axis(1)} if(i==comp[1]) axis(3) ; if(j==length(comp)) axis(4) if(j==comp[1]) axis(2) ; if(i==length(comp)) axis(1) box() } } par(new = FALSE) if (!is.null(main)) { font.main <- if ("font.main" %in% nmdots) dots$font.main else par("font.main") cex.main <- if ("cex.main" %in% nmdots) dots$cex.main else par("cex.main") mtext(main, side=3, TRUE, line=5, outer = TRUE, at=NA, cex=cex.main, font=font.main, adj=0.5) }} invisible(out) } plot.SECdistrBv <- function(x, range, probs, npt=rep(101,2), compNames, compLabs, landmarks, data=NULL, data.par, main, ...) {# plot BiVariate SEC distribution obj <- x dp <- slot(obj, "dp") family <- slot(obj, "family") lc.family <- tolower(family) if(lc.family == "esn") lc.family <- "sn" d.fn <- get(paste("dm", lc.family, sep=""), inherits=TRUE) # density funct n1 <- npt[1] n2 <- npt[2] x1 <- seq(min(range[,1]), max(range[,1]), length=n1) x2 <- seq(min(range[,2]), max(range[,2]), length=n2) x1.x2 <- cbind(rep(x1, n2), as.vector(matrix(x2, n1, n2, byrow=TRUE))) X <- matrix(x1.x2, n1 * n2, 2, byrow = FALSE) pdf <- matrix(d.fn(X, dp=dp), n1, n2) Omega <- dp[[2]] Omega.bar <- cov2cor(Omega) alpha <- dp[[3]] alpha.star <- sqrt(sum(alpha * as.vector(Omega.bar %*% alpha))) omega <- sqrt(diag(Omega)) if(lc.family == "sn") { k.tau <- if (length(dp) == 4) (zeta(2,dp[[4]])*pi)^2/4 else 1 log.levels <- (log(1-probs) - log(2*pi)- 0.5*log(1-Omega.bar[1,2]^2) + k.tau * log(1+exp(-1.544/alpha.star))) - sum(log(omega)) } if(lc.family == "st" | lc.family == "sc") { nu <- if(lc.family == "st") obj@dp[[4]] else 1 l.nu <- (-1.3/nu - 4.93) h <- 100 * log(exp(((1.005*alpha.star-0.045)* l.nu -1.5)/alpha.star)+1) K <- h *(1.005*alpha.star-0.1)*(1+nu)/(alpha.star * nu) qF <- qf(probs, 2, nu) log.levels <- (lgamma(nu/2+1) -lgamma(nu/2) - log(pi*nu) -0.5*log(1-Omega.bar[1,2]^2) - (nu/2+1)*log(2*qF/nu + 1) + K -sum(log(omega))) } oo <- options() options(warn=-1) contour(x1, x2, pdf, levels=exp(log.levels), labels=paste("p=", as.character(probs), sep=""), main=main, xlab=compLabs[1], ylab=compLabs[2], ...) if(!is.null(data)) { col <- if(!is.null(data.par$col)) data.par$col else par()$col pch <- if(!is.null(data.par$pch)) data.par$pch else par()$pch cex <- if(!is.null(data.par$cex)) data.par$cex else par()$cex points(data, col=col, pch=pch, cex=cex) } if(landmarks != "") { if(landmarks == "auto") { mean.type <- "proper" if(lc.family == "sc") mean.type <- "pseudo" if(lc.family == "st") { if(dp[[4]] <= 1) mean.type <- "pseudo"} } else mean.type <- landmarks landmarks.label <- c("origin", "mode", if(mean.type == "proper") "mean" else "mean~") cp <- dp2cpMv(dp, family, cp.type=mean.type, upto=1) mode <- modeSECdistrMv(dp, family) x.pts <- c(dp$xi[1], mode[1], cp[[1]][1]) y.pts <- c(dp$xi[2], mode[2], cp[[1]][2]) points(x.pts, y.pts, ...) text(x.pts, y.pts, landmarks.label, pos=2, offset=0.3, ...) lines(x.pts, y.pts, lty=2) } options(oo) return(list(x=x1, y=x2, names=compNames, density=pdf)) } plot.selm <- function(x, param.type="CP", which = c(1:4), caption, panel = if (add.smooth) panel.smooth else points, main = "", # sub.caption = NULL, ask = prod(par("mfcol")) < length(which) && dev.interactive(), ..., id.n = 3, labels.id = names(x@residuals.dp), cex.id = 0.75, identline = TRUE, add.smooth = getOption("add.smooth"), label.pos = c(4, 2), cex.caption = 1) { if(class(x) != "selm") stop("object not of class 'selm'") show <- rep(FALSE, 4) show[which] <- TRUE p <- slot(x, "size")["p"] if(missing(caption)) { caption <- if(p> 1) c("Residuals vs Fitted Values", "Residual values and fitted error distribution", "Q-Q plot of (scaled DP residuals)^2", "P-P plot of (scaled DP residuals)^2") else c("Boxplot of observed values", "Empirical values and fitted distribution", "Q-Q plot of (scaled DP residuals)^2", "P-P plot of (scaled DP residuals)^2")} # param <- eval(parse(text=paste("x@param$'", param.type, "'",sep=""))) param <- slot(x, "param")[[tolower(param.type)]] if(is.null(param)) { message(paste( "Requested param.type='", param.type, "' evaluates to NULL.", sep="")) if(param.type == "pseudo-cp" & x@family== "SN") message("Pseudo-CP makes no sense for SN family") if(param.type == "cp" & x@family== "SC") message("CP makes no sense for SC family") if(param.type == "cp" & x@family== "ST") message("CP of ST family requires nu>4") stop("Consider another choice of param.type") } param.type <- tolower(param.type) r <- residuals(x, param.type) r.lab <- paste(toupper(param.type), "residuals") dp <- complete.dp(x) n <- slot(x, "size")["n.obs"] yh <- fitted(x, param.type) w <- weights(x) if (!is.null(w)) { wind <- (w != 0) r <- r[wind] yh <- yh[wind] w <- w[wind] labels.id <- labels.id[wind] } else w <- rep(1,n) rw <- n*w/slot(x,"size")["nw.obs"] if (is.null(id.n)) id.n <- 0 else { id.n <- as.integer(id.n) if (id.n < 0 || id.n > n) stop(gettextf("'id.n' must be in {1,..,%d}", n), domain = NA) } if (id.n > 0) { if (is.null(labels.id)) labels.id <- paste(1:n) iid <- 1:id.n show.r <- sort.list(abs(r), decreasing = TRUE)[iid] if (any(show[3:4])) { rs <- sort(abs(slot(x,"residuals.dp")/slot(x,"param")$dp[p+1])) rs2 <- rs^2 show.rs <- sort.list(rs, decreasing = TRUE)[iid] rs.lab <- paste("(scaled DP residuals)^2") nu. <- switch(x@family, ST = dp[p+3], SN = Inf, SC=1) } text.id <- function(x, y, ind, adj.x = TRUE) { labpos <- if (adj.x) label.pos[1 + as.numeric(x > mean(range(x)))] else 3 text(x, y, labels.id[ind], cex = cex.id, xpd = TRUE, pos = labpos, offset = 0.25) } } one.fig <- prod(par("mfcol")) == 1 if (ask) { oask <- devAskNewPage(TRUE) on.exit(devAskNewPage(oask)) } if (show[1]) { if(all(is.na(r)) & p>1) message(paste("CP residuals not available;", "consider param.type='DP' or 'pseudo-CP'")) else { if(p == 1){ y <- (x@residuals.dp + x@fitted.values.dp) boxplot(y, plot=TRUE, col="gray85", border="gray60") } else { # p>1 # if (id.n > 0) # ylim <- extendrange(r = ylim, f = 0.08) ylim <- range(r, na.rm = TRUE) plot(yh, r, xlab = "Fitted values", ylab = r.lab, main = main, ylim = ylim, type = "n") panel(yh, r, cex=sqrt(rw), ...) # if (one.fig) title(sub = sub.caption, ...) if (id.n > 0) { y.id <- r[show.r] y.id[y.id < 0] <- y.id[y.id < 0] - strheight(" ")/3 text.id(yh[show.r], y.id, show.r) } abline(h = 0, lty = 2, col = "gray") } } mtext(caption[1], 3, 0.5, cex = cex.caption) } if (show[2]) { if(all(is.na(r)) & p>1) message( "CP residuals not available; consider param.type='DP' or 'pseudo-CP'") else { if (p == 1){ y <- (x@residuals.dp + x@fitted.values.dp) dp0 <- dp xlab="observed variable"} else { y <- r dp0 <- as.numeric(c(dp[1]-param[1], dp[-(1:p)])) xlab=r.lab } h <- hist(rep(y, w), plot=FALSE) extr <- extendrange(x=h$breaks) x.pts <- seq(max(extr), min(extr), length=201) d.fn <- get(paste("d", tolower(x@family), sep=""), inherits = TRUE) pdf <- d.fn(x.pts, dp=dp0) plot(c(h$mids, x.pts), c(h$density, pdf), type="n", main=main, xlab=xlab, ylab="probability density") hist(rep(y, w), col="gray95", border="gray60", probability=TRUE, freq=FALSE, add=TRUE) lines(x.pts, pdf, ...) mtext(caption[2], 3, 0.25, cex = cex.caption) }} if (show[3]) { ylim <- c(0, max(pretty(rs2))) q <- qf((1:n)/(n+1), 1, nu.) plot(q, rs2, xlab="Theoretical values", ylab="Empirical values", ylim=ylim, type="p", main=main, cex=sqrt(rw), ...) if(identline) abline(0, 1, lty = 2, col = "gray50") # if (one.fig) title(sub = sub.caption, ...) mtext(caption[3], 3, 0.25, cex = cex.caption) if (id.n > 0) text.id(q[show.rs], rs2[show.rs], show.rs) } if (show[4]) { p <- (1:n)/(n+1) pr <- pf(rs2, 1, nu.) plot(p, pr, xlab="Theoretical values", ylab="Empirical values", xlim=c(0,1), ylim=c(0,1), main=main, cex=sqrt(rw), ...) if(identline) abline(0, 1, lty = 2, col = "gray50") # if (one.fig) title(sub = sub.caption, ...) mtext(caption[4], 3, 0.25, cex = cex.caption) if(identline) abline(0, 1, lty = 2, col = "gray50") if (id.n > 0) text.id(p[show.rs], pr[show.rs], show.rs) } # if (!one.fig && par("oma")[3] >= 1) # mtext(sub.caption, outer = TRUE, cex = 1.25) invisible() } print.summary.selm <- function(object) { obj <- object digits = max(3, getOption("digits") - 3) cat("Call: ") print(slot(obj, "call")) n <- obj@size["n.obs"] cat("Number of observations:", n, "\n") if(!is.null(slot(obj,"aux")$weights)) cat("Weighted number of observations:", obj@size["nw.obs"], "\n") show.family <- slot(obj,"family") cat("Family:", show.family,"\n") fixed <- slot(obj, "param.fixed") if(length(fixed) > 0) { fixed.char <- paste(names(fixed), format(fixed), sep=" = ", collapse=", ") cat("Fixed parameters:", fixed.char, "\n") } method <- slot(obj, "method") u <- if(length(method)==1) NULL else paste(", penalty function:", method[2]) cat("Estimation method: ", method[1], u, "\n", sep="") logL.name <- paste(if(method[1] == "MLE") "Log" else "Penalized log", "likelihood:", sep="-") cat(logL.name, format(slot(obj,"logL"), nsmall=2), "\n") param.type <- slot(obj, "param.type") cat("Parameter type:", param.type,"\n") if(obj@boundary) cat("Estimates on/near the boundary of the parameter space\n") resid <- slot(obj, "resid") if(n > 5) { nam <- c("Min", "1Q", "Median", "3Q", "Max") rq <- if (length(dim(resid)) == 2) structure(apply(t(resid), 1, quantile), dimnames = list(nam, dimnames(resid)[[2]])) else structure(quantile(resid), names = nam) cat("\n", param.type, " residuals:\n", sep="") print(rq, digits = digits) } param <- slot(obj,"param.table") p <- obj@size["p"] cat("\nRegression coefficients\n") printCoefmat(param[1:p, ,drop=FALSE], digits = digits, signif.stars = getOption("show.signif.stars"), na.print = "NA") cat("\nParameters of the SEC random component\n") printCoefmat(param[(p+1):nrow(param), 1:2, drop=FALSE], digits = digits, signif.stars = FALSE, na.print = "NA") if(!is.null(obj@aux$param.cor)) { cat("\nCorrelations of parameter estimates:\n") print(obj@aux$param.cor) } if(!is.null(obj@aux$param.cov)) { cat("\nCovariances of parameter estimates:\n") print(obj@aux$param.cov) } invisible(object) } plot.mselm <- function (x, param.type="CP", which, caption, panel = if (add.smooth) panel.smooth else points, main = "", # sub.caption = NULL, ask = prod(par("mfcol")) < length(which) && dev.interactive(), ..., id.n = 3, labels.id = names(x@residuals.dp), cex.id = 0.75, identline = TRUE, add.smooth = getOption("add.smooth"), label.pos = c(4, 2), cex.caption = 1) { # cat("this plot method should possibly be expanded\n") p <- slot(x,"size")["p"] if(missing(which)) which <- if(p == 1) c(1,3,4) else 2:4 show <- rep(FALSE, 4) show[which] <- TRUE if(missing(caption)) caption <- c("Observed values and fitted distribution", paste("Distribution of", param.type, "residual values"), "Q-Q plot of Mahalanobis distances", "P-P plot of Mahalanobis distances") param <- slot(x, "param")[[tolower(param.type)]] if(is.null(param)) { message(paste( "Requested param.type='", param.type, "' evaluates to NULL.", sep="")) if(param.type == "pseudo-cp" & x@family== "SN") message("Pseudo-CP makes no sense for SN family") if(param.type == "cp" & x@family== "SC") message("CP makes no sense for SC family") if(param.type == "cp" & x@family== "ST") message("CP of ST family requires nu>4") stop("Consider another choice of param.type") } param.type <- tolower(param.type) r <- residuals(x, param.type) r.lab <- paste(toupper(param.type), "residuals") # family <- x@family dp <- complete.dp(x) cp <- dp2cpMv(dp, family=x@family, cp.type="auto") n <- slot(x,"size")["n.obs"] d <- x@size["d"] yh <- fitted(x, param.type) w <- weights(x) if (!is.null(w)) { wind <- w != 0 r <- r[wind] yh <- yh[wind] w <- w[wind] labels.id <- labels.id[wind] } else w <- rep(1,n) rw <- n*w/slot(x,"size")["nw.obs"] if (is.null(id.n)) id.n <- 0 else { id.n <- as.integer(id.n) if (id.n < 0 || id.n > n) stop(gettextf("'id.n' must be in {1,..,%d}", n), domain = NA) } if (id.n > 0) { if (is.null(labels.id)) labels.id <- paste(1:n) iid <- 1:id.n show.r <- sort.list(abs(r), decreasing = TRUE)[iid] if (any(show[3:4])) { Omega.inv <- pd.solve(dp$Omega, silent=TRUE) r.dp <- t(slot(x, "residuals.dp")) rs2 <- apply((Omega.inv %*% r.dp) * r.dp, 2, sum) show.rs <- sort.list(rs2, decreasing = TRUE)[iid] nu. <- switch(x@family, ST = dp$nu, SN = Inf, SC=1) } text.id <- function(x, y, ind, adj.x = TRUE) { labpos <- if (adj.x) label.pos[1 + as.numeric(x > mean(range(x)))] else 3 text(x, y, labels.id[ind], cex = cex.id, xpd = TRUE, pos = labpos, offset = 0.25) } } one.fig <- prod(par("mfcol")) == 1 if (ask) { oask <- devAskNewPage(TRUE) on.exit(devAskNewPage(oask)) } if (show[1]) { # only if no covariates exists (except constant) if(p == 1) { y <- (x@residuals.dp + x@fitted.values.dp) fitted.distr <- makeSECdistr(dp, family=x@family, name="fitted distribution", compNames=colnames(x@param$dp[[1]])) plot(fitted.distr, landmarks="", data=y, cex=sqrt(rw), main=main, ...) mtext(caption[1], 3, 1.5, cex = cex.caption) } else message(paste("plot of (observed data, fitted distribution)", "makes no sense if covariates exist")) } if (show[2]) { dp0 <- dp # dp0[[1]] <- rep(0,d) dp0[[1]] <- as.numeric((dp[[1]]-param[[1]])[1,]) data.par <- list(col=list(...)$col, pch=list(...)$pch, cex=sqrt(rw)) resid.distr <- makeSECdistr(dp0, family=x@family, name="Residual distribution", compNames=colnames(x@residuals.dp)) plot(resid.distr, landmarks="", data=residuals(x, param.type), main=main, data.par=data.par) # mtext(caption[2], 3, 0.25, cex = cex.caption) mtext(caption[2], 3, 1.5, cex = cex.caption) } if (show[3]) { # ylim <- c(0, max(pretty(rs2))) q <- qf((1:n)/(n+1), d, nu.) * d plot(q, sort(rs2), xlab="theoretical values", ylab="empirical values", main=main, cex=sqrt(rw), ...) if(identline) abline(0, 1, lty = 2, col = "gray50") # if (one.fig) title(sub = sub.caption, ...) mtext(caption[3], 3, 0.25, cex = cex.caption) if (id.n > 0) text.id(q[n+1-iid], rs2[show.rs], show.rs) } if (show[4]) { p <- pf(rs2/d, d, nu.) p0 <- (1:n)/(n+1) plot(p0, sort(p), xlab="theoretical values", ylab="empirical values", xlim=c(0,1), ylim=c(0,1), main=main, cex=sqrt(rw), ...) if(identline) abline(0, 1, lty = 2, col = "gray50") # if (one.fig) title(sub = sub.caption, ...) mtext(caption[4], 3, 0.25, cex = cex.caption) # if (id.n > 0) text.id(p[show.rs], p0[n+1-iid], show.rs) } # if (!one.fig && par("oma")[3] >= 1) # mtext(sub.caption, outer = TRUE, cex = 1.25) invisible() } print.summary.mselm <- function(object) { obj <- object digits = max(3, getOption("digits") - 3) # cat("Obj: ", deparse(substitute(obj)),"\n") cat("Call: ") print(slot(obj,"call")) n <- obj@size["n.obs"] d <- obj@size["d"] # p <- obj@size["p"] cat("Number of observations:", n, "\n") nw <- obj@size["nw.obs"] if(n != nw) cat("Weighted number of observations:", nw, "\n") family <- slot(obj, "family") cat("Family:", family, "\n") method <- slot(object, "method") u <- if(length(method)==1) NULL else paste(", penalty function:", method[2]) cat("Estimation method: ", method[1], u, "\n", sep="") fixed <- slot(obj, "param.fixed") if(length(fixed) > 0) {fixed.char <- paste(names(fixed), format(fixed), sep=" = ", collapse=", ") cat("Fixed parameters:", fixed.char, "\n") } cat("Log-likelihood:", format(slot(obj,"logL"), nsmall=2), "\n") cat("Parameter type:", obj@param.type,"\n") if(obj@boundary) cat("Estimates on/near the boundary of the parameter space\n") names <- dimnames(obj@scatter$matrix)[[1]] for(j in 1:d) { param <- obj@coef.tables[[j]] cat("\n--- Response variable No.", j, ": ", names[j],"\n",sep="") resid <- obj@resid[,j] if(n>5) { nam <- c("Min", "1Q", "Median", "3Q", "Max") rq <- if (length(dim(resid)) == 2) structure(apply(t(resid), 1, quantile), dimnames = list(nam, dimnames(resid)[[2]])) else structure(quantile(resid), names = nam) cat(obj@param.type, "residuals\n") print(rq, digits = digits) } cat("\nRegression coefficients\n") printCoefmat(param[, ,drop=FALSE], digits = digits, signif.stars = getOption("show.signif.stars"), na.print = "NA") } cat("\n--- Parameters of the SEC random component\n") cat("Scatter matrix: ", obj@scatter$name,"\n", sep="") print(obj@scatter$matrix) cat("\nSlant parameter: ", obj@slant$name, "\n", sep="") print(cbind(estimate=obj@slant$param, std.err=obj@slant$se)) if(length(obj@tail) > 0) { cat("\nTail-weight parameter: ", obj@tail$name, "\n", sep="") print(c(estimate=obj@tail$param, std.err=obj@tail$se)) } if(!is.null(obj@aux$param.cor)) { cat("\nCorrelations of parameter estimates:\n") print(obj@aux$param.cor) } if(!is.null(obj@aux$param.cov)) { cat("\nVar-covariance matrix of parameter estimates:\n") print(obj@aux$param.cov) } } sn/R/sn_S4.R0000644000176000001440000003413012255404305012235 0ustar ripleyusers# file sn/R/sn_S4.R (S4 methods and classes) # This file is a component of the package 'sn' for R # copyright (C) 1997-2014 Adelchi Azzalini # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 or 3 of the License # (at your option). # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # A copy of the GNU General Public License is available at # http://www.r-project.org/Licenses/ #--------- setClass("SECdistrUv", representation(family="character", dp="numeric", name="character"), validity=function(object){ if(!(object@family %in% c("SN","ST","SC","ESN"))) return(FALSE) np <- 3 + as.numeric(object@family %in% c("ST","ESN")) if(length(object@dp) != np) return(FALSE) if(object@dp[2] <= 0) return(FALSE) TRUE } ) setClass("summary.SECdistrUv", representation(family="character", dp="numeric", name="character", cp="numeric", cp.type="character", aux="list"), validity=function(object){ if(!(object@family %in% c("SN","ST","SC","ESN"))) return(FALSE) np <- 3 + as.numeric(object@family %in% c("ST","ESN")) if(length(object@dp) != np) return(FALSE) if(object@dp[2] <= 0) return(FALSE) if(length(object@cp) != length(object@dp)) return(FALSE) TRUE } ) setClass("SECdistrMv", representation(family="character", dp="list", name="character", compNames="character"), validity=function(object){ if(!(object@family %in% c("SN","ST","SC","ESN"))) return(FALSE) np <- 3 + as.numeric(object@family %in% c("ST","ESN")) dp <- object@dp if(mode(unlist(dp)) != "numeric") return(FALSE) if(length(dp) != np) return(FALSE) d <- length(dp[[3]]) Omega <- dp[[2]] if(length(dp[[1]]) != d | any(dim(Omega) != c(d,d))) return(FALSE) if(any(Omega != t(Omega))) {message("non-symmetric Omega"); return(FALSE)} if(any(eigen(Omega, symmetric=TRUE, only.values=TRUE)$values <= 0)) { message("Omega not positive-definite") return(FALSE)} if(object@family == "ST") { if(dp[[4]] <= 0) return(FALSE) } if(length(object@compNames) != d) return(FALSE) if(length(object@name) != 1) return(FALSE) TRUE } ) setClass("summary.SECdistrMv", representation(family="character", dp="list", name="character", compNames="character", cp="list", cp.type="character", aux="list"), validity=function(object){ family <- object@family if(!(family %in% c("SN","ST","SC","ESN"))) return(FALSE) np <- 3 + as.numeric(family %in% c("ST","ESN")) dp <- object@dp if(mode(unlist(dp)) != "numeric") return(FALSE) if(length(dp) != np) return(FALSE) d <- length(dp[[3]]) if(length(dp[[1]]) != d | any(dim(dp[[2]]) != c(d,d))) return(FALSE) if(family == "ST") { if(dp[[4]] <= 0) return(FALSE) } if(length(object@compNames) != d) return(FALSE) if(length(object@name) != 1) return(FALSE) if(length(object@cp) != length(object@dp)) return(FALSE) TRUE } ) setMethod("show", "SECdistrUv", function(object){ if(object@name != "") cat("Probability distribution of variable '", object@name, "'\n", sep="") cat("Skew-elliptically contoured distribution of univariate family", object@family,"\nDirect parameters:\n") print(object@dp) } ) setMethod("show","SECdistrMv", function(object){ if(object@name != "") cat("Probability distribution of variable '", object@name, "'\n", sep="") dp <- object@dp attr(dp[[2]],"dimnames") <- list(paste("Omega[", object@compNames, ",]", sep=""), NULL) cat("Skew-elliptically contoured distribution of ", length(dp[[3]]), "-dimensional family ", object@family,"\nDirect parameters:\n", sep="") out <- rbind(xi=dp[[1]], Omega=dp[[2]], alpha=dp[[3]]) colnames(out) <- object@compNames print(out) if(object@family=="ST") cat("nu", "=", dp[[4]], "\n") if(object@family=="ESN") cat("tau", "=", dp[[4]], "\n") } ) # #-------------------- setMethod("show", "summary.SECdistrUv", function(object){ obj <- object if(obj@name != "") cat("Probability distribution of variable '", obj@name, "'\n", sep="") cat("\nSkew-elliptical distribution of univariate family", obj@family,"\n") cat("\nDirect parameters (DP):\n") print(c("",format(obj@dp)), quote=FALSE) cp <- obj@cp note <- if(obj@cp.type == "proper") NULL else ", type=pseudo-CP" cat(paste("\nCentred parameters (CP)", note, ":\n", sep="")) print(c("",format(cp)), quote=FALSE) cat("\nAuxiliary quantities:\n") print(c("",format(c(delta=obj@aux$delta, mode=obj@aux$mode))), quote=FALSE) cat("\nQuantiles:\n") q <- obj@aux$quantiles q0 <- c("q", format(q)) names(q0) <- c("p", names(q)) print(q0, quote=FALSE) measures <- rbind(obj@aux$std.cum, obj@aux$q.measures) cat("\nMeasures of skewness and kurtosis:\n ") attr(measures, "dimnames") <- list( c(" std cumulants", " quantile-based"), c("skewness", "kurtosis")) print(measures) } ) setMethod("show","summary.SECdistrMv", function(object){ obj <- object dp <- obj@dp if(obj@name != "") cat("Probability distribution of",obj@name,"\n") cat("Skew-elliptically contoured distribution of ", length(dp[[3]]), "-dimensional family ", obj@family,"\n", sep="") cat("\nDirect parameters (DP):\n") attr(dp[[2]], "dimnames") <- list(paste(" Omega[", obj@compNames, ",]", sep=""),NULL) out.dp <- rbind(" xi"=dp[[1]], omega=dp[[2]]," alpha"=dp[[3]]) colnames(out.dp) <- obj@compNames print(out.dp) if(length(dp) > 3){ extra <- unlist(dp[-(1:3)]) names(extra) <- paste(" ",names(dp[-(1:3)]), sep="") # print(extra) for(j in 1:length(extra)) cat(names(extra)[j], "=", extra[j], "\n") } cp <- obj@cp note <- if(obj@cp.type == "proper") NULL else ", type=pseudo-CP" cat("\nCentred parameters (CP)", note, ":\n", sep="") attr(cp[[2]], "dimnames") <- list(paste(" var.cov[", obj@compNames, ",]", sep=""),NULL) out.cp <- rbind(" mean"=cp[[1]], cp[[2]], " gamma1"=cp[[3]]) colnames(out.cp) <- obj@compNames print(out.cp) if(length(cp) > 3) { extra <- unlist(cp[-(1:3)]) names(extra) <- paste(" ", names(cp[-(1:3)]), sep="") for(j in 1:length(extra)) cat(names(extra)[j], "=", extra[j], "\n") } aux <- obj@aux out.aux <- rbind(" delta"=aux$delta, " mode"= aux$mode) colnames(out.aux) <- obj@compNames cat("\nAuxiliary quantities:\n") print(out.aux) cat("\nGlobal quantities:\n") cat(" alpha* =", format(aux$alpha.star), ", delta* =", format(aux$delta.star), "\n") mardia <- obj@aux$mardia cat(" Mardia's measures: gamma1M = ", format(mardia[1]), ", gamma2M = ", format(mardia[2]),"\n", sep="") invisible() } ) setClass("selm", representation(call="call", family="character", logL="numeric", method="character", param="list", param.var="list", size="vector", fixed.param="vector", residuals.dp="numeric", fitted.values.dp="numeric", control="list", input="list", opt.method="list"), validity=function(object){ if(class(object) != "selm") return(FALSE) if(!is.numeric(object@logL)) return(FALSE) if(!(object@family %in% c("SN","ST","SC","ESN"))) return(FALSE) if(!is.vector(object@param$dp)) return(FALSE) TRUE } ) setMethod("logLik", "selm", function(object){ logL <- slot(object,"logL") attr(logL, "df") <- as.numeric(slot(object, "size")["n.param"]) class(logL) <- "logLik" return(logL) } ) setMethod("coef", "selm", function(object, param.type="CP") { param <- slot(object,"param")[[tolower(param.type)]] if(is.null(param) & tolower(param.type)=="cp") { message("CP not defined, consider param.type='DP' or 'pseudo-CP'") return(NULL)} param} ) setMethod("vcov", "selm", function(object, param.type="CP") { vcov <- slot(object, "param.var")[[tolower(param.type)]] if(is.null(vcov) & tolower(param.type) == "cp") { message("CP not defined, consider param.type='DP' or 'pseudo-CP'") return(NULL)} vcov} ) setMethod("show", "selm", function(object){ # cat("Object: ", deparse(substitute(obj)),"\n") cat("Object class:", class(object), "\n") cat("Call: ") print(object@call) cat("Number of observations:", object@size["n.obs"], "\n") if(!is.null(slot(object,"input")$weights)) cat("Weighted number of observations:", object@size["nw.obs"], "\n") cat("Number of covariates:", object@size["p"], "(including constant)\n") cat("Number of parameters:", object@size["n.param"], "\n") show.family <- slot(object,"family") cat("Family:", show.family,"\n") fixed <- slot(object, "fixed.param") if(length(fixed) > 0) { fixed.char <- paste(names(fixed), format(fixed), sep=" = ", collapse=", ") cat("Fixed parameters:", fixed.char, "\n") } method <- slot(object, "method") u <- if(length(method)==1) NULL else paste(", penalty function:", method[2]) cat("Estimation method: ", method[1], u, "\n", sep="") logL.name <- paste(if(method[1]=="MLE") "Log" else "Penalized log", "likelihood:", sep="-") cat(logL.name, format(object@logL, nsmall=2),"\n") if(object@param$boundary) cat("Estimates on/near the boundary of the parameter space\n") invisible(object) } ) #---------------------------------------------------------- setClass("summary.selm", representation(call="call", family="character", logL="numeric", method="character", param.type="character", param.table="matrix", param.fixed="list", resid="numeric", control="list", aux="list", size="vector", boundary="logical"), validity=function(object){ if(!(object@family %in% c("SN","ST","SC","ESN"))) return(FALSE) TRUE } ) #---------------------------------------------------------- setClass("mselm", representation(call="call", family="character", logL="numeric", method="character", param="list", param.var="list", size="vector", residuals.dp="matrix", fitted.values.dp="matrix", control="list", input="list", opt.method="list"), validity=function(object){ if(class(object) != "mselm") return(FALSE) if(!is.numeric(object@logL)) return(FALSE) if(!(object@family %in% c("SN","ST","SC","ESN"))) return(FALSE) if(!is.list(object@param$dp)) return(FALSE) TRUE } ) setMethod("logLik", "mselm", function(object){ logL <- slot(object,"logL") attr(logL, "df") <- as.numeric(slot(object, "size")["n.param"]) class(logL) <- "logLik" return(logL) } ) setMethod("coef", "mselm", function(object, param.type="CP", vector=TRUE) { list <- slot(object,"param")[[tolower(param.type)]] if(is.null(list) & tolower(param.type)=="cp") { message("CP not defined, consider param.type='DP' or 'pseudo-CP'") return(NULL)} if(!vector) return(list) as.vector(c(list[[1]], vech(list[[2]]), unlist(list[3:length(list)]))) } ) setMethod("vcov", "mselm", function(object, param.type="CP") { vcov <- slot(object,"param.var")[[tolower(param.type)]] if(is.null(vcov) & tolower(param.type) == "cp") { message("CP not defined, consider param.type='DP' or 'pseudo-CP'") return(NULL)} vcov} ) setMethod("show", "mselm", function(object){ cat("Object class:", class(object), "\n") cat("Call: ") print(object@call) cat("Number of observations:", object@size["n.obs"], "\n") if(!is.null(slot(object,"input")$weights)) cat("Weighted number of observations:", object@size["nw.obs"], "\n") cat("Dimension of the response:", object@size["d"], "\n") cat("Number of covariates:", object@size["p"], "(including constant)\n") cat("Number of parameters:", object@size["n.param"], "\n") show.family <- slot(object, "family") cat("Family:", show.family,"\n") method <- slot(object, "method") u <- if(length(method)==1) NULL else paste(", penalty function:", method[2]) cat("Estimation method: ", method[1], u, "\n", sep="") fixed <- slot(object, "param")$fixed if(length(fixed) > 0) { fixed.char <- paste(names(fixed), format(fixed), sep=" = ", collapse=", ") cat("Fixed parameters:", fixed.char, "\n") } cat("Log-likelihood:", format(object@logL, nsmall=2),"\n") if(object@param$boundary) cat("Estimates on/near the boundary of the parameter space\n") invisible(object) } ) #---------------------------------- setClass("summary.mselm", representation(call="call", family="character", logL="numeric", method="character", param.type="character", param.fixed="list", resid="matrix", coef.tables="list", scatter="list", slant="list", tail="list", control="list", aux="list", size="vector", boundary="logical"), validity=function(object) { if(!(object@family %in% c("SN","ST","SC","ESN"))) return(FALSE) TRUE } ) setMethod("plot", signature(x="SECdistrUv", y="missing"), plot.SECdistrUv) setMethod("plot", signature(x="SECdistrMv", y="missing"), plot.SECdistrMv) setMethod("plot", signature(x="selm"), plot.selm) # y="missing" not required? setMethod("plot", signature(x="mselm"), plot.mselm) setMethod("show", signature(object="summary.selm"), print.summary.selm) setMethod("show", signature(object="summary.mselm"), print.summary.mselm) setMethod("summary", signature(object="SECdistrUv"), summary.SECdistrUv) setMethod("summary", signature(object="SECdistrMv"), summary.SECdistrMv) setMethod("summary", signature(object="selm"), summary.selm) setMethod("summary", signature(object="mselm"), summary.mselm) setMethod("fitted", signature(object="selm"), fitted.selm) setMethod("fitted", signature(object="mselm"), fitted.mselm) setMethod("residuals", signature(object="selm"), residuals.selm) setMethod("residuals", signature(object="mselm"), residuals.mselm) sn/R/zzz.R0000644000176000001440000000065312262547512012115 0ustar ripleyusers.onAttach <- function(library, pkg) { # require("stats4") # require("methods") # require("mnormt") # require("numDeriv") if(interactive()) { meta <- packageDescription("sn") packageStartupMessage( "Package 'sn', ", meta$Version, " (", meta$Date, "). ", "Type 'help(SN)' for summary information.", "\n...especially so if have used version 0.x-y in the past") } invisible() } sn/MD50000644000176000001440000000371412262756006011245 0ustar ripleyusersf422c95fcf74d357c58e803a6d6660b3 *ChangeLog ea0bcac6f0692c00418362a0df9b985d *DESCRIPTION f53a5627c73d68df9fa256b133fab134 *NAMESPACE 116a06672c4b561d9074ac3233f45937 *R/sn-funct.R 800a5ac89a6bab57984e8c19cdcf3401 *R/sn_S4.R 9cff34fc1e7aaca429c2e7a579dd66ac *R/zzz.R 9186fad7fe70906719fc7f5f39e5a59c *data/ais.rda 1f07f4e955750fe3e72524a9e96a24d9 *data/barolo.rda 1d07e94091e33c744dc9bfcd644815c4 *data/frontier.rda d9062c89c0d34987548c9020bb0a2fc2 *data/wines.rda 6ba44b5fd65b0e109e4b12b84c20d533 *inst/CITATION b4c9d2a4273e3200c22155601e36a39a *man/Qpenalty.Rd 414f2a68040cdbc7e07654f4a5815260 *man/SECdistrMv-class.Rd bab56a4c692d9eb66bf5143724908da5 *man/SECdistrUv-class.Rd 3f63182baad164cf5e87610ab3da1a8d *man/T.Owen.Rd 460eabafbf1ddf65fce84fddff45496d *man/affineTransSECdistr.Rd 7dda1c9ecca749bbbbba933a8cc775c6 *man/ais.Rd 4a0bec5051785d8c2add857caee8beba *man/barolo.Rd f9361d9091a1e5b06fa9583154ddfbe6 *man/conditionalSECdistr.Rd 087578c731d16a9210597d9af467c000 *man/dmsn.Rd 071131fc0c267119b237fccc049a0d15 *man/dmst.Rd 96c56024c6f2f3f57dabd4969f5a5221 *man/dp2cp.Rd ce29a82c6d7f74067423e821d0696bed *man/dsc.Rd b4ac9091be00a987bbccfa86f74cecf5 *man/dsn.Rd 604e55cb7885777c45c24ac66c5c067d *man/dst.Rd 8218c6b9b657d978d3000a194bd374c8 *man/frontier.Rd 1cee59c55212a1f5464b3ed3b1a49d2a *man/makeSECdistr.Rd 2ec4d42b979d20537f7f0c86f6abf2ea *man/plot.SECdistr.Rd 369d96b67cd5e2c34b2beeb10603c932 *man/plot.selm.Rd 9ec5183835d7a366ee00df27b643c85b *man/selm-class.Rd 80f5e524894aa9e877287f64b7c0f9c3 *man/selm.Rd 6af646d5593caf938e0b96af10e3ae23 *man/selm.fit.Rd f1577d22428d3a2cc3845ee39ccfe9dc *man/sn-st.cumulants.Rd 34fc1f317ed639d5259d37b5b7ec0d97 *man/sn-st.info.Rd bc138da2a8b4c6bd54ddef3327948636 *man/sn.Rd ac9e41daa309fce5cf5fc2f3f389880a *man/summary.SECdistr-class.Rd 23278491bfa85ddfe770bae2e6060abb *man/summary.SECdistr.Rd fa13c81ec2ba052bdaf9cb2ecbf41e9a *man/summary.selm.Rd ba5b1d43def7b31a4c57c53992f4ae3f *man/wines.Rd 614af194724f323cbe7c81b7b8769463 *man/zeta.Rd sn/DESCRIPTION0000644000176000001440000000127612262756006012444 0ustar ripleyusersPackage: sn Version: 1.0-0 Date: 2014-01-06 Title: The skew-normal and skew-t distributions Author: Adelchi Azzalini Maintainer: Adelchi Azzalini Depends: R (>= 2.15.3), methods, mnormt, numDeriv, stats4 Description: Define and manipulate probability distributions of the skew-normal family and some related ones (notably the skew-t family) and provide related statistical methods for data fitting and diagnostics, in the univariate and in the multivariate case. License: GPL-2 | GPL-3 URL: http://azzalini.stat.unipd.it/SN Packaged: 2014-01-07 09:03:56 UTC; aa NeedsCompilation: no Repository: CRAN Date/Publication: 2014-01-07 11:50:46 sn/ChangeLog0000644000176000001440000000206612262557562012514 0ustar ripleyusers version 0.20 (Oct.1998): first public release and distribution via WWW, use optim version 0.22.1 (2001-05-17) version 0.22.2 (2002-01-05) fix error in sn.dev.gh, improved qsn version 0.30 (2002-06-15) main change is the addition of routines for (multiple) skew-t distribution; also some other routines, e.g. mle for grouped data version 0.3x (2003--2005) added some new functions (these include msn.affine, sn.mmle, sn.Einfo, sn.mle.grouped), fix various errors, and other improvements (eg. improved pst) version 0.4-0 (2006-04-11) several changes and additions are included: - many routines allow use of composite parameter 'dp' - multivariate normal and t probabilities are now computed by 'mnormt' - use of NAMESPACE introduced - some more routines introduced, eg. st.cumulants.inversion - various fixes/improvements in documentation version 1.0-0 (2014-01-06) a major upgrade of the package: key new functions are selm and makeSECdistr, with several related ones; S4 methods are adopted. Many existing functions are updated. sn/man/0000755000176000001440000000000012262741374011505 5ustar ripleyuserssn/man/dst.Rd0000644000176000001440000000614012255403655012566 0ustar ripleyusers% file sn/man/dst.Rd % This file is a component of the package 'sn' for R % copyright (C) 2002-2013 Adelchi Azzalini %--------------------- \name{dst} \alias{dst} \alias{pst} \alias{qst} \alias{rst} \title{Skew-\eqn{t} Distribution} \description{Density function, distribution function, quantiles and random number generation for the skew-\eqn{t} (ST) distribution} \usage{ dst(x, xi=0, omega=1, alpha=0, nu=Inf, dp=NULL, log=FALSE) pst(x, xi=0, omega=1, alpha=0, nu=Inf, dp=NULL, ...) qst(p, xi=0, omega=1, alpha=0, nu=Inf, tol=1e-08, dp=NULL, ...) rst(n=1, xi=0, omega=1, alpha=0, nu=Inf, dp=NULL) } \arguments{ \item{x}{vector of quantiles. Missing values (\code{NA}s) are allowed.} \item{p}{vector of probabililities.} \item{xi}{vector of location parameters.} \item{omega}{vector of scale parameters; must be positive.} \item{alpha}{vector of slant parameters. With \code{pst} and \code{qst}, it must be of length 1.} \item{nu}{degrees of freedom (scalar); default is \code{nu=Inf} which corresponds to the skew-normal distribution. } \item{dp}{a vector of length 4, whose elements represent location, scale (positive), slant and degrees of freedom, respectively. If \code{dp} is specified, the individual parameters cannot be set. } \item{n}{sample size} \item{log}{logical; if TRUE, densities are given as log-densities} \item{tol}{ a scalar value which regulates the accuracy of the result of \code{qsn}. } \item{...}{additional parameters passed to \code{integrate}} } \value{Density (\code{dst}), probability (\code{pst}), quantiles (\code{qst}) and random sample (\code{rst}) from the skew-\eqn{t} distribution with given \code{xi}, \code{omega}, \code{alpha} and \code{nu} parameters.} \section{Details}{ Typical usages are \preformatted{% dst(x, xi=0, omega=1, alpha=0, nu=Inf, log=FALSE) dst(x, dp=, log=FALSE) pst(x, xi=0, omega=1, alpha=0, nu=Inf, ...) pst(x, dp=, log=FALSE) qst(p, xi=0, omega=1, alpha=0, nu=Inf, tol=1e-8, ...) qst(x, dp=, log=FALSE) rst(n=1, xi=0, omega=1, alpha=0, nu=Inf) rst(x, dp=, log=FALSE) } } \section{Background}{ The family of skew-t distributions is an extension of the Student's \eqn{t} family, via the introduction of a \code{alpha} parameter which regulates skewness; when \code{alpha=0}, the skew-\eqn{t} distribution reduces to the usual Student's \eqn{t} distribution. When \code{nu=Inf}, it reduces to the skew-normal distribution. A multivariate version of the distribution exists. See Chapter 4 of the reference below for additional information. } \references{ % Azzalini, A. and Capitanio, A. (2003). % Distributions generated by perturbation of symmetry % with emphasis on a multivariate skew-\emph{t} distribution. % \emph{J.Roy. Statist. Soc. B} \bold{65}, 367--389. % Azzalini, A. and Capitanio, A. (2014). \emph{The Skew-normal and Related Families}. Cambridge University Press, IMS Monographs series. } \seealso{\code{\link{dmst}}, \code{\link{dsn}}} \examples{ pdf <- dst(seq(-4,4,by=0.1), alpha=3, nu=5) rnd <- rst(100, 5, 2, -5, 8) q <- qst(c(0.25,0.5,0.75), alpha=3, nu=5) pst(q, alpha=3, nu=5) # must give back c(0.25,0.5,0.75) } \keyword{distribution} sn/man/dmsn.Rd0000644000176000001440000001012112255403543012723 0ustar ripleyusers% file sn/man/dmsn.Rd % This file is a component of the package 'sn' for R % copyright (C) 1998-2013 Adelchi Azzalini %--------------------- \name{dmsn} \alias{dmsn} \alias{pmsn} \alias{rmsn} \concept{skew-normal distribution} \title{Multivariate skew-normal distribution} \description{ Probability density function, distribution function and random number generation for the multivariate skew-normal (\acronym{SN}) distribution. } \usage{ dmsn(x, xi=rep(0,length(alpha)), Omega, alpha, tau=0, dp=NULL, log=FALSE) pmsn(x, xi=rep(0,length(alpha)), Omega, alpha, tau=0, dp=NULL, ...) rmsn(n=1, xi=rep(0,length(alpha)), Omega, alpha, tau=0, dp=NULL) } \arguments{ \item{x}{for \code{dmsn}, this is either a vector of length \code{d}, where \code{d=length(alpha)}, or a matrix with \code{d} columns, giving the coordinates of the point(s) where the density must be evaluated. For \code{pmsn}, only a vector of length \code{d} is allowed.} \item{xi}{a numeric vector of length \code{d} representing the location parameter of the distribution; see \sQuote{Background}. In a call to \code{dmsn}, \code{xi} can be a matrix; in this case, its dimensions must agree with those of \code{x}.} \item{Omega}{a symmetric positive-definite matrix of dimension \code{(d,d)}; see \sQuote{Background}.} \item{alpha}{a numeric vector which regulates the slant of the density; see \sQuote{Background}. \code{Inf} values in \code{alpha} are not allowed.} \item{tau}{a single value representing the `hidden mean' parameter of the \acronym{ESN} distribution; \code{tau=0} (default) corresponds to a \acronym{SN} distribution.} \item{dp}{ a list with three elements, corresponding to \code{xi}, \code{Omega} and \code{alpha} described above; default value \code{FALSE}. If \code{dp} is assigned, individual parameters must not be specified. } \item{n}{a numeric value which represents the number of random vectors to be drawn.} \item{log}{logical (default value: \code{FALSE}); if \code{TRUE}, log-densities are returned.} \item{...}{ additional parameters passed to \code{pmnorm} }} \value{ A vector of density values (\code{dmsn}), or a single probability (\code{pmsn}) or a matrix of random points (\code{rmsn}). } \details{Typical usages are \preformatted{% dmsn(x, xi=rep(0,length(alpha)), Omega, alpha, log=FALSE) dmsn(x, dp=, log=FALSE) pmsn(x, xi=rep(0,length(alpha)), Omega, alpha, ...) pmsn(x, dp=) rmsn(n=1, xi=rep(0,length(alpha)), Omega, alpha) rmsn(n=1, dp=) } Function \code{pmsn} makes use of \code{pmnorm} from package \pkg{mnormt}; the accuracy of its computation can be controlled via \code{...} } \section{Background}{ The multivariate skew-normal distribution is discussed by Azzalini and Dalla Valle (1996). The \code{(Omega,alpha)} parametrization adopted here is the one of Azzalini and Capitanio (1999). Chapter 5 of Azzalini and Capitanio (2014) provides an extensive account, including subsequent developments. Notice that the location vector \code{xi} does not represent the mean vector of the distribution. Similarly, \code{Omega} is not \emph{the} covariance matrix of the distribution, although it is \emph{a} covariance matrix. } \references{ Azzalini, A. and Dalla Valle, A. (1996). The multivariate skew-normal distribution. \emph{Biometrika} \bold{83}, 715--726. Azzalini, A. and Capitanio, A. (1999). Statistical applications of the multivariate skew normal distribution. \emph{J.Roy.Statist.Soc. B} \bold{61}, 579--602. Full-length version available at \url{http://arXiv.org/abs/0911.2093} Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. } \seealso{\code{\link{dsn}}, \code{\link{dmst}}, \code{\link[mnormt]{dmnorm}}} \examples{ x <- seq(-3,3,length=15) xi <- c(0.5, -1) Omega <- diag(2) Omega[2,1] <- Omega[1,2] <- 0.5 alpha <- c(2,-6) pdf <- dmsn(cbind(x, 2*x-1), xi, Omega, alpha) rnd <- rmsn(10, xi, Omega, alpha) p1 <- pmsn(c(2,1), xi, Omega, alpha) p2 <- pmsn(c(2,1), xi, Omega, alpha, abseps=1e-12, maxpts=10000) } \keyword{distribution} \keyword{multivariate} sn/man/selm-class.Rd0000644000176000001440000001055212262547304014037 0ustar ripleyusers% file sn/man/selm-class.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{selm-class} \Rdversion{1.1} \docType{class} \alias{selm-class} \alias{coef,selm-method} \alias{logLik,selm-method} \alias{plot,selm,ANY-method} \alias{plot,selm,missing-method} \alias{show,selm-method} \alias{fitted,selm-method} \alias{residuals,selm-method} \alias{vcov,selm-method} % \alias{mselm-class} \alias{coef,mselm-method} \alias{logLik,mselm-method} \alias{plot,mselm,ANY-method} \alias{plot,mselm,missing-method} \alias{show,mselm-method} \alias{fitted,mselm-method} \alias{residuals,mselm-method} \alias{vcov,mselm-method} \title{Classes \code{"selm"} and \code{"mselm"} of objects created by function \code{selm}} \description{A successful call to function \code{selm} creates an object of either of these classes, having a structure described in section \sQuote{Slots}. A set of methods for these classes of objects exist, listed in section \sQuote{Methods}.} \section{Objects from the class}{ An object can be created by a successful call to function \code{selm}.} \section{Slots}{ \describe{ \item{\code{call}:}{the calling statement.} \item{\code{family}:}{the parametric family of skew-ellitically contoured distributed (SEC) type.} \item{\code{logL}:}{log-likelihood or penalized log-likelihood value achieved at the end of the maximization process.} \item{\code{method}:}{estimation method (\code{"MLE"} or \code{"MPLE"}).} \item{\code{param}:}{estimated parameters, for various parameterizations.} \item{\code{param.var}:}{approximate variance matrices of the parameter estimates, for various parameterizations.} \item{\code{size}:}{a numeric vector with size of various components.} \item{\code{fixed.param}:}{a vector of parameters which have been kept fixed in the fitting process, if any. Currently only \code{nu} of the \code{"ST"} family can be fixed.} \item{\code{residuals.dp}:}{residual values, for DP-type parameters.} \item{\code{fitted.values.dp}:}{fitted values, for DP-type parameters.} \item{\code{control}:}{a list with control parameters.} \item{\code{input}:}{a list of selected input values.} \item{\code{opt.method}:}{a list with details on the optimization method.} } } \section{Methods}{ \tabular{ll}{ \code{coef} \tab \code{signature(object = "selm")}: ... \cr \code{logLik} \tab \code{signature(object = "selm")}: ... \cr % \code{plot} \tab \code{signature(x = "selm", y = "ANY")}: ... \cr % \code{plot} \tab \code{signature(x = "selm", y = "missing")}: ... \cr \code{plot} \tab \code{signature(x = "selm")}: ... \cr \code{show} \tab \code{signature(object = "selm")}: ... \cr \code{summary} \tab \code{signature(object = "selm")}: ... \cr \code{residuals} \tab \code{signature(object = "selm")}: ... \cr \code{fitted} \tab \code{signature(object = "selm")}: ... \cr \code{vcov} \tab \code{signature(object = "selm")}: ... \cr \tab \cr \code{coef} \tab \code{signature(object = "mselm")}: ... \cr \code{logLik} \tab \code{signature(object = "mselm")}: ... \cr \code{plot} \tab \code{signature(x = "mselm")}: ... \cr \code{show} \tab \code{signature(object = "mselm")}: ... \cr \code{summary} \tab \code{signature(object = "mselm")}: ... \cr \code{residuals} \tab \code{signature(object = "mselm")}: ... \cr \code{fitted} \tab \code{signature(object = "mselm")}: ... \cr \code{vcov} \tab \code{signature(object = "mselm")}: ... \cr } } %\references{%% ~~put references to the literature/web site here~~} \author{Adelchi Azzalini} \note{See \code{\link{dp2cp}} for a description of possible parameter sets.} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{See also \code{\link{selm}} function, \code{\link{plot.selm}}, \code{\linkS4class{summary.selm}}, \code{\link{dp2cp}} } \examples{ data(ais) m1 <- selm(log(Fe) ~ BMI + LBM, family="SN", data=ais) summary(m1) plot(m1) logLik(m1) res <- residuals(m1) fv <- fitted(m1) # data(wines, package="sn") m2 <- selm(alcohol ~ malic + phenols, data=wines) # m12 <- selm(cbind(acidity, alcohol) ~ phenols + wine, family="SN", data=wines) coef(m12) cp <- coef(m12, vector=FALSE) dp <- coef(m12, "DP", vector=FALSE) plot(m12) plot(m12, which=2, col="gray60", pch=20) } \keyword{classes} sn/man/dp2cp.Rd0000644000176000001440000001231612255403612012777 0ustar ripleyusers% file sn/man/dp2cp.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{dp2cp} \alias{dp2cp} \alias{cp2dp} \title{Conversion between parametrizations of a skew-elliptical distribution} \description{ Convert direct parameters (\acronym{DP}) to centred parameters (\acronym{CP}) of a skew-elliptical distribution and \emph{vice versa}.} \usage{ dp2cp(dp, family, obj = NULL, cp.type = "proper", upto = NULL) cp2dp(cp, family) } \arguments{ \item{dp}{a vector (in the univariate case) or a list (in the multivariate case) as described in \code{\link{makeSECdistr}}; see \sQuote{Details} for an extented form of usage.} \item{cp}{a vector or a list, in agreement with \code{dp} as for type and dimension.} \item{family}{a characther string, as described in \code{\link{makeSECdistr}}.} \item{obj}{optionally, an S4 object of class \code{SECdistrUv} or \code{SECdistrMv}, as produced by \code{\link{makeSECdistr}} (default value: \code{NULL}). If this argument is not \code{NULL}, then \code{family} and \code{dp} must not be set.} \item{cp.type}{character string, which has effect only if \code{family="ST"} or \code{"SC"}, otherwise a warning message is generated. Possible values are \kbd{"proper", "pseudo", "auto"}, which correspond to the \acronym{CP} parameter set, their `pseudo-\acronym{CP}' version and an automatic selection based on \code{nu>4}, where \code{nu} represents the degrees of freedom of the \acronym{ST} distribution.} \item{upto}{numeric value (in \code{1:length(dp)}, default=\code{NULL}) to select how many \acronym{CP} components are computed. Default value \code{upto=NULL} is equivalent to \code{length(dp)}.} } \value{for \code{dp2cp}, a matching vector (in the univariate case) or a list (in the multivariate case) of \code{cp} parameters; for \code{cp2dp}, a similar object of \code{dp} parameters.} \section{Details and Background}{For a description of the \acronym{DP} parameters, see Section \sQuote{Details} of \code{\link{makeSECdistr}}. The \acronym{CP} form of parameterization is cumulant-based. For a univariate distribution, the \acronym{CP} components are the mean value (first cumulant), the standard deviation (square root of the 2nd cumulant), the coefficient of skewness (3rd standardized cumulant) and, for the \acronym{ST}, the coefficient of excess kurtosis (4th standardized cumulant). For a multivariate distribution, there exists an extension based on the \ same logic; its components represent the vector mean value, the variance matrix, the vector of marginal coefficients of skewness and, only for the \acronym{ST}, the Mardia's coefficient of excess kurtosis. The pseudo-\acronym{CP} variant provides an `approximate form' of \acronym{CP} when not all required cumulants exist; however, this parameter set is not uniquely invertible to \acronym{DP}. The names of pseudo-\acronym{CP} components printed in summary output are composed by adding a \code{~} after the usual component name; for example, the first one is denoted \code{mean~}. Background information is provided by Azzalini and Capitanio (2014). Specifically, their Section 3.1.4 presents \acronym{CP} in the univariate \acronym{SN} case, Section 4.3.4 \acronym{CP} for the \acronym{ST} case and the `pseudo-\acronym{CP}' version. Section 5.2.3 presents the multivariate extension for the \acronym{SN} distribution, Section 6.2.5 for the multivariate \acronym{ST} case. For a more detailed discussion, see Arellano-Valle and Azzalini (2013). It is possible to call the functions with \code{dp} or \code{cp} having more components than those expected for a given family as described above and in \code{\link{makeSECdistr}}. In the univariate case, this means that \code{dp} or \code{cp} can be vectors of longer length than indicated earlier. This occurrence is interpreted in the sense that the additional components after the first one are regarded as regression coefficients of a \code{selm} model, and they are transferred unchanged to the matching components of the transformed parameter set; the motivation is given in Section 3.1.4 of Azzalini and Capitanio (2014). In the multivariate case, \code{dp[[1]]} and \code{cp[[1]]} can be matrices instead of vectors; the rows beyond the first one are transferred unchanged to \code{cp[[1]]} and \code{dp[[1]]}, respectively. } \references{ Arellano-Valle, R. B. and Azzalini, A. (2013, available on-line 12 June 2011). The centred parameterization and related quantities of the skew-\emph{t} distribution. \emph{J. Multiv. Analysis} \bold{113}, 73-90. Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. } \seealso{ \code{\link{makeSECdistr}}, \code{\link{summary.SECdistr}}, \code{\link{sn.cumulants}}, the \sQuote{Note} at \code{\link{summary.selm}} } \examples{ # univariate case cp <- dp2cp(c(1, 2222, 3333, 2, 3), "SN") dp <- cp2dp(cp, "SN") # notice that 2nd and 3rd component remain unchanged # # multivariate case dp3 <- list(xi=1:3, Omega=toeplitz(1/(1:3)), alpha=c(-3, 8, 5), nu=6) cp3 <- dp2cp(dp3, "ST") dp3.back <- cp2dp(cp3, "ST") } \keyword{distribution} sn/man/plot.SECdistr.Rd0000644000176000001440000001453312255403723014432 0ustar ripleyusers% file sn/man/plot.SECdistr.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{plot.SECdistr} \docType{methods} %\alias{plot,ANY,ANY-method} % \alias{plot,profile.mle,missing-method} % \alias{show,SECdistrMv-method} % \alias{show,SECdistrUv-method} \alias{plot.SECdistr} \alias{plot.SECdistrUv} \alias{plot.SECdistrMv} \alias{plot,SECdistrMv,missing-method} \alias{plot,SECdistrUv,missing-method} \alias{plot,SECdistrMv-method} \alias{plot,SECdistrUv-method} \title{Plotting methods for classes \code{SECdistrUv} and \code{SECdistrMv}} \description{Methods for classes \code{SECdistrUv} and \code{SECdistrMv}} \usage{ \S4method{plot}{SECdistrUv}(x, range, probs, main, npt = 251, \dots) \S4method{plot}{SECdistrMv}(x, range, probs, npt, landmarks = "auto", main, comp, compLabs, data = NULL, data.par = NULL, gap = 0.5, \dots) } \arguments{ \item{x}{an object of the pertaining class.} % \item{y}{not used, required by the generic \code{plot(x, y, ...)} function.} \item{range}{in the univariate case, a vector of length 2 which defines the plotting range; in the multivariate case, a matrix with two rows where each column defines the plotting range of the corresponding component variable. If missing, a sensible choice is made.} \item{probs}{a vector of probability values. In the univariate case, the corresponding quantiles are plotted on the horizontal axis; it can be skipped by setting \code{probs=NULL}. In the multivariate case, each probability value corresponds to a contour level in each bivariate plot; at least one probability value is required. See \sQuote{Details} for further information. Default value: \code{c(0.05, 0.25, 0.5, 0.75, 0.95)} in the univariate case, \code{c(0.25, 0.5, 0.75, 0.95)} in the multivariate case.} \item{npt}{a numeric value or vector (in the univariate and in the multivariate case, respectively) to assign the number of evaluation points of the distribution, on an equally-spaced grid over the \code{range} defined above. Default value: 251 in the univariate case, a vector of 101's in the multivariate case.} \item{landmarks}{a character string which affects the placement of some landmark values in the multivariate case, that is, the origin, the mode and the mean (or its substitute pseudo-mean), which are all aligned. Possible values: \code{"proper"}, \code{"pseudo"}, \code{"auto"} (default), \code{""}. The option \code{""} prevents plotting of the landmarks. With the other options, the landmarks are plotted, with some variation in the last one: \code{"proper"} plots the proper mean value, \code{"pseudo"} plots the pseudo-mean, useful when the proper mean does not exists, \code{"auto"} plots the proper mean if it exists, otherwise it switches automatically to the pseudo-mean. See \code{\link{dp2cp}} for more information on pseudo-\acronym{CP} parameters, including pseudo-mean.} \item{main}{a character string for main title; if missing, one is built from the available ingredients.} \item{comp}{a subset of the vector \code{1:d}, if \code{d} denotes the dimensionality of the multivariate distribution.} \item{compLabs}{a vector of character strings or expressions used to denote the variables in the plot; if missing, \code{slot(object,"compNames")} is used.} \item{data}{an optional set of data of matching dimensionity of \code{object} to be superimposed to the plot. The default value \code{data=NULL} produces no effect. In the univariate case, data are plotted using \code{\link[graphics]{rug}} at the top horizontal axis, unless if \code{probs=NULL}, in which case plotting is at the bottom axis. In the multivariate case, points are plotted in the form of a scatterplot or matrix of scatterplots; this can be regulated by argument \code{data.par}.} \item{data.par}{an optional list of graphical parameters used for plotting \code{data} in the multivariate case, when \code{data} is not \code{NULL}. Recognized parameters are: \code{col}, \code{pch}, \code{cex}. If missing, the analogous components of \code{par()} are used. } \item{gap}{a numeric value which regulates the gap between panels of a multivariate plot when \code{d>2}.} \item{\dots}{additional graphical parameters} } \section{Details}{ For univariate density plots, \code{probs} are used to compute quantiles from the appropriate distribution, and these are superimposed to the plot of the density function, unless \code{probs=NULL}. In the multivariate case, each bivariate plot is constructed as a collection of contour curves, one curve for each probability level; consequently, \code{probs} cannot be missing or \code{NULL}. The level of the density contour lines are chosen so that each curve circumscribes a region with the quoted probability, to a good degree of approssimation; for additional information, see Azzalini and Capitanio (2014), specifically Complement 5.2 and p.179, and references therein. } \author{Adelchi Azzalini} \references{ Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. } \seealso{\code{\link{makeSECdistr}}, \code{\link{summary.SECdistr}}, \code{\link{dp2cp}}} \section{Methods}{ \describe{ % \item{\code{signature(x = "ANY", y = "ANY")}}{Generic function: see % \code{\link[graphics]{plot}}.} \item{\code{signature(x = "SECdistrUv")}}{Plot an object \code{x} of class \code{SECdistrUv}.} \item{\code{signature(x = "SECdistrMv")}}{Plot an object \code{x} of class \code{SECdistrMv}.} }} \examples{ # d=1 f1 <- makeSECdistr(dp=c(3,2,5), family="SC", name="Univariate Skew-Cauchy") plot(f1) plot(f1, range=c(-3,40), probs=NULL, col=4) # # d=2 Omega2 <- matrix(c(3, -3, -3, 5), 2, 2) f2 <- makeSECdistr(dp=list(c(10,30), Omega=Omega2, alpha=c(-3, 5)), family="sn", name="SN-2d", compNames=c("x1","x2")) plot(f2) x2 <- rmsn(100, dp=slot(f2,"dp")) plot(f2, main="Distribution 'f2'", probs=c(0.5,0.9), cex.main=1.5, col=2, cex=0.8, compLabs=c(expression(x[1]), expression(log(z[2]-beta^{1/3}))), data=x2, data.par=list(col=4, cex=0.6, pch=5)) } \keyword{methods} \keyword{hplot} sn/man/Qpenalty.Rd0000644000176000001440000000573512255403413013572 0ustar ripleyusers% file sn/man/Qpenalty.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{Qpenalty} \alias{Qpenalty} \alias{MPpenalty} \concept{penalized likelihood} \concept{prior distribution} \title{Penalty function for log-likelihood of \code{selm} models} \description{Penalty function for the log-likelihood of \code{selm} models when \code{method="MPLE"}. \code{Qpenalty} is the default function; \code{MPpenalty} is an example of a user-defined function effectively corresponding to a prior distributio on \code{alpha}. } \usage{ Qpenalty(alpha_etc, nu = NULL, der = 0) MPpenalty(alpha, der = 0) } \arguments{ \item{alpha_etc, alpha}{in the univariate case, a single value \code{alpha}; in the multivariate case, a two-component list whose first component is the vector \code{alpha}, the second one is matrix \code{cov2cor(Omega)}. } \item{nu}{degrees of freedom, only required if \code{selm} is called with \code{family="ST"}. } \item{der}{a numeric value in the set \kbd{0,1,2} which indicates the required numer of derivatives of the function. In the multivariate case the function will only be called with \code{der} equal to 0 or 1.} } \details{The penalty is a function of \code{alpha}, but its expression may depend on other ingredients, specifically \code{nu} and \code{cov2cor(Omega)}. See \sQuote{Details} of \code{\link{selm}} for additional information. The penalty mechanism allows to introduce a prior distribution \eqn{\pi} for \eqn{\alpha} by setting \eqn{Q=-\log\pi}{Q=-log(\pi)}, leading to a maximum \emph{a posteriori} estimate in the stated sense. As an illustration of this mechanism, function \code{MPpenalty} implements the `matching prior' distribution for the univariate \acronym{SN} distribution studied by Cabras \emph{et al.} (2012); their proposal is summarized in Section 3.2 of Azzalini and Capitanio (2014). Note that, besides \code{alpha=+/-Inf}, this choice also penalizes \code{alpha=0} with \code{Q=Inf}, effectively removing \code{alpha=0} from the parameter space. } \value{A positive number \code{Q} representing the penalty, possibly with attributes \code{attr(Q, "der1")} and \code{attr(Q, "der2")}, depending onthe input value \code{der}.} \references{ Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. Cabras, S., Racugno, W., Castellanos, M. E., and Ventura, L. (2012). A matching prior for the shape parameter of the skew-normal distribution. \emph{Scand. J. Statist.} \bold{39}, 236--247. } \author{Adelchi Azzalini} % \note{%% ~~further notes~~} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{\code{\link{selm}} function} \examples{ data(frontier) m2 <- selm(frontier ~ 1) m2a <- selm(frontier ~ 1, method="MPLE") m2b <- selm(frontier ~ 1, method="MPLE", penalty="MPpenalty") } %\keyword{ ~kwd1 } sn/man/frontier.Rd0000644000176000001440000000142012262550007013610 0ustar ripleyusers% file sn/man/frontier.Rd % This file is a component of the package 'sn' for R % copyright (C) 1998 Adelchi Azzalini %--------------------- \name{frontier} \alias{frontier} \title{Simulated sample from a skew-normal distribution} \usage{data(frontier)} \description{ A sample simulated from the SN(0,1,5) distribution with sample coefficient of skewness inside the admissible range (-0.9952719, 0.9952719) for the skew-normal family but maximum likelihood estimate on the frontier of the parameter space. } \format{A vector of length 50.} \source{Generated by a run of \code{rsn(50, 0, 1, 5)}.} \examples{ data(frontier, package="sn") fit <- selm(frontier ~ 1) plot(fit, which=2) # fit.p <- selm(frontier ~ 1, method="MPLE") plot(fit.p, which=2) } \keyword{datasets} sn/man/affineTransSECdistr.Rd0000644000176000001440000000502712255403455015636 0ustar ripleyusers% file sn/man/affineTransSECdistr.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{affineTransSECdistr} \alias{marginalSECdistr} \alias{affineTransSECdistr} \title{Affine transformations and marginals of a skew-elliptical distribution} \description{ Compute the distribution of a (multiple) marginal or of an affine transformation \eqn{a + A^{\top}Y}{a + A'Y} of a multivariate variable \eqn{Y} with skew-elliptical distribution.} \usage{ affineTransSECdistr(object, a, A, name, compNames, drop=TRUE) marginalSECdistr(object, comp, name, drop=TRUE) } \arguments{ \item{object}{an object of class \code{SECdistrMv}, as created by \code{\link{makeSECdistr}} or by a previous call to these functions} \item{a}{a numeric vector with the length \code{ncol(A)}.} \item{A}{a full-rank matrix with \code{nrow(A)} equal to the dimensionality of \code{object}. } \item{name}{an optional character string representing the name of the outcome distribution; if missing, one such string is constructed.} \item{compNames}{an optional vector of length \code{ncol(A)} of character strings with the names of the components of the outcome distribution; if missing, one such vector is constructed.} \item{drop}{a logical flag (default value: \code{TRUE}), operating only if \code{ncol(A)==1}, which indicates whether the returned object must be of class \code{SECdistrUv}.} \item{comp}{a vector formed by a subset of \code{1:d} which indicates which components must be extracted from \code{object}, on denoting by \code{d} its dimensionality.} } \value{an object of class \code{SECdistrMv}, except when \code{drop=TRUE} operates, leading to an object of class \code{SECdistrUv}.} \section{Background}{These functions implement formulae given in Sections 5.1.4, 5.1.6 and 6.2.2 of the reference below.} \references{ Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. } \seealso{\code{\link{makeSECdistr}}, \code{\link{SECdistrMv-class}}} \examples{ dp3 <- list(xi=1:3, Omega=toeplitz(1/(1:3)), alpha=c(3,-1,2), nu=5) st3 <- makeSECdistr(dp3, family="ST", name="ST3", compNames=c("U", "V", "W")) A <- matrix(c(1,-1,1, 3,0,-2), 3, 2) new.st <- affineTransSECdistr(st3, a=c(-3,0), A=A) # st2 <- marginalSECdistr(st3, comp=c(3,1), name="2D marginal of ST3") } \keyword{multivariate} \keyword{distribution} sn/man/conditionalSECdistr.Rd0000644000176000001440000000374712255403507015706 0ustar ripleyusers% file sn/man/conditionalSECdistr.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{conditionalSECdistr} \alias{conditionalSECdistr} \title{Skew-normal conditional distribution} \description{For a multivariate (extended) skew-normal distribution, compute its conditional distribution for given values of some of its components.} \usage{ conditionalSECdistr(object, fixed.comp, fixed.values, name, drop = TRUE) } \arguments{ \item{object}{an object of class \code{SECdistrMv} with \code{family="SN"} or \code{family="ESN"}. } \item{fixed.comp}{a vector containing a subset of \code{1:d} which selects the components whose values are to be fixed, if \code{d} denotes the dimensionality of the distribution.} \item{fixed.values}{a numeric vector of values taken on by the components \code{fixed.comp}; it must be of the same length of \code{fixed.comp}.} \item{name}{an optional character string with the name of the outcome distribution; if missing, one such string is constructed.} \item{drop}{logical (default=\code{TRUE}), to indicate whether the returned object must be of class \code{SECdistrUv} when \code{length(fixed.comp)+1=d}.} } \value{an object of class \code{SECdistrMv}, except in the case when \code{drop=TRUE} operates, leading to an object of class \code{SECdistrUv-class}.} \details{For background information, see Section 5.3.2 of the reference below.} \references{ Azzalini, A. and Capitanio, A. (2014). \emph{The Skew-normal and Related Families}. Cambridge University Press, IMS Monographs series. } \seealso{\code{\link{makeSECdistr}}, \code{\link{SECdistrMv-class}}, \code{\link{affineTransSECdistr}} } \examples{ Omega <- diag(3) + outer(1:3,1:3) sn <- makeSECdistr(dp=list(xi=rep(0,3), Omega=Omega, alpha=1:3), family="SN") esn <- conditionalSECdistr(sn, fixed.comp=2, fixed.values=1.5) show(esn) } \keyword{multivariate} \keyword{distribution} sn/man/plot.selm.Rd0000644000176000001440000001523412255403734013713 0ustar ripleyusers% file sn/man/plot.selm.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{plot.selm} \alias{plot.selm} \alias{plot.mselm} \alias{plot,selm-method} \alias{plot,mselm-method} \concept{QQ-plot} \title{Diagnostic plots for \code{selm} fits} \description{Diagnostic plots for objects of class \code{selm} and \code{mselm} generated by a call to function \code{selm}} \usage{ \S4method{plot}{selm}(x, param.type="CP", which = c(1:4), caption, panel = if (add.smooth) panel.smooth else points, main = "", ask = prod(par("mfcol")) < length(which) && dev.interactive(), ..., id.n = 3, labels.id = names(x@residuals.dp), cex.id = 0.75, identline = TRUE, add.smooth = getOption("add.smooth"), label.pos = c(4, 2), cex.caption = 1) \S4method{plot}{mselm}(x, param.type="CP", which, caption, panel = if (add.smooth) panel.smooth else points, main = "", ask = prod(par("mfcol")) < length(which) && dev.interactive(), ..., id.n = 3, labels.id = names(x@residuals.dp), cex.id = 0.75, identline = TRUE, add.smooth = getOption("add.smooth"), label.pos = c(4, 2), cex.caption = 1) } \arguments{ \item{x}{an object of class \code{selm} or \code{mselm}.} \item{param.type}{a character string which selects the type of residuals to be used for some of of the plots; possible values are: \code{"CP"} (default), \code{"DP"}, \code{"pseudo-CP"}. The various type of residuals only differ by an additive term; see \sQuote{Details} for more information.} \item{which}{if a subset of the plots is required, specify a subset of the numbers 1:4; see \sQuote{Details} for a brief description of the plots.} \item{caption}{a vector of character strings with captions to appear above the plots.} \item{panel}{panel function. The useful alternative to \code{points}, \code{panel.smooth} can be chosen by \code{add.smooth = TRUE}.} \item{main}{title to each plot, in addition to the above caption.} \item{ask}{logical; if \code{TRUE}, the user is asked before each plot.} \item{\dots}{other parameters to be passed through to plotting functions.} % see \sQuote{Details} for restrictions.} \item{id.n}{number of points to be labelled in each plot, starting with the most extreme.} \item{labels.id}{vector of labels, from which the labels for extreme points will be chosen. \code{NULL} uses observation numbers..} \item{cex.id}{magnification of point labels.} \item{identline}{logical indicating if an identity line should be added to QQ-plot and PP-plot (default: \code{TRUE}).} \item{add.smooth}{logical indicating if a smoother should be added to most plots; see also \code{panel} above.} \item{label.pos}{ positioning of labels, for the left half and right half of the graph respectively, for plots 1-3.} \item{cex.caption}{controls the size of \code{caption}.} } \details{ The meaning of \code{param.type} is described in \code{\link{dp2cp}}. However, for these plot only the first parameter component is relevant, which affects the location of the residuals; the other components are not computed. Moreover, for \acronym{QQ}-plot and \acronym{PP}-plot, \acronym{DP}-residuals are used irrespectively of \code{param.type}; see Section \sQuote{Background}. % Graphical parameters can be specified via \code{\dots}, but not those % specified by the function: \code{xlab}, \code{ylab}, \code{cex}. Values \code{which=1} and \code{which=2} have a different effect for object of class \code{"selm"} and class \code{"mselm"}. In the univariate case, \code{which=1} plots the residual values versus the fitted values if \code{p>1}, where \code{p} denotes the number of covariates including the constant; if \code{p=1}, a boxplot of the response is produced. Value \code{which=2} produces an histogram of the residuals with superimposed the fitted curve, when \code{p>1}; if \code{p=1}, a similar plot is generated using the response variable instead of the residuals. Default value for \code{which} is \code{1:4}. In the multivariate case, \code{which=1} is feasible only if \code{p=1} and it displays the data scatter with superimposed the fitted distribution. Value \code{which=2} produces a similar plot but for residuals instead of data. Default value for code{which} is \code{2:4} if \code{p>1}, otherwise \code{c(1,3,4)}. Value \code{which=3} produces a QQ-plot, both in the univariate and in the multivariate case; the difference is that the squares of normalized residuals and suitably defined Mahalanobis distances, respectively, are used in the two cases. Similarly, \code{which=4} produces a PP-plot, working in a similar fashion.} \section{Background}{ Healy-type graphical diagnostics, in the form of QQ- and PP-plots, for the multivariate distribution have been extended to the skew-normal distribution by Azzalini and Capitanio (1999, section 6.1), and subsequently further extended to the skew-\eqn{t} distribution in Azzalini and Capitanio (2003). A brief explanation in the univariate \acronym{SN} case is provided in Section 3.1.1 of Azzalini and Capitanio (2014); see also Section 3.1.6. For the univariate \acronym{ST} case, see p.102 and p.111 of the monograph. The multivariate case is discussed in Section 5.2.1 as for the \acronym{SN} distribution, in Section 6.2.6 as for the \acronym{ST} distribution. } \references{ Azzalini, A. and Capitanio, A. (1999). Statistical applications of the multivariate skew normal distribution. \emph{J.Roy.Statist.Soc. B} \bold{61}, 579--602. Full-length version available at \url{http://arXiv.org/abs/0911.2093} Azzalini, A. and Capitanio, A. (2003). Distributions generated by perturbation of symmetry with emphasis on a multivariate skew \emph{t} distribution. \emph{J.Roy. Statist. Soc. B} \bold{65}, 367--389. Full-length version available at \url{http://arXiv.org/abs/0911.2342} Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. } \seealso{\code{\link{selm}}, \code{\link{dp2cp}}} \examples{ data(wines) # m10 <- selm(flavanoids ~ 1, family="SN", data=wines, subset=(wine=="Barolo")) plot(m10) plot(m10, which=c(1,3)) # fig 3.1 and 3.2(a) of Azzalini and Capitanio (2014) # m18 <- selm(acidity ~ sugar + nonflavanoids + wine, family="SN", data=wines) plot(m18) plot(m18, param.type="DP") # m28 <- selm(cbind(acidity, alcohol) ~ sugar + nonflavanoids + wine, family="SN", data=wines) plot(m28, col=4) # data(ais) m30 <- selm(cbind(RCC, Hg, Fe) ~ 1, family="SN", data=ais) plot(m30, col=2, which=2) } \author{Adelchi Azzalini} \keyword{hplot} sn/man/SECdistrUv-class.Rd0000644000176000001440000000314512255403434015067 0ustar ripleyusers% file sn/man/SECdistrUv-class.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{SECdistrUv-class} \Rdversion{1.1} \docType{class} \alias{SECdistrUv-class} \alias{show,SECdistrUv-method} \title{Class \code{"SECdistrUv"}} \description{Univariate skew-elliptically contoured distributions} \section{Objects from the class}{ Objects can be created by a call to function \code{\link{makeSECdistr}} when its argument \code{dp} is a vector.} \section{Slots}{ \describe{ \item{\code{family}:}{a character string which selects the parametric family; currently, possible values are: \kbd{"SN"}, \kbd{"ESN"}, \kbd{"ST"}, \kbd{"SC"}. } \item{\code{dp}:}{a numeric vector of parameters; its length depends on the selected \code{family}.} \item{\code{name}:}{a character string with name of the distribution.} } } \section{Methods}{ \describe{ \item{show}{\code{signature(object = "SECdistrUv")}: \dots} \item{plot}{\code{signature(x = "SECdistrUv")}: \dots } \item{summary}{\code{signature(object = "SECdistrUv")}: \dots} } } \author{Adelchi Azzalini} \note{See \code{\link{makeSECdistr}} for a detailed description of \code{family} and \code{dp}.} \seealso{ \code{\linkS4class{SECdistrMv}}, \code{\link{plot,SECdistrUv-method}}, \code{\link{summary,SECdistrUv-method}} } \examples{ f2 <- makeSECdistr(dp=c(3, 5, -4, 6.5), family="ST", name="My first ST") show(f2) plot(f2) plot(f2, probs=c(1,5,9)/10) plot(f2, range=c(-30,10), probs=NULL, col=2, main=NULL) summary(f2) } \keyword{classes} sn/man/zeta.Rd0000644000176000001440000000444112255404210012725 0ustar ripleyusers% file sn/man/zeta.Rd % This file is a component of the package 'sn' for R % copyright (C) 1998,2013 Adelchi Azzalini %--------------------- \name{zeta} \alias{zeta} \concept{Mills ratio} \title{Function `log(2*pnorm(x))' and its derivatives} \description{The function \code{log(2*(pnorm(x))} and its derivatives, including inverse Mills ratio.} \usage{zeta(k, x)} \arguments{ \item{k}{an integer scalar between 0 and 5.} \item{x}{a numeric vector. Missing values (\code{NA}s) and \code{Inf}s are allowed} } \value{ a vector representing the \code{k}-th order derivative evaluated at \code{x}} \details{ For \code{k} between 0 and 5, the derivative of order \code{k} of \code{log(2*pnorm(x))} is evaluated; the derivative of order \code{k=0} refers to the function itself. If \code{k} is not integer, it is converted to integer and a warning message is generated. If \code{k<0} or \code{k>5}, \code{NULL} is returned. } \section{Background}{ The computation for \code{k>1} is reduced to the case \code{k=1}, making use of expressions given by Azzalini and Capitanio (1999); see especially the full-length version of the paper. The main facts are summarized in Section 2.1.4 of Azzalini and Capitanio (2014). For numerical stability, the evaluation of \code{zeta(1,x)} when \code{x < -50} makes use of the asymptotic expansion (26.2.13) of Abramowitz and Stegun (1964). \code{zeta(1,-x)} equals \code{dnorm(x)/pnorm(-x)} (in principle, apart from the above-mentioned asymptotic expansion), called the \emph{inverse Mills ratio}. } \references{ Abramowitz, M. and Stegun, I. A., editors (1964). \emph{Handbook of Mathematical Functions}. Dover Publications. Azzalini, A. and Capitanio, A. (1999). Statistical applications of the multivariate skew normal distribution. \emph{J.Roy.Statist.Soc. B} \bold{61}, 579--602. Full-length version available at \url{http://arXiv.org/abs/0911.2093} Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. } \examples{ y <- zeta(2,seq(-20,20,by=0.5)) # for(k in 0:5) curve(zeta(k,x), from=-1.5, to=5, col = k+2, add = k > 0) legend(3.5, -0.5, legend=as.character(0:5), col=2:7, lty=1) } \keyword{math} sn/man/dsc.Rd0000644000176000001440000000532512255403632012544 0ustar ripleyusers% file sn/man/dsc.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{dsc} \alias{dsc} \alias{psc} \alias{qsc} \alias{rsc} \title{Skew-Cauchy Distribution} \description{Density function, distribution function, quantiles and random number generation for the skew-Cauchy (SC) distribution.} \usage{ dsc(x, xi = 0, omega = 1, alpha = 0, dp = NULL, log = FALSE) psc(x, xi = 0, omega = 1, alpha = 0, dp = NULL) qsc(p, xi = 0, omega = 1, alpha = 0, dp = NULL) rsc(n = 1, xi = 0, omega = 1, alpha = 0, dp = NULL) } \arguments{ \item{x}{vector of quantiles. Missing values (\code{NA}s) and \code{Inf}'s are allowed.} \item{p}{vector of probabilities. Missing values (\code{NA}s) are allowed.} \item{xi}{ vector of location parameters.} \item{omega}{vector of (positive) scale parameters.} \item{alpha}{vector of slant parameters.} \item{dp}{a vector of length 3 whose elements represent the parameters described above. If \code{dp} is specified, the individual parameters cannot be set.} \item{n}{sample size.} \item{log}{logical flag used in \code{dsc} (default \code{FALSE}). When \code{TRUE}, the logarithm of the density values is returned.} } \value{density (\code{dsc}), probability (\code{psc}), quantile (\code{qsc}) or random sample (\code{rsc}) from the skew-Cauchy distribution with given \code{xi}, \code{omega} and \code{alpha} parameters or from the extended skew-normal if \code{tau!=0} } \section{Details}{ Typical usages are \preformatted{% dsc(x, xi=0, omega=1, alpha=0, log=FALSE) dsc(x, dp=, log=FALSE) psc(x, xi=0, omega=1, alpha=0) psc(x, dp= ) qsc(p, xi=0, omega=1, alpha=0) qsc(x, dp=) rsc(n=1, xi=0, omega=1, alpha=0) rsc(x, dp=) } } \section{Background}{ The skew-Cauchy distribution can be thought as a skew-\eqn{t} with tail-weight parameter \code{nu=1}. In this case special closed-form expressions of the distribution function and the quantile function have been obtained by Behboodian \emph{et al.} (2006). The key facts are summarized in Complement 4.2 of Azzalini and Capitanio (2014). A multivariate version of the distribution exists. } \references{ Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-normal and Related Families}. Cambridge University Press, IMS Monographs series. Behboodian, J., Jamalizadeh, A., and Balakrishnan, N. (2006). A new class of skew-Cauchy distributions. \emph{Statist. Probab. Lett.} \bold{76}, 1488--1493. } \seealso{ \code{\link{dst}}, \code{\link{dmsc}}} \examples{ pdf <- dsc(seq(-5,5,by=0.1), alpha=3) cdf <- psc(seq(-5,5,by=0.1), alpha=3) q <- qsc(seq(0.1,0.9,by=0.1), alpha=-2) p <- psc(q, alpha=-2) rn <- rsc(100, 5, 2, 5) } \keyword{distribution} sn/man/ais.Rd0000644000176000001440000000352012262547574012556 0ustar ripleyusers% file sn/man/ais.Rd % This file is a component of the package 'sn' for R % copyright (C) 2004-2013 Adelchi Azzalini %--------------------- \name{ais} \alias{ais} \docType{data} \encoding{UTF-8} \title{Australian Institute of Sport data} \description{Data on 102 male and 100 female athletes collected at the Australian Institute of Sport, courtesy of Richard Telford and Ross Cunningham. } \usage{data(ais)} \format{ A data frame with 202 observations on the following 13 variables. \tabular{rll}{ [,1]\tab \code{sex}\tab a factor with levels: \code{female}, \code{male}\cr [,2]\tab \code{sport}\tab a factor with levels: \code{B_Ball}, \code{Field}, \code{Gym}, \code{Netball}, \code{Row},\cr \tab\tab \code{Swim}, \code{T_400m}, \code{Tennis}, \code{T_Sprnt}, \code{W_Polo}\cr [,3]\tab \code{RCC}\tab red cell count (numeric)\cr [,4]\tab \code{WCC}\tab white cell count (numeric)\cr [,5]\tab \code{Hc}\tab Hematocrit (numeric)\cr [,6]\tab \code{Hg}\tab Hemoglobin (numeric)\cr [,7]\tab \code{Fe}\tab plasma ferritin concentration (numeric)\cr [,8]\tab \code{BMI}\tab body mass index, weight/(height)\eqn{^2}{²} (numeric)\cr [,9]\tab \code{SSF}\tab sum of skin folds (numeric)\cr [,10]\tab \code{Bfat}\tab body fat percentage (numeric)\cr [,11]\tab \code{LBM}\tab lean body mass (numeric)\cr [,12]\tab \code{Ht}\tab height, cm (numeric)\cr [,13]\tab \code{Wt}\tab weight, kg (numeric)\cr } } \details{The data have been made publicly available in connection with the book by Cook and Weisberg (1994).} \references{ Cook and Weisberg (1994), \emph{An Introduction to Regression Graphics}. John Wiley & Sons, New York. } \examples{ data(ais, package="sn") pairs(ais[,c(3:4,10:13)], col=as.numeric(ais[,1]), main = "AIS data") } \keyword{datasets} sn/man/T.Owen.Rd0000644000176000001440000000322712255656233013113 0ustar ripleyusers% file sn/man/T.Owen.Rd % This file is a component of the package 'sn' for R % copyright (C) 1997-2013 Adelchi Azzalini %--------------------- \name{T.Owen} \alias{T.Owen} \title{ Owen's function } \description{ Evaluates function \eqn{T(h,a)} studied by D.B.Owen } \usage{ T.Owen(h, a, jmax=50, cut.point=8) } \arguments{ \item{h}{ a numerical vector. Missing values (\code{NA}s) and \code{Inf} are allowed. } \item{a}{ a numerical scalar. \code{Inf} is allowed. } \item{jmax}{ an integer scalar value which regulates the accuracy of the result. See the section Details below for explanation. } \item{cut.point}{ a scalar value which regulates the behaviour of the algorithm, as explained by the details below (default value: \code{8}). }} \value{ a numerical vector } \details{ If \code{a>1} and \code{01} and \code{h>cut.point}, an asymptotic approximation is used. In the other cases, various reflection properties of the function are exploited. See the reference below for more information. } \section{Background}{ The function \emph{T(h,a)} studied by Owen (1956) is useful for the computation of the bivariate normal distribution function and related quantities, including the distribution function of a skew-normal variate; see \code{psn}. See the reference below for more information on function \eqn{T(h,a)}. } \author{Adelchi Azzalini and Francesca Furlan} \references{ Owen, D. B. (1956). Tables for computing bivariate normal probabilities. \emph{Ann. Math. Statist.} \bold{27}, 1075-1090. } \seealso{ \code{\link{psn}}} \examples{ owen <- T.Owen(1:10, 2)} \keyword{math} sn/man/summary.selm.Rd0000644000176000001440000001031212262547720014424 0ustar ripleyusers% file sn/man/summary.selm.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{summary.selm} \alias{summary.selm} \alias{summary.mselm} \alias{summary,selm-method} \alias{summary,mselm-method} \alias{summary.selm-class} \alias{summary.mselm-class} \alias{show,summary.selm-method} \alias{show,summary.mselm-method} \title{Summarizing \code{selm} fits} \description{\code{summary} method for class \code{"selm"} and \code{"mselm"}.} \usage{ \S4method{summary}{selm}(object, param.type = "CP", cov = FALSE, cor = FALSE) \S4method{summary}{mselm}(object, param.type = "CP", cov = FALSE, cor = FALSE) } \arguments{ \item{object}{an object of class \code{"selm"} or \code{"mselm"} as created by a call to function \code{selm}.} \item{param.type}{a character string which indicates the required type of parameter type; possible values are \code{"CP"} (default), \code{"DP"}, \code{"pseudo-CP"} and their equivalent lower-case expressions.} \item{cov}{a logical value, to indicate if an estimate of the variance and covariance matrix of the estimates is required (default: \code{FALSE}).} \item{cor}{a logical value, to indicate if an estimate of the correlation matrix of the estimates is required (default: \code{FALSE}).} } \value{An S4 object of class \code{summary.selm} with 12 slots. \item{\code{call}:}{the calling statement.} \item{\code{family}:}{the parametric family of skew-ellitically contoured distributed (\acronym{SEC}) type.} \item{\code{logL}:}{the maximized log-likelihood or penalized log-likelihood value} \item{\code{method}:}{estimation method (\code{"MLE"} or \code{"MPLE"})} \item{\code{param.type}:}{a characer string with the chosen parameter set.} \item{\code{param.table}:}{table of parameters, std.errors and z-values} \item{\code{fixed.param}:}{a list of fixed parameter values} \item{\code{resid}:}{residual values} \item{\code{control}:}{a list with control parameters} \item{\code{aux}:}{a list of auxiliary quantities} \item{\code{size}:}{a numeric vector with various lengths and dimensions} \item{\code{boundary}:}{a logical value which indicates whether the estimates are on the boundary of the parameter space} } \note{ There are two reasons why the default choice of \code{param.type} is \code{CP}. One is the the easier interpretation of cumulant-based quantities such as mean value, standard deviation, coefficient of skewness. The other reason is more technical and applies only to cases when the estimate of the slant parameter \code{alpha} of the \acronym{SN} distribution is close to the origin: standard asymptotic distribution theory of maximum likelihood estimates does not apply in these cases and the corresponding standard errors are not trustworthy, especially those of \code{alpha} and \code{xi} or the intercept in the regression case. The problem is especialy severe at \code{alpha=0} but to some extent propagates to its vicinity. For background information, see Sections 3.1.4--6 and 5.2.3 of Azzalini and Capitanio (2014) and references therein. This problem does not occur with the the \acronym{SC} and the \acronym{ST} distribution (unless its tail-weight parameter \code{nu} diverges, hence approaching the \code{SN} case). } \references{ Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. } \author{Adelchi Azzalini} \seealso{ \code{\link{selm}} function, \code{\linkS4class{selm}} (and \code{mselm}) class, \code{\link{plot.selm}}, \code{\link{dp2cp}} } \examples{ data(wines, package="sn") m5 <- selm(acidity ~ phenols + wine, family="SN", data=wines) summary(m5) summary(m5, "dp") s5 <- summary(m5, "dp", cor=TRUE, cov=TRUE) dp.cor <- slot(s5, "aux")$param.cor cov2cor(vcov(m5, "dp")) # the same # # m6 <- selm(acidity ~ phenols + wine, family="ST", data=wines) # boundary!? # m12 <- selm(cbind(acidity, alcohol) ~ phenols + wine, family="SN", data=wines) s12 <- summary(m12) coef(m12, 'dp') coef(m12, "dp", vector=FALSE) # # see other examples at function selm } \keyword{regression} sn/man/sn.Rd0000644000176000001440000001163212262740424012412 0ustar ripleyusers% file sn/man/sn.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{SN} \docType{package} \encoding{UTF-8} \alias{SN} \alias{sn-package} \concept{skew-elliptical distribution} \concept{skew-normal distribution} \title{Package \pkg{sn}: overview} \description{ The \pkg{sn} package provides facilities to define and manipulate probability distributions of the skew-normal (\acronym{SN}) family and some related ones, notably the skew-\eqn{t} (\acronym{ST}) family, and to apply connected statistical methods for data fitting and diagnostics, in the univariate and the multivariate case. } \section{A substantial upgrade}{The first version of the package has been written in 1997 (on CRAN since 1998); subsequent versions have evolved gradually up to version 0.4-18 in May 2013. The present \sQuote{version 1} of the package is a substantial re-writing of the earlier \sQuote{version 0}. Differences between \sQuote{version 0} and \sQuote{version 1} concern the core computational and graphical part as well as the user interface. The S4 protocol for classes and methods has been adopted. Broadly speaking, the available tools can be divided in two groups: the probability section and the statistics section. For a quick start, one could look at their key functions, \code{\link{makeSECdistr}} and \code{\link{selm}}, respectively, and from here explore the rest. In the probability section, one finds also functions \code{\link{dsn}}, \code{\link{dst}}, \code{\link{dmsn}} and others alike; these functions existed also in \sQuote{version 0} and their working is still very much the same (not necessarily so their code). The upgrade to \sQuote{version 1} appears more or less at the time when the companion book by Azzalini and Capitanio (2014) is published. Although the two projects are formally separate, they adopt the same notation, terminology and logical frame. This matching and the numerous references in the software documentation to specific sections of the book for background information should facilitate familiarizing with these tools.} \section{Backward Compatibility}{% There is a partial backward compatibility of \sQuote{version 1} versus \sQuote{version 0}. Some functions of the older version would work as before with virtually no change; a wider set arguments is now allowed. Functions \code{\link{dsn}}, \code{\link{dst}}, \code{\link{dmsn}} and alike fall in this category: typically, the names of the arguments have been altered, but they work as before if called with unnamed arguments; similar cases are \code{\link{msn.mle}}, \code{\link{sn.cumulants}} and \code{\link{T.Owen}}. Notice, however, that \code{\link{msn.mle}} and other fitting functions have effectively been subsumed into the more general fitting function \code{\link{selm}}. A second group of functions will work with little or even minimal changes. Specific examples are functions \code{sn.mle} and \code{sn.mle} which have become \code{\link{sn.mple}} and \code{\link{st.mple}}, with some additional arguments (again, one can achieve the same result via \code{\link{selm}}) and \code{dp.to.cp}, which has been replaced by the more general function \code{\link{dp2cp}}. Finally, some functions are not there any longer, with no similarly-working functions in the new version. The more prominent set of cases is represented by the functions for computing profile log-likelihoods. There is a long-term plan to re-instate similar facilities, possibly in a more flexible form, but not in the near future. } \section{Requirements}{ \R version 2.15-3 or higher, plus packages \pkg{mnormt}, \pkg{numDeriv}, \pkg{stats4} and \pkg{methods} in addition to `standard' packages (\pkg{graphics}, etc.) } \section{Version}{ The command \code{citation("sn")} indicates, among other information, the running version of the package. The most recent version of the package can be obtained from the WWW page: \url{http://azzalini.stat.unipd.it/SN} which also contains other related material. } \section{Author}{Adelchi Azzalini. % Dipart. Scienze Statistiche, Università di Padova, Italia. Please send comments, error reports, etc. to the author whose web page is \url{http://azzalini.stat.unipd.it/}. } \section{Licence}{ This package and its documentation are usable under the terms of the \dQuote{GNU General Public License} version 3 or version 2, as you prefer; a copy of them is available from \url{http://www.R-project.org/Licenses/}. While the software is freely usable, it would be appreciated if a reference is inserted in publications or other work which makes use of it; for this purpose, see the command \code{citation("sn")}. } \references{ Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. } \keyword{multivariate} \keyword{distribution} \keyword{univar} \keyword{regression} sn/man/selm.fit.Rd0000644000176000001440000002054612262547325013524 0ustar ripleyusers% file sn/man/selm.fit.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{selm.fit} \alias{selm.fit} \alias{sn.mple} \alias{st.mple} \alias{msn.mle} \alias{msn.mple} \alias{mst.mple} \title{Fitting functions for \code{selm} models} \description{A call to \code{selm} activates a call to \code{selm.fit} and from here to some other function which actually performs the parameter search, one among those listed below. These lower-level functions can be called directly for increased efficiency, at the expense of a little more programming effort.} \usage{ selm.fit(x, y, family = "SN", start = NULL, w, fixed.param = list(), offset = NULL, selm.control) sn.mple(x, y, cp = NULL, w, penalty = NULL, trace = FALSE) st.mple(x, y, dp = NULL, fixed.nu = NULL, w, penalty = NULL, trace = FALSE) msn.mle(x, y, start = NULL, w, trace = FALSE, opt.method = c("nlminb", "Nelder-Mead", "BFGS", "CG", "SANN"), control = list()) msn.mple(x, y, start = NULL, w, trace = FALSE, penalty = NULL, opt.method = c("nlminb", "Nelder-Mead", "BFGS", "CG", "SANN"), control = list()) mst.mple(x, y, start = NULL, w, penalty, fixed.nu = NULL, trace = FALSE, opt.method = c("nlminb", "Nelder-Mead", "BFGS", "CG", "SANN"), control = list()) } \arguments{ \item{x}{a design matrix with the first column of all 1's.} \item{y}{a vector or a matrix of response values such that \code{NROW(y)=nrow(x)}.} \item{family}{a character string which selects the parametric family of distributions assumed for the error term of the regression model. It must one of \code{"SN"} (default), \code{"ST"} or \code{"SC"}, which correspond to the skew-normal, the skew-\emph{t} and the skew-Cauchy family, respectively. See \code{\link{makeSECdistr}} for more information on these families and the skew-elliptically contoured (\acronym{SEC}) distributions; notice that family \code{"ESN"} is not allowed here.} \item{start, dp, cp}{a vector or a list of initial parameter values, depeding whether \code{y} is a vector or a matrix. It is assumed that \code{cp} is given in the \acronym{CP} parameterization, \code{dp} and \code{start} in the \acronym{DP} parameterization. } \item{w}{a vector of non-negative integer weights of length equal to \code{NROW(y)}; if missing, a vector of all 1's is generated.} \item{penalty}{the penalty function of the log-likelihood; default value \code{NULL} corresponds to no penalty.} \item{fixed.param}{a list of assignments of parameter values to be kept fixed during the optimization process. Currently, there is only one such option, namely \code{fixed.param=list(nu='value')}, to fix the degrees of freedom at the named \code{'value'} when \code{family="ST"}, for instance \code{list(nu=3)}. Setting \code{fixed.param=list(nu=1)} is equivalent to select \code{family="SC"}.} \item{offset}{this can be used to specify an \emph{a priori} known component to be included in the linear predictor during fitting. This should be \code{NULL} or a numeric vector of length equal to the number of cases. One or more \code{\link{offset}} terms can be included in the formula instead or as well, and if more than one are specified their sum is used.} %See \code{\link[stats]{model.offset}.} \item{trace}{a logical value which regulates printing of successive calls to the target function; default value is \code{FALSE} which suppresses printing.} \item{fixed.nu}{a positive value to keep fixed the parameter \code{nu} of the \acronym{ST} distribution in the optimization process; with default value \code{NULL}, \code{nu} is estimated like the other parameters.} \item{opt.method}{a character string which selects the optimization method within the set \code{c("nlminb", "Nelder-Mead", "BFGS", "CG", "SANN")}; the last four of these are \code{"methods"} of function \code{optim}.} \item{selm.control}{a list whose components regulate the working of \code{selm.fit}; see \sQuote{Details} for their description;} \item{control}{a list of control items passed to the optimization function.} } \details{ A call to \code{selm} produces a call to \code{selm.fit} which selects the appropriate function among \code{sn.mple}, \code{st.mple}, \code{msn.mle}, \code{msn.mple}, \code{mst.mple}, depending on the arguments of the calling statement. Of these functions, \code{sn.mple} works in \acronym{CP} space; the others in the \acronym{DP} space. In all cases, a correspondig mapping to the alternative parameter space is performed before exiting \code{selm.fit}, in addition to the selected parameter set. The components of \code{selm.control} are as follows: \itemize{ \item \code{method}: the estimation method, \code{"MLE"} or \code{"MPLE"}. \item \code{penalty}: a string with the name of the penalty function. \item \code{info.type}: a string with the name of the information matrix, \code{"observed"} or \code{"expected"}; currently fixed at "observed". \item \code{opt.method}: a character string which selects the optimization method. \item \code{opt.control}: a list of control parameters of \code{opt.method}. } Function \code{msn.mle} is unchanged from version 0.4-x of the package. Functions \code{sn.mple} and \code{mst.mple} work like \code{sn.mle} and \code{mst.mle} in version 0.4-x if argument \code{penalty} is not set or is set to \code{NULL}. } \value{A list whose specific components depend on the named function. Typical components are: \item{call}{the calling statement} \item{dp}{vector or list of estimated \acronym{DP} parameters} \item{cp}{vector or list of estimated \acronym{CP} parameters} \item{logL}{the maximized (penalized) log-likelihood} \item{aux}{a list with auxiliary output values, depending on the function} \item{opt.method}{a list produced by the numerical \code{opt.method}} } \section{Background}{ Computational aspects of maximum likelihood estimation for univariate \acronym{SN} distributions are discussed in Section 3.1.7 of Azzalini and Capitanio (2014). The working of \code{sn.mple} follows these lines; maximization is performed in the \acronym{CP} space. All other functions operate on the \acronym{DP} space. The technique underlying \code{msn.mle} is based on a partial analytical maximization, leading implicitly to a form of profile log-likelihood. This scheme is formulated in detail in Section 6.1 of Azzalini and Capitanio (1999) and summarized in Section 5.2.1 of Azzalini and Capitanio (2014). The same procedure is not feasible when one adopts \acronym{MPLE}; hence function \code{msn.mple} has to maximize over a larger parameter space. Maximization of the univariate \acronym{ST} log-likelihood is speeded-up by using the expressions of the gradient given by DiCicio and Monti (2011), reproduced with inessential variants in Section 4.3.3 of Azzalini and Capitanio (2014). The working of \code{mst.mple} is based on a re-parameterization described in Section 5.1 of Azzalini and Capitanio (2003). The expressions of the corresponding log-likelihood derivatives are given in Appendix B of the full version of the paper. } \references{ Azzalini, A. and Capitanio, A. (1999). Statistical applications of the multivariate skew normal distribution. \emph{J.Roy.Statist.Soc. B} \bold{61}, 579--602. Full-length version available at \url{http://arXiv.org/abs/0911.2093} Azzalini, A. and Capitanio, A. (2003). Distributions generated by perturbation of symmetry with emphasis on a multivariate skew \emph{t} distribution. \emph{J.Roy. Statist. Soc. B} \bold{65}, 367--389. Full-length version available at \url{http://arXiv.org/abs/0911.2342} Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. T. J. DiCicio and A. C. Monti (2011). Inferential aspects of the skew \eqn{t}-distribution. \emph{Quaderni di Statistica} \bold{13}, 1--21. } \author{Adelchi Azzalini} % \note{} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{\code{\link{selm}} function} \examples{ data(wines, package="sn") X <- model.matrix(~ phenols + wine, data=wines) fit <- msn.mle(x=X, y=cbind(wines$acidity, wines$alcohol), opt.method="BFGS") } \keyword{regression} \keyword{multivariate} sn/man/summary.SECdistr.Rd0000644000176000001440000000513512255404151015143 0ustar ripleyusers% file sn/man/summary.SECdistr.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{summary.SECdistr} \alias{summary.SECdistr} \alias{summary.SECdistrUv} \alias{summary.SECdistrMv} \alias{summary,SECdistrUv-method} \alias{summary,SECdistrMv-method} \title{Summary of a \acronym{SEC} distribution object} \description{Produce a summary of an object of class either \code{"SECdistrUv"} or \code{"SECdistrMv"}, which refer to a univariate or a multivariate \acronym{SEC} distribution, respectively. Both types of objects are produced by \code{makeSECditr}. } \usage{ \S4method{summary}{SECdistrUv}(object, cp.type = "auto", probs) \S4method{summary}{SECdistrMv}(object, cp.type = "auto") } \arguments{ \item{object}{an object of class \code{"SECdistrUv"} or \code{"SECdistrMv"}.} \item{cp.type}{a character string to select the required variance of \acronym{CP} parameterization; possible values are \code{"proper"}, \code{"pseudo"}, \code{"auto"} (default). For a description of these codes, see \code{\link{dp2cp}}.} \item{probs}{in the univariate case, a vector of probabilities for which the corresponding quantiles are required.} } %\details{%% ~~ If necessary, more details than the description above ~~} \value{A list with the following components: \item{family}{name of the family within the \acronym{SEC} class, character} \item{dp}{\acronym{DP} parameters, list or vector} \item{name}{the name of the distribution, character string} \item{compNames}{in the multivariate case, the names of the components} \item{cp}{\acronym{CP} parameters, list or vector} \item{cp.type}{the name of the selected variant of the \acronym{CP} set} \item{aux}{a list with auxiliary ingredients (mode, coefficients of skewness and kurtosis, in the parametric and non-parametric variants, and more).} } \author{Adelchi Azzalini} %\note{%% ~~further notes~~} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{\code{\link{makeSECdistr}} } \examples{ f3 <- makeSECdistr(dp=c(3,2,5), family="SC") summary(f3) print(s <- summary(f3, probs=(1:9)/10)) print(slot(s, "aux")$mode) # dp3 <- list(xi=1:3, Omega=toeplitz(1/(1:3)), alpha=c(-3, 8, 5), nu=6) st3 <- makeSECdistr(dp=dp3, family="ST", compNames=c("U", "V", "W")) s <- summary(st3) print(slot(s, "aux")$delta.star) print(slot(s, "aux")$mardia) # dp2 <- list(xi=rep(0,2), Omega=matrix(c(2,2,2,4),2,2), alpha=c(3,-5), tau=-1) esn2 <- makeSECdistr(dp=dp2, family="ESN", name="ESN-2d") summary(esn2) } \keyword{multivariate} \keyword{distribution} sn/man/makeSECdistr.Rd0000644000176000001440000001200312255403711014276 0ustar ripleyusers% file sn/man/makeSECdistr.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{makeSECdistr} \encoding{UTF-8} \alias{makeSECdistr} \concept{skew-elliptical distribution} \title{Build a skew-elliptically contoured distribution} \description{Build an object which identifies a skew-elliptically contoured distribution (\acronym{SEC}), in the univariate and in the multivariate case. The term \sQuote{skew-elliptical distribution} is a synonym of \acronym{SEC} distribution.} \usage{makeSECdistr(dp, family, name, compNames)} \arguments{ \item{dp}{a numeric vector (in the univariate case) or a list (in the multivariate case) of parameters which identify the specific distribution within the named \code{family}. See \sQuote{Details} for their expected structure.} \item{family}{a character string which identifies the parametric family; currently, possible values are: \kbd{"SN"}, \kbd{"ESN"}, \kbd{"ST"}, \kbd{"SC"}. See \sQuote{Details} for additional information.} \item{name}{an optional character string with the name of the distribution. If missing, one is created.} \item{compNames}{in the multivariate case, an optional vector of character strings with the names of the component variables; its length must be equal to the dimensionality of the distribution being generated. If missing, the components are named \code{"V1"}, \code{"V2"}, \dots} } \details{If \code{dp} is a vector, a univariate distribution is built. Alternatively, if \code{dp} is a list, a multivariate distribution is built. In both cases, the expected number of components of \code{dp} depends on \code{family}: it must be is \code{3} for \kbd{"SN"} and \kbd{"SC"}; it must be \code{4} for \kbd{"ESN"} and \kbd{"ST"}. In the univariate case, the first three components of \code{dp} represent what in their specific distributions are denoted \code{xi} (location), \code{omega} (scale, positive) and \code{alpha} (slant); see functions \code{\link{dsn}}, \code{\link{dst}}, \code{\link{dsc}} for their description. The fourth component, when it exists, represents either \code{tau} (hidden variable mean) for \kbd{"ESN"} or \code{nu} (degrees of freedom) for \kbd{"ST"}. The names of the individual parameters are attached to the components of \code{dp} in the returned object. In the multivariate case, \code{dp} is a list with components having similar role as in the univariate case, but \code{xi=dp[[1]]} and \code{alpha=dp[[3]]} are now vectors and the scale parameter \code{Omega=dp[[2]]} is a symmetric positive-definite matrix. For a multivariate distribution of dimension 1 (which can be created, although a warning message is issued), \code{Omega} corresponds to the square of \code{omega} in the univariate case. Vectors \code{xi} and \code{alpha} must be of length \code{ncol(Omega)}. See also functions \code{\link{dmsn}}, \code{\link{dmst}} and \code{\link{dmsc}}. The fourth component, when it exists, is a scalar with the same role as in the univariate case. In the univariate case \code{alpha=Inf} is allowed, but in the multivariate case all components of the vector \code{alpha} must be finite. For background information, see Azzalini and Capitanio (2014), specifically Chapers 2 and 4 for univariate cases, Chapters 5 and 6 for multivariate cases; Section 6.1 provides a general formulation of \acronym{SEC} distributions. } \value{In the univariate case, an object of class \code{SECdistrUv}; in the multivariate case, an object of class \code{SECdistrMv}. See \code{\link{SECdistrUv-class}} and \code{\link{SECdistrMv-class}} for their description. } \references{ Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. } \author{Adelchi Azzalini} \seealso{ The description of classes \code{\link{SECdistrUv-class}} and \code{\link{SECdistrMv-class}} \code{\link{plot.SECdistr}} for plotting and \code{\link{summary.SECdistr}} for summaries Related functions \code{\link{dsn}}, \code{\link{dst}}, \code{\link{dsc}}, \code{\link{dmsn}}, \code{\link{dmst}}, \code{\link{dp2cp}} Objects of class \code{\link{SECdistrMv-class}} can be manipulated with \code{\link{affineTransSECdistr}} and \code{\link{conditionalSECdistr}} } \examples{ f1 <- makeSECdistr(dp=c(3,2,5), family="SN", name="First-SN") show(f1) summary(f1) plot(f1) plot(f1, probs=c(0.1, 0.9)) # f2 <- makeSECdistr(dp=c(3, 5, -4, 8), family="ST", name="First-ST") f9 <- makeSECdistr(dp=c(5, 1, Inf, 0.5), family="ESN", name="ESN,alpha=Inf") # dp0 <- list(xi=1:2, Omega=diag(3:4), alpha=c(3, -5)) f10 <- makeSECdistr(dp=dp0, family="SN", name="SN-2d", compNames=c("u1", "u2")) # dp1 <- list(xi=1:2, Omega=diag(1:2)+outer(c(3,3),c(2,2)), alpha=c(-3, 5), nu=6) f11 <- makeSECdistr(dp=dp1, family="ST", name="ST-2d", compNames=c("t1", "t2")) } \keyword{distribution} \keyword{multivariate} sn/man/wines.Rd0000644000176000001440000000705612257240460013123 0ustar ripleyusers% file sn/man/wines.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{wines} \alias{wines} \docType{data} \encoding{UTF-8} \title{Piedmont wines data} \description{Data refer to chemical properties of 178 specimens of 3 types of wine produced in the Piedmont region of Italy. } \usage{data(wines)} \format{ A data frame with 178 observations on the following 28 variables. \tabular{ll}{% \code{wine}\tab wine name, a factor with levels \code{Barbera}, \code{Barolo}, \code{Grignolino}\cr \code{alcohol}\tab alcohol percentage, numeric\cr \code{sugar}\tab sugar-free extract, numeric\cr \code{acidity}\tab fixed acidity, numeric\cr \code{tartaric}\tab tartaric acid, numeric\cr \code{malic}\tab malic acid, numeric\cr \code{uronic}\tab uronic acids, numeric\cr \code{pH}\tab pH, numeric\cr \code{ash}\tab ash, numeric\cr \code{alcal_ash}\tab alcalinity of ash, numeric\cr \code{potassium}\tab potassium, numeric\cr \code{calcium}\tab calcium, numeric\cr \code{magnesium}\tab magnesium, numeric\cr \code{phosphate}\tab phosphate, numeric\cr \code{cloride}\tab chloride, numeric\cr \code{phenols}\tab total phenols, numeric\cr \code{flavanoids}\tab flavanoids, numeric\cr \code{nonflavanoids}\tab nonflavanoid phenols, numeric\cr \code{proanthocyanins}\tab proanthocyanins, numeric\cr \code{colour}\tab colour intensity, numeric\cr \code{hue}\tab hue, numeric\cr \code{OD_dw}\tab \eqn{OD_{280}/OD_{315}}{OD₂₈₀/OD₃₁₅} of diluted wines, numeric\cr \code{OD_fl}\tab \eqn{OD_{280}/OD_{315}}{OD₂₈₀/OD₃₁₅} of flavanoids, numeric\cr \code{glycerol}\tab glycerol, numeric\cr \code{butanediol}\tab 2,3-butanediol, numeric\cr \code{nitrogen}\tab total nitrogen, numeric\cr \code{proline}\tab proline, numeric\cr \code{methanol}\tab methanol, numeric\cr } } \details{ The data represent 27 chemical measurements on each of 178 wine specimens belonging to three types of wine produced in the Piedmont region of Italy. The data have been presented and examined by Forina \emph{et al.} (1986) and were freely accessible from the \acronym{PARVUS} web-site until it was active. These data or, more often, a subset of them are now available from various places, including some \R packages. The present dataset includes all variables available on the \acronym{PARVUS} repository, which are the variables listed by Forina \emph{et al.} (1986) with the exception of \sQuote{Sulphate}. Moreover, it reveals the undocumented fact that the original dataset appears to include also the vintage year; see the final portion of the \sQuote{Examples}. } \source{ Forina, M., Lanteri, S. Armanino, C., Casolino, C., Casale, M. and Oliveri, P. \acronym{V-PARVUS 2008}: an extendible package of programs for esplorative data analysis, classification and regression analysis. Dip. Chimica e Tecnologie Farmaceutiche ed Alimentari, Università di Genova, Italia. Web-site: \url{http://www.parvus.unige.it} } \references{ Forina M., Armanino C., Castino M. and Ubigli M. (1986). Multivariate data analysis as a discriminating method of the origin of wines. \emph{Vitis} \bold{25}, 189--201. } \examples{ data(wines) pairs(wines[,c(2,3,16:18)], col=as.numeric(wines$wine)) # code <- substr(rownames(wines), 1, 3) table(wines$wine, code) # year <- as.numeric(substr(rownames(wines), 6, 7)) table(wines$wine, year) # coincides with Table 1(a) of Forina et al. (1986) } \keyword{datasets} sn/man/sn-st.cumulants.Rd0000644000176000001440000000374612255404021015047 0ustar ripleyusers% file sn/man/sn-st.cumulants.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{sn-st.cumulants} \alias{sn.cumulants} \alias{st.cumulants} \concept{cumulant} \title{Cumulants of univariate skew-normal and skew-\eqn{t} distributions} \description{Compute cumulants of univariate (extended) skew-normal and skew-\eqn{t} distributions up to a given order.} \usage{ sn.cumulants(xi=0, omega=1, alpha=0, tau=0, dp=NULL, n=4) st.cumulants(xi=0, omega=1, alpha=0, nu=Inf, dp=NULL, n=4) } \arguments{ \item{xi}{location parameters (numeric vector)} \item{omega}{scale parameters (numeric vector, positive)} \item{alpha}{slant parameters (numeric vector)} \item{tau}{hidden mean parameter (numeric scalar)} \item{nu}{degrees of freedom (numeric scalar, positive); the default value is \code{nu=Inf} which corresponds to the skew-normal distribution.} \item{dp}{a vector containing the appropriate set of parameters. If 0 \code{dp} is not \code{NULL}, the individual parameters must not be supplied.} \item{n}{maximal order of the cumulants. For \code{st.cumulants} and for \code{sn.cumulants} with \code{tau!=0} (\acronym{ESN} distribution), it cannot exceed 4} } \section{Background}{ See Sections 2.1.4, 2.2.3 and 4.3.1 of the reference below} \value{A vector of length \code{n} or a matrix with \code{n} columns, in case the input values are vectors.} \references{ Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. } \author{Adelchi Azzalini} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{\code{\link{dsn}}, \code{\link{dsn}}} \examples{ sn.cumulants(omega=2, alpha=c(0, 3, 5, 10), n=5) sn.cumulants(dp=c(0, 3, -8), n=6) st.cumulants(dp=c(0, 3, -8, 5), n=6) # only four of them are computed st.cumulants(dp=c(0, 3, -8, 3)) } \keyword{distribution} sn/man/SECdistrMv-class.Rd0000644000176000001440000000415312255403425015057 0ustar ripleyusers% file sn/man/SECdistrMv-class.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{SECdistrMv-class} \Rdversion{1.1} \docType{class} \alias{SECdistrMv-class} \alias{show,SECdistrMv-method} \title{Class \code{"SECdistrMv"}} \description{Multivariate skew-elliptically contoured distributions} \section{Objects from the Class}{ Objects can be created by a call to function \code{\link{makeSECdistr}}, when its argument \code{dp} is a list, or by a suitable transformation of some object of this class.} \section{Slots}{ \describe{ \item{\code{family}:}{a character string which identifies the parametric family; currently, possible values are: \kbd{"SN"}, \kbd{"ESN"}, \kbd{"ST"}, \kbd{"SC"}.} \item{\code{dp}:}{a list of parameters; its length depends on the selected \code{family}.} \item{\code{name}:}{a character string with the name of the multivariate variable; it can be an empty string.} \item{\code{compNames}:}{a vector of character strings with the names of the component variables.} } } \section{Methods}{ \describe{ \item{show}{\code{signature(object = "SECdistrMv-class")}: \dots } \item{plot}{\code{signature(x = "SECdistrMv-class")}: \dots } \item{summary}{\code{signature(object = "SECdistrMv-class")}: \dots } } } \author{Adelchi Azzalini} \note{See \code{\link{makeSECdistr}} for a detailed description of \code{family} and \code{dp}. If an object of this class is manipulated by \code{affineTransSECdistr} or \code{marginalSECdistr}, the returned object is of the same class, unless the transformation leads to a univariate distribution.} \seealso{ \code{\linkS4class{SECdistrUv}}, \code{\link{plot,SECdistrMv-method}}, \code{\link{summary,SECdistrMv-method}}, \code{\link{affineTransSECdistr}}, \code{\link{marginalSECdistr}} } \examples{ dp0 <- list(xi=1:2, Omega=diag(3:4), alpha=c(3, -5)) f10 <- makeSECdistr(dp=dp0, family="SN", name="SN-2D", compNames=c("x", "y")) show(f10) plot(f10) summary(f10) } \keyword{classes} sn/man/dmst.Rd0000644000176000001440000001256512255403577012756 0ustar ripleyusers% file sn/man/dmst.Rd % This file is a component of the package 'sn' for R % copyright (C) 2002-2013 Adelchi Azzalini %--------------------- \name{dmst} \alias{dmst} \alias{pmst} \alias{rmst} \alias{dmsc} \alias{pmsc} \alias{rmsc} \title{Multivariate skew-\eqn{t} distribution and skew-Cauchy distribution} \description{Probability density function, distribution function and random number generation for the multivariate skew-\eqn{t} (\acronym{ST}) and skew-Cauchy (\acronym{SC}) distributions.} \usage{ dmst(x, xi=rep(0,length(alpha)), Omega, alpha, nu=Inf, dp=NULL, log=FALSE) pmst(x, xi=rep(0,length(alpha)), Omega, alpha, nu=Inf, dp=NULL, ...) rmst(n=1, xi=rep(0,length(alpha)), Omega, alpha, nu=Inf, dp=NULL) dmsc(x, xi=rep(0,length(alpha)), Omega, alpha, dp=NULL, log=FALSE) pmsc(x, xi=rep(0,length(alpha)), Omega, alpha, dp=NULL, ...) rmsc(n=1, xi=rep(0,length(alpha)), Omega, alpha, dp=NULL) } \arguments{ \item{x}{for \code{dmst} and \code{dmsc}, this is either a vector of length \code{d}, where \code{d=length(alpha)}, or a matrix with \code{d} columns, representing the coordinates of the point(s) where the density must be avaluated; for \code{pmst} and \code{pmsc}, only a vector of length \code{d} is allowed.} \item{xi}{a numeric vector of lenght \code{d}, or a matrix with \code{d} columns, representing the location parameter of the distribution; see \sQuote{Background}. If \code{xi} is a matrix, its dimensions must agree with those of \code{x}.} \item{Omega}{a symmetric positive-definite matrix of dimension \code{(d,d)}; see Section \sQuote{Background}.} \item{alpha}{a numeric vector of length \code{d} which regulates the slant of the density; see Section \sQuote{Background}. \code{Inf} values in \code{alpha} are not allowed.} \item{nu}{a positive value representing the degrees of freedom of \acronym{ST} distribution; default value is \code{nu=Inf} which corresponds to the multivariate skew-normal distribution.} \item{dp}{a list with three elements named \code{xi}, \code{Omega}, \code{alpha} and \code{nu}, containing quantities as described above. If \code{dp} is specified, this prevents specification of the individual parameters.} \item{n}{a numeric value which represents the number of random vectors to be drawn; default value is \code{1}.} \item{log}{logical (default value: \code{FALSE}); if \code{TRUE}, log-densities are returned.} \item{...}{additional parameters passed to \code{pmt}.} } \value{A vector of density values (\code{dmst} and \code{dmsc}) or a single probability (\code{pmst} and \code{pmsc}) or a matrix of random points (\code{rmst} and \code{rmst}).} \details{Typical usages are \preformatted{% dmst(x, xi=rep(0,length(alpha)), Omega, alpha, nu=Inf, log=FALSE) dmst(x, dp=, log=FALSE) pmst(x, xi=rep(0,length(alpha)), Omega, alpha, nu=Inf, ...) pmst(x, dp=, ...) rmst(n=1, xi=rep(0,length(alpha)), Omega, alpha, nu=Inf) rmst(n=1, dp=) dmsc(x, xi=rep(0,length(alpha)), Omega, alpha, log=FALSE) dmsc(x, dp=, log=FALSE) pmsc(x, xi=rep(0,length(alpha)), Omega, alpha, ...) pmsc(x, dp=, ...) rmsc(n=1, xi=rep(0,length(alpha)), Omega, alpha) rmsc(n=1, dp=) } Function \code{pmst} requires \code{\link[mnormt]{dmt}} from package \pkg{mnormt}; the accuracy of its computation can be controlled via argument \code{\dots}.} \section{Background}{ The family of multivariate \acronym{ST} distributions is an extension of the multivariate Student's \eqn{t} family, via the introduction of a \code{alpha} parameter which regulates asymmetry; when \code{alpha=0}, the skew-\eqn{t} distribution reduces to the commonly used form of multivariate Student's \eqn{t}. Further, location is regulated by \code{xi} and scale by \code{Omega}, when its diagonal terms are not all 1's. When \code{nu=Inf} the distribution reduces to the multivariate skew-normal one; see \code{dmsn}. Notice that the location vector \code{xi} does not represent the mean vector of the distribution (which in fact may not even exist if \code{nu <= 1}), and similarly \code{Omega} is not \emph{the} covariance matrix of the distribution, although it is \emph{a} covariance matrix. For additional information, see Section 6.2 of the reference below. The family of multivariate \acronym{SC} distributions is the subset of the \acronym{ST} family, obtained when \code{nu=1}. While in the univariate case there are specialized functions for the \acronym{SC} distribution, \code{dmsc}, \code{pmsc} and \code{rmsc} simply make a call to \code{dmst, pmst, rmst} with argument \code{nu} set equal to 1.} \references{ % Azzalini, A. and Capitanio, A. (2003). % Distributions generated by perturbation of symmetry % with emphasis on a multivariate skew \emph{t} distribution. % \emph{J.Roy. Statist. Soc. B} \bold{65}, 367--389. Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monograph series. } \seealso{ \code{\link{dst}}, \code{\link{dsc}}, \code{\link{dmsn}}, \code{\link[mnormt]{dmt}}, \code{\link{makeSECdistr}} } \examples{ x <- seq(-4,4,length=15) xi <- c(0.5, -1) Omega <- diag(2) Omega[2,1] <- Omega[1,2] <- 0.5 alpha <- c(2,2) pdf <- dmst(cbind(x,2*x-1), xi, Omega, alpha, 5) rnd <- rmst(10, xi, Omega, alpha, 6) p1 <- pmst(c(2,1), xi, Omega, alpha, nu=5) p2 <- pmst(c(2,1), xi, Omega, alpha, nu=5, abseps=1e-12, maxpts=10000) } \keyword{distribution} \keyword{multivariate} sn/man/selm.Rd0000644000176000001440000003712212262547267012746 0ustar ripleyusers% file sn/man/selm.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{selm} \encoding{UTF-8} \alias{selm} \concept{regression} \concept{skew-elliptical distribution} \title{Fitting linear models with skew-elliptical error term} \description{Function \emph{selm} fits a \strong{l}inear \strong{m}odel with \strong{s}kew-\strong{e}lliptical error term. The term skew-elliptical distribution is an abbreviated equivalent of skew-elliptically contoured (\acronym{SEC}) distribution. The function works for univariate and multivariate response variables.} \usage{ selm(formula, family = "SN", data, weights, subset, na.action, start = NULL, fixed.param = list(), method = "MLE", penalty=NULL, offset, model = TRUE, x = FALSE, y = FALSE, ...) } \arguments{ \item{formula}{an object of class \code{"\link[stats]{formula}"} (or one that can be coerced to that class): a symbolic description of the model to be fitted, using the same syntax used for the similar parameter of e.g. \code{"\link[stats]{lm}"}, with the restriction that the constant term must not be removed from the linear predictor. % The details of model specification are given under \sQuote{Details}. } \item{family}{a character string which selects the parametric family of \acronym{SEC} type assumed for the error term. It must one of \code{"SN"} (default), \code{"ST"} or \code{"SC"}, which correspond to the skew-normal, the skew-\emph{t} and the skew-Cauchy family, respectively. See \code{\link{makeSECdistr}} for more information on these families and the set of \acronym{SEC} distributions; notice that family \code{"ESN"} listed there is not allowed here.} \item{data}{an optional data frame containing the variables in the model. If not found in \code{data}, the variables are taken from \code{environment(formula)}, typically the environment from which \code{selm} is called.} \item{weights}{a numeric vector of weights associated to individual observations. Weights are supposed to represent frequencies, hence must be non-negative integers (not all 0) and \code{length(weights)} must equal the number of observations. If not assigned, a vector of all 1's is generated.} \item{subset}{an optional vector specifying a subset of observations to be used in the fitting process.} \item{na.action}{a function which indicates what should happen when the data contain \code{NA}s. The default is set by the \code{na.action} setting of \code{\link[base]{options}}. The \sQuote{factory-fresh} default is \code{\link{na.omit}}. Another possible value is \code{NULL}, no action. % Value \code{\link[stats]{na.exclude}} can be useful. } \item{start}{a vector (in the univariate case) or a list (in the multivariate case) of initial values for the search of the parameter estimates. If \code{start=NULL} (default), initial values are selected by the procedure.} \item{fixed.param}{a list of assignments of parameter values which must be kept fixed in the numerical maximization process. Currently, there is only one such option, of the form \code{list(nu=)}, to fix the degrees of freedom at the named \code{} when \code{family="ST"}, for instance \code{list(nu=3)}. Setting \code{fixed.param=list(nu=1)} is equivalent to select \code{family="SC"}.} \item{method}{a character string which selects the estimation method to be used for fitting. Currently two options exist: \code{"MLE"} (default) and \code{"MPLE"}, corresponding to standard maximum likelihood and maximum penalized likekelihood estimation, respectively. See \sQuote{Details} for additional information. } \item{penalty}{a character string which denotes the penalty function to be subtracted to the log-likelihood function, when \code{method="MPLE"}; if \code{penalty=NULL} (default), a pre-defined function is adopted. See \sQuote{Details} for a description of the default penalty function and for the expected format of alternative specifications. When \code{method="MLE"}, no penalization is applied and this argument has no effect.} \item{offset}{this can be used to specify an \emph{a priori} known component to be included in the linear predictor during fitting. This should be \code{NULL} or a numeric vector of length equal to the number of cases. One or more \code{\link{offset}} terms can be included in the formula instead or as well, and if more than one are specified their sum is used. } \item{model, x, y}{logicals. If \code{TRUE}, the corresponding components of the fit are returned.} \item{\dots}{optional control parameters, as follows. \itemize{ \item \code{trace}: a logical value which indicates whether intermediate evaluations of the optimization process are printed (default: \code{FALSE}). \item \code{info.type}: a character string which indicates the type of Fisher information matrix; possible values are \code{"observed"} (default) and \code{"expected"}. Currently \code{"expected"} is implemented only for the \acronym{SN} family. \item \code{opt.method}: a character string which selects the numerical optimization method, among the possible values \code{"nlminb", "Nelder-Mead", "BFGS", "CG", "SANN"}. If \code{opt.method="nlminb"} (default), function \code{\link[stats]{nlminb}} is called, otherwise function \code{\link[stats]{optim}} is called with \code{method} equal to \code{opt.method}. \item \code{opt.control}: a list of control parameters which is passed on to \code{nlminb} or to \code{optim}, depending on the chosen \code{opt.method}. } } } \details{By default, \code{selm} fits the selected model by maximum likelihood estimation (\acronym{MLE}), making use of some numerical optimization method. Maximization is performed in one parameterization, usually \acronym{DP}, and then the estimates are mapped to other parameter sets, \acronym{CP} and pseudo-\acronym{CP}; see \code{\link{dp2cp}} for more information on parameterizations. These parameter transformations are carried out trasparently to the user. The observed information matrix is used to obtain the estimated variance matrix of the \acronym{MLE}'s and from this the standard errors. Background information on \acronym{MLE} in the context of \acronym{SEC} distributions is provided by Azzalini and Capitanio (2014); see specifically Chapter 3, Sections 4.3, 5.2, 6.2.5--6. For additional information, see the original research work referenced therein. Although the density functionof SEC distributions are expressed using \acronym{DP} parameter sets, the methods associated to the objects created by this function communicate, by default, their outcomes in the \acronym{CP} parameter set, or its variant form pseudo-\acronym{CP} when \acronym{CP} does not exist; the \sQuote{Note} at \code{\link{summary.selm}} explains why. A more detailed discussion is available in Sections 3.1.4--6 and 5.2.3 of Azzalini and Capitanio (2014) and in Section 4 of Arellano-Valle and Azzalini (2008). There is a known open issue which affects computation of the information matrix of the multivariate skew-normal distribution when the slant parameter \eqn{\alpha} approaches the null vector; see p.149 of Azzalini and Capitanio (2014). Consequently, if a model with multivariate response is fitted with \code{family="SN"} and the estimate \code{alpha} of \eqn{\alpha} is at the origin or neary so, the information matrix and the standard errors are not computed and a warning message is issued. In this unusual circumstance, a simple work-around is to re-fit the model with \code{family="ST"}, which will work except in remote cases when (i) the estimated degrees of freedom \code{nu} diverge and (ii) still \code{alpha} remains at the origin. In some cases, especially for small sample size, the \acronym{MLE} occurs on the frontier of the parameter space, leading to \acronym{DP} estimates with \code{alpha=Inf} or to a similar situation in the multivariate case or in an alternative parameterization. Such outcome is regared by many as unsatisfactory; surely it prevents using the observed information matrix to compute standard errors. This problem motivates the use of maximum penalized likelihood estimation (\acronym{MPLE}), where the regular log-likelihood function \eqn{\log~L}{log(L)} is penalized by subtracting an amount \eqn{Q}, say, increasingly large as \eqn{|\alpha|} increases. Hence the function which is maximized at the optimization stage is now \eqn{\log\,L~-~Q}{log(L) - Q}. If \code{method="MPLE"} and \code{penalty=NULL}, the default function \code{Qpenalty} is used, which implements the penalization: \deqn{Q(\alpha) = c_1 \log(1 + c_2 \alpha_*^2)}{% Q(\alpha)= c₁ log(1 + c₂ [\alpha*]²)} where \eqn{c_1}{c₁} and \eqn{c_2}{c₂} are positive constants, but depending on the degrees of freedom \code{nu} in the \code{"ST"} case, \deqn{\alpha_*^2 = \alpha^\top \bar\Omega \alpha}{%? [\alpha*]² = \alpha' cor(\Omega) \alpha} and \eqn{\bar\Omega}{cor(\Omega)} denotes the correlation matrix associated to the scale matrix \code{Omega} described in connection with \code{\link{makeSECdistr}}. In the univariate case \eqn{\bar\Omega=1}{cor(\Omega)=1}, so that \eqn{\alpha_*^2=\alpha^2}{[\alpha*]²=\alpha²}. Further information on \acronym{MPLE} and this choice of the penalty function is given in Section 3.1.8 and p.111 of Azzalini and Capitanio (2014); for a more detailed account, see Azzalini and Arellano-Valle (2013) and references therein. It is possible to change the penalty function, to be declared via the argument \code{penalty}. For instance, if the calling statement includes \code{penalty="anotherQ"}, the user must have defined \verb{ }\code{anotherQ <- function(alpha_etc, nu = NULL, der = 0)} with the following arguments. \itemize{ \item \code{alpha_etc}: in the univariate case, a single value \code{alpha}; in the multivariate case, a two-component list whose first component is the vector \code{alpha}, the second one is matrix equal to \code{cov2cor(Omega)}. % \eqn{\bar\Omega}{corOmega}. \item \code{nu}: degrees of freedom, only relevant if \code{family="ST"}. \item \code{der}: a numeric value which indicates the required order of derivation; if \code{der=0} (default value), only the penalty \code{Q} need to be retuned by the function; if \code{der=1}, \code{attr(Q, "der1")} must represent the first order derivative of \code{Q} with respect to \code{alpha}; if \code{der=2}, also \code{attr(Q, "der2")} must be assigned, containing the second derivative (only required in the univariate case). } This function must return a single numeric value, possibly with required attributes when is called with \code{der>1}. Since \pkg{sn} imports functions \code{\link[numDeriv]{grad}} and \code{\link[numDeriv]{hessian}} from package \pkg{numDeriv}, one can rely on them for numerical evaluation of the derivatives, if they are not available in an explicit form. This penalization scheme allows to introduce a prior distribution \eqn{\pi} for \eqn{\alpha} by setting \eqn{Q=-\log\pi}{Q=-log(\pi)}, leading to a maximum \emph{a posteriori} estimate in the stated sense. See \code{\link{Qpenalty}} for more information and an illustration. } \value{an S4 object of class \code{selm} or \code{mselm}, depending on whether the response variable of the fitted model is univariate or multivariate. These objects are described in the \code{\linkS4class{selm} class}. } \references{ Arellano-Valle, R. B., and Azzalini, A. (2008). The centred parametrization for the multivariate skew-normal distribution. \emph{J. Multiv. Anal.} \bold{99}, 1362--1382. Corrigendum: vol.100 (2009), p.816. Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. Azzalini, A. and Arellano Valle, R. V. (2013, available on line 30 June 2012). Maximum penalized likelihood estimation for skew-normal and skew-\emph{t} distributions. \emph{J. Stat. Planning & Inference} \bold{143}, 419--433. } \author{Adelchi Azzalini} \section{Warning}{ The estimates are obtained by numerical optimization methods and, as usual in similar cases, there is no guarantee that the maximum of the objective function is achieved. Both consideration of model simplicity and numerical experience indicate that models with \acronym{SN} error terms generally produce more reliable results compared to those with the \acronym{ST} family. Take into account that models involving a traditional Student's \eqn{t} distribution with unknown degres of freedom can already be problematic; the presence of the (multivariate) slant parameter \eqn{\alpha} in the \acronym{ST} family cannot make things any simpler. Consequently, care must be exercised, especially so if one works with the (multivariate) \acronym{ST} family. Consider re-fitting a model with different starting values and, in the \acronym{ST} case, building the profile log-likelihood for a range of \eqn{\nu} values. Details on the numerical optimization which has produced object \code{obj} can be estracted with \code{slot(obj, "opt.method")}. Be aware that occasionally \code{optim} and \code{nlminb} declare successful completion of a regular minimization problem at a point where the Hessian matrix is not positive-definite. A case of this sort is presented in the final portion of the examples below. } \seealso{\itemize{ \item \code{\linkS4class{selm}} for classes \code{"selm"} and \code{"mselm"}, \code{\link{summary.selm}} for summaries, \code{\link{plot.selm}} for plots \item the generic functions \code{\link{coef}}, \code{\link{logLik}}, \code{\link{residuals}}, \code{\link{fitted}}, \code{\link{vcov}}. \item the underlying function \code{\link{selm.fit}} and those further down \item the selection of a penalty function of the log-likelihood, such as \code{\link{Qpenalty}}. }} \examples{ data(ais) m1 <- selm(log(Fe) ~ BMI + LBM, family="SN", data=ais) print(m1) summary(m1) s<- summary(m1, "DP", cov=TRUE, cor=TRUE) plot(m1) plot(m1, param.type="DP") logLik(m1) coef(m1) coef(m1, "DP") var <- vcov(m1) # m1a <- selm(log(Fe) ~ BMI + LBM, family="SN", method="MPLE", data=ais) m1b <- selm(log(Fe) ~ BMI + LBM, family="ST", fixed.par=list(nu=8), data=ais) # data(barolo) attach(barolo) A75 <- (reseller=="A" & volume==75) logPrice <- log(price[A75],10) m <- selm(logPrice ~ 1, family="ST") summary(m) plot(m, which=2, col=4, main="Barolo log10(price)") # cfr Figure 4.7 of Azzalini & Capitanio (2014), p.107 detach(barolo) #----- # examples with multivariate response # m3 <- selm(cbind(BMI, LBM) ~ WCC + RCC, family="SN", data=ais) plot(m3, col=2, which=2) summary(m3, "dp") coef(m3) coef(m3, vector=FALSE) # data(wines) m28 <- selm(cbind(chloride, glycerol, magnesium) ~ 1, family="ST", subset=(wine=="Grignolino"), data=wines) dp28 <- coef(m28, "DP", vector=FALSE) plot(m28, param.type="dp") # cfr Figures 6.1 and 6.2 of Azzalini & Capitanio (2014), pp.181-2 plot(m28, param.type="pseudo-CP") # \donttest{ m31 <- selm(cbind(BMI, LBM)~ Ht + Wt, family="ST", data=ais) # Warning message... slot(m31, "opt.method")$convergence m32 <- selm(cbind(BMI, LBM) ~ Ht + Wt, family="ST", data=ais, opt.method="BFGS") # Warning message... slot(m32, "opt.method")$convergence } } \keyword{regression} \keyword{univar} \keyword{multivariate} sn/man/summary.SECdistr-class.Rd0000644000176000001440000000407012255404114016242 0ustar ripleyusers% file sn/man/summary.SECdistr-class.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{summary.SECdistrMv-class} \Rdversion{1.1} \docType{class} \alias{summary.SECdistrMv-class} \alias{summary.SECdistrUv-class} \alias{show,summary.SECdistrMv-method} \alias{show,summary.SECdistrUv-method} \title{Classes \code{summary.SECdistrMv} and \code{summary.SECdistrUv}} \description{Summaries of objects of classes \code{SECdistrMv} and \code{SECdistrUv}} \section{Objects from the Class}{ Objects can be created by calls of type \code{summary(object)} when \code{object} is of class either \code{"SECdistrMv"} or \code{"SECdistrUv"}.} \section{Slots}{ \describe{ \item{\code{family}:}{A character string which represents the parametric family of \acronym{SEC} type } \item{\code{dp}:}{Object of class \code{"list"} or \code{"vector"} for \code{"SECdistrMv"} and \code{"SECdistrUv"}, respectively} \item{\code{name}:}{Object of class \code{"character"} with the name of distribution } \item{\code{compNames}:}{For \code{"SECdistrMv"} objects, a character vector with names of the components of the multivariate distribution} \item{\code{cp}:}{Object of class \code{"list"} or \code{"vector"} for \code{"SECdistrMv"} and \code{"SECdistrUv"}, respectively} \item{\code{cp.type}:}{a character string of the \acronym{CP} version} \item{\code{aux}:}{A list of auxiliary quantities } } } \section{Methods}{ \describe{ \item{show}{\code{signature(object = "summary.SECdistrMv")}: ... } \item{show}{\code{signature(object = "summary.SECdistrUv")}: ... } } } %\references{%% ~~put references to the literature/web site here~~} \author{Adelchi Azzalini} % \note{%% ~~further notes~~} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{\code{\link{summary.SECdistrMv}}, \code{\link{summary.SECdistrUv}}, \code{\link{makeSECdistr}}, \code{\link{dp2cp}} } % \examples{showClass("summary.SECdistrMv")} \keyword{classes} sn/man/barolo.Rd0000644000176000001440000000360512255403477013257 0ustar ripleyusers% file sn/man/barolo.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{barolo} \alias{barolo} \docType{data} \title{Price of Barolo wine} \description{A data frame with prices of bottles of Barolo wine and some auxiliary variables} \usage{data(barolo)} \format{A data frame with 307 observations on five variables, as follows: \tabular{ll}{% \code{reseller}\tab reseller code, a factor with levels \code{A, B, C, D} \cr \code{vintage} \tab vintage year, numeric \cr \code{volume} \tab content volume in centilitres, numeric \cr \code{price} \tab price in Euro, numeric \cr \code{age} \tab age in 2010, numeric } For six items, \code{vintage} is \code{NA}'s and so also \code{age}. Three of these items have a non-standard volume of 50 cl. } \details{The data have been obtained in July 2010 from the websites of four Italian wine resellers, selecting only quotations of Barolo, a wine produced in the Piedmont region of Italy. The price quotations do not include the cost of delivery. The data have been presented in Section 4.3.2 of the reference below, where a subset of them has been used for illustrative purposes. This subset refers to reseller \code{"A"} and bottles of 75cl. } \source{ Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. } %\references{ %% ~~ possibly secondary sources and usages ~~} \examples{ data(barolo) attach(barolo) f <- cut(age, c(0, 5, 6, 8, 11, 30)) table(volume, f) plot(volume, price, col=as.numeric(f), pch=as.character(reseller)) legend(400, 990, col=1:5, lty=1, title="age class", legend=c("4-5", "6", "7-8", "9-11", "12-30")) # A75 <- (reseller=="A" & volume==75) hist(log(price[A75],10), col="gray85") # see Figure 4.7 of the source } \keyword{datasets} sn/man/sn-st.info.Rd0000644000176000001440000001311712255404032013762 0ustar ripleyusers% file sn/man/sn-st.info.Rd % This file is a component of the package 'sn' for R % copyright (C) 2013 Adelchi Azzalini %--------------------- \name{sn-st.info} \alias{sn.infoUv} \alias{sn.infoMv} \alias{st.infoUv} \alias{st.infoMv} \title{Expected and observed Fisher information for \acronym{SN} and \acronym{ST} distributions} \description{ Computes Fisher information for parameters of simple sample having skew-normal (\acronym{SN}) or skew-\eqn{t} (\acronym{ST}) distribution or for a regression model with errors term having such distributions, in the \acronym{DP} and \acronym{CP} parametrizations. } \usage{ sn.infoUv(dp=NULL, cp=NULL, x=NULL, y, w, penalty=NULL, norm2.tol=1e-06) sn.infoMv(dp, x=NULL, y, w, norm2.tol=1e-06) st.infoUv(dp=NULL, cp=NULL, x=NULL, y, fixed.nu=NULL, w, penalty=NULL, norm2.tol=1e-06) st.infoMv(dp, x=NULL, y, fixed.nu=NULL, w, penalty=NULL, norm2.tol=1e-06) } \arguments{ \item{dp, cp}{direct or centred parameters, respectively; one of the two vectors must be supplied, but not both. For the univariate \acronym{SN} distribution, \code{sn.infoUv} is to be used, and these arguments are vectors. In the multivariate case, \code{sn.infoMv} is to be used and these arguments are lists. See \code{\link{dp2cp}} for their description.} \item{x}{an optional matrix which represents the design matrix of a regression model} \item{y}{a numeric vector (for \code{sn.infoUv} and \code{st.infoUv}) or a matrix (for \code{sn.infoMv} and \code{st.infoMv}) representing the response. In the \acronym{SN} case ( \code{sn.infoUv} and \code{sn.infoMv}), \code{y} can be missing, and in this case the observed information matrix is computed; otherwise the observed information is computed. In the \acronym{ST} case ( \code{st.infoUv} and \code{st.infoMv}, \code{y} is a required argument, since only the observed information matrix for \acronym{ST} distributions is implemented. See \sQuote{Details} for additional information.} \item{w}{an optional vector of weights; if missing, a vector of 1's is generated.} \item{fixed.nu}{an optional numeric value which declared a fixed value of the degrees of freedom, \code{nu}. If not \code{NULL}, the information matrix has a dimension reduced by 1.} \item{penalty}{a optional string?? with the same penalty function used in the call to \code{\link{selm}}; see this function for its description;} \item{norm2.tol}{for the observed information case, the Mahalanobis squared distance of the score 0 is evaluated; if it exceeds \code{norm2.tol}, a warning message is issued, since the \sQuote{information matrix} so evaluated may be not positive-definite. See \sQuote{Details} for additional information. } } \value{ a list containing the following components: \item{dp, cp}{one of the two arguments is the one supplied on input; the other one matches the previous one in the alternative parametrization.} \item{type}{the type of information matrix: "observed" or "expected".} \item{info.dp, info.cp}{matrices of Fisher (observed or expected) information in the two parametrizations.} \item{asyvar.dp, asyvar.cp}{inverse matrices of Fisher information in the two parametrizations, when available; See \sQuote{Details} for additional information. } \item{aux}{a list containing auxiliary elements, depending of the selected function and the type of computation.} } \section{Details}{ In the univariate case, when \code{x} is not set, then a simple random sample is assumed and a matrix \code{x} with a single column of all 1's is constructed; in this case, the supplied vector \code{dp} or \code{cp} must have length 3. If \code{x} is set, then the supplied vector of parameters, \code{dp} or \code{cp}, must have length \code{ncol(x)+2}. In the multivariate case, a direct extension of this scheme applies. If the observed information matrix is required, \code{dp} or \code{dp} should represent the maximum likelihood estimates (MLE) for the given \code{y}, otherwise the information matrix may fail to be positive-definite. Therefore, the squared Mahalobis norm of the score vector is evaluated and compared with \code{norm2.tol}. If it exceeds this threshold, it is taken as an indication that the parameter is not at the MLE and a warning message is issued. The returned list still includes \code{info.dp} and \code{info.cp}, but in this case these represent merely the matrices of second derivatives; \code{asyvar.dp} and \code{asyvar.cp} are set to \code{NULL}. } \section{Background}{ The information matrix for the the univariate \acronym{SN} distribution in the two stated parameterizations in discussed in Sections 3.1.3--4 of Azzalini and Capitanio (2014). For the multivariate distribution, Section 5.2.2 of this monograph summarizes briefly the findings of Arellano-Valle and Azzalini (2008). For \acronym{ST} ?? } \references{ Arellano-Valle, R. B., and Azzalini, A. (2008). The centred parametrization for the multivariate skew-normal distribution. \emph{J.\ Multiv.\ Anal.} \bold{99}, 1362--1382. Corrigendum: vol.\,100 (2009), p.\,816. Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. } \seealso{\code{\link{dsn}}, \code{\link{dmsn}}, \code{\link{dp2cp}}} \examples{ infoE <- sn.infoUv(dp=c(0,1,5)) infoO <- sn.infoUv(cp=c(0,1,0.8), y=rsn(50, dp=c(0,1,5))) # data(wines) X <- model.matrix(~ pH + wine, data=wines) fit <- sn.mple(x=X, y=wines$alcohol) infoE <- sn.infoUv(cp=fit$cp, x=X) infoO <- sn.infoUv(cp=fit$cp, x=X, y=wines$alcohol) } \keyword{distribution} sn/man/dsn.Rd0000644000176000001440000001041112262477743012563 0ustar ripleyusers% file sn/man/dsn.Rd % This file is a component of the package 'sn' for R % copyright (C) 1998-2013 Adelchi Azzalini %--------------------- \name{dsn} \alias{dsn} \alias{psn} \alias{qsn} \alias{rsn} \title{Skew-Normal Distribution} \description{Density function, distribution function, quantiles and random number generation for the skew-normal (\acronym{SN}) and the extended skew-normal (\acronym{ESN}) distribution.} \usage{ dsn(x, xi=0, omega=1, alpha=0, tau=0, dp=NULL, log=FALSE) psn(x, xi=0, omega=1, alpha=0, tau=0, dp=NULL, engine, ...) qsn(p, xi=0, omega=1, alpha=0, tau=0, dp=NULL, tol=1e-8, ...) rsn(n=1, xi=0, omega=1, alpha=0, tau=0, dp=NULL) } \arguments{ \item{x}{vector of quantiles. Missing values (\code{NA}'s) and \code{Inf}'s are allowed.} \item{p}{vector of probabilities. Missing values (\code{NA}s) are allowed} \item{xi}{vector of location parameters.} \item{omega}{vector of scale parameters; must be positive.} \item{alpha}{ vector of slant parameters; \code{+/- Inf} is allowed. With \code{psn} and \code{qsn}, it must be of length 1 if \code{engine="T.Owen"}.} \item{tau}{a single value representing the `hidden mean' parameter of the \acronym{ESN} distribution; \code{tau=0} (default) corresponds to a \acronym{SN} distribution.} \item{dp}{a vector of length 3 (in the \acronym{SN} case) or 4 (in the \acronym{ESN} case), whose components represent the individual parameters described above. If \code{dp} is specified, the individual parameters cannot be set.} \item{n}{sample size.} \item{tol}{a scalar value which regulates the accuracy of the result of \code{qsn}.} \item{log}{logical flag used in \code{dsn} (default \code{FALSE}). When \code{TRUE}, the logarithm of the density values is returned.} \item{engine}{a character string which selects the computing engine; this is either \code{"T.Owen"} or \code{"biv.nt.prob"}, the latter from package \code{mnormt}. If \code{tau != 0} or \code{length(alpha)>1}, \code{"biv.nt.prob"} must be used. If this argument is missing, a default selection rule is applied.} \item{...}{ additional parameters passed to \code{T.Owen}} } \value{density (\code{dsn}), probability (\code{psn}), quantile (\code{qsn}) or random sample (\code{rsn}) from the skew-normal distribution with given \code{xi}, \code{omega} and \code{alpha} parameters or from the extended skew-normal if \code{tau!=0} } \section{Details}{ Typical usages are \preformatted{% dsn(x, xi=0, omega=1, alpha=0, log=FALSE) dsn(x, dp=, log=FALSE) psn(x, xi=0, omega=1, alpha=0, ...) psn(x, dp=, ...) qsn(p, xi=0, omega=1, alpha=0, tol=1e-8, ...) qsn(x, dp=, ...) rsn(n=1, xi=0, omega=1, alpha=0) rsn(x, dp=) } \code{psn} and \code{qsn} make use of function \code{\link{T.Owen}} or \code{\link[mnormt:dmt]{biv.nt.prob}} } \section{Background}{ The family of skew-normal distributions is an extension of the normal family, via the introdution of a \code{alpha} parameter which regulates asymmetry; when \code{alpha=0}, the skew-normal distribution reduces to the normal one. The density function of the \acronym{SN} distribution in the \sQuote{normalized} case having \code{xi=0} and \code{omega=1} is \code{2*dnorm(x)*pnorm(alpha*x)}. An early discussion of the skew-normal distribution is given by Azzalini (1985); see Section 3.3 for the \acronym{ESN} variant, up to a slight difference in the parameterization. An updated extensive account is provided by Chapter 2 of Azzalini and Capitanio (2014); the \acronym{ESN} variant is presented Section 2.2. A multivariate version of the distribution is examined in Chapter 5.} \references{ Azzalini, A. (1985). A class of distributions which includes the normal ones. \emph{Scand. J. Statist.} \bold{12}, 171-178. Azzalini, A. with the collaboration of Capitanio, A. (2014). \emph{The Skew-Normal and Related Families}. Cambridge University Press, IMS Monographs series. } \seealso{ Functions used by \code{psn}: \code{\link{T.Owen}}, \code{\link[mnormt:dmt]{biv.nt.prob}} Related distributions: \code{\link{dmsn}}, \code{\link{dst}}, \code{\link{dmst}} } \examples{ pdf <- dsn(seq(-3, 3, by=0.1), alpha=3) cdf <- psn(seq(-3, 3, by=0.1), alpha=3) q <- qsn(seq(0.1, 0.9, by=0.1), alpha=-2) r <- rsn(100, 5, 2, 5) } \keyword{distribution}