ipred/0000755000176200001440000000000014120340712011347 5ustar liggesusers
ipred/NAMESPACE0000644000176200001440000000426113461342153012601 0ustar liggesusers
useDynLib(ipred, .registration = TRUE)

### exported functions
export( getsurv,
        bagging, ipredbagg,
        control.errorest, errorest,
        bootest, cv,
        inbagg, inclass,
        ipredknn, predict.ipredknn,
        kfoldcv,
        mypredict.lm,
        rsurv, sbrier, varset, slda)

importFrom("stats", "as.formula", "complete.cases", "cov",
           "delete.response", "model.extract", "model.frame",
           "model.matrix", "na.omit", "naresid", "predict", "rexp",
           "rnorm", "runif", "terms")
importFrom("utils", "data", "getFromNamespace")

### register S3 methods
#S3method(print, bagging)
S3method(slda, default)
S3method(slda, formula)
S3method(slda, factor)
S3method(bagging, default)
S3method(bagging, data.frame)
S3method(errorest, default)
S3method(errorest, data.frame)
S3method(bootest, default)
S3method(bootest, integer)
S3method(bootest, factor)
S3method(bootest, numeric)
S3method(bootest, Surv)
S3method(inbagg, default)
S3method(inbagg, data.frame)
S3method(inclass, default)
S3method(inclass, data.frame)
S3method(cv, default)
S3method(cv, integer)
S3method(cv, factor)
S3method(cv, numeric)
S3method(cv, Surv)
S3method(ipredbagg, Surv)
S3method(ipredbagg, factor)
S3method(ipredbagg, numeric)
S3method(prune, classbagg)
S3method(prune, regbagg)
S3method(prune, survbagg)
S3method(print, classbagg)
S3method(print, regbagg)
S3method(print, survbagg)
S3method(summary, classbagg)
S3method(summary, regbagg)
S3method(summary, survbagg)
S3method(print, cvclass)
S3method(print, bootestclass)
S3method(print, cvreg)
S3method(print, bootestreg)
S3method(print, cvsurv)
S3method(print, bootestsurv)
S3method(print, inbagg)
S3method(summary, inbagg)
S3method(print, inclass)
S3method(summary, inclass)
S3method(predict, inbagg)
S3method(predict, inclass)
S3method(predict, classbagg)
S3method(predict, regbagg)
S3method(predict, survbagg)
S3method(predict, slda)
S3method(predict, ipredknn)
#S3method(predict, sclass)
#S3method(predict, sreg)

##imported functions
importFrom(class, knn)
importFrom(rpart, rpart, prune, na.rpart, rpart.control)
importFrom(MASS, lda)
importFrom(survival, Surv, is.Surv, survfit)
importFrom(nnet, multinom)
importFrom(prodlim, prodlim)
ipred/data/0000755000176200001440000000000014120335534012266 5ustar liggesusers
ipred/data/DLBCL.rda0000644000176200001440000000774313461342153013613 0ustar liggesusers
[binary data]
ipred/data/Smoking.rda0000644000176200001440000000244213461342153014371 0ustar liggesusers
[binary data]
ipred/data/GlaucomaMVF.rda0000644000176200001440000004531013461342153015064 0ustar liggesusers
[binary data]
ipred/data/dystrophy.rda0000644000176200001440000000614513461342153015033 0ustar liggesusers
[binary data]
classify <- function(data){
  data <- data[,c("TVPS", "BPNL", "COHB")]
  res <- t(t(data) > c(4438, 232.5, 58))
  res <- as.factor(ifelse(apply(res, 1, sum) > 2, 1, 0))
  res
}
response <- classify(Smoking[ ,c("TVPS", "BPNL", "COHB")])
smoking <- data.frame(Smoking, response)

formula <- response~TVPS+BPNL+COHB~TarY+NicY+COY+Sex+Age

inclass(formula, data = smoking, pFUN = list(list(model = lm,
  predict = mypredict.lm)), cFUN = classify)
}
\keyword{misc}
ipred/man/sbrier.Rd0000644000176200001440000001107213461342153013710 0ustar liggesusers
\name{sbrier}
\alias{sbrier}
\title{ Model Fit for Survival Data }
\description{
Model fit for survival data: the integrated Brier score for censored
observations.
}
\usage{
sbrier(obj, pred, btime= range(obj[,1]))
}
\arguments{
\item{obj}{an object of class \code{Surv}.}
\item{pred}{predicted values. Either a probability or a list of
\code{survfit} objects.}
\item{btime}{numeric vector of times, the integrated Brier score is
computed if this is of \code{length > 1}. The Brier score at
\code{btime} is returned otherwise.}
}
\details{
There is no obvious criterion of model fit for censored data. The Brier
score for censoring as well as its integrated version were suggested by
Graf et al (1999).

The integrated Brier score is always computed over a subset of the
interval given by the range of the time slot of the survival object
\code{obj}.
}
\value{
The (integrated) Brier score with attribute \code{time} is returned.
}
\seealso{
More measures for the validation of predicted survival probabilities
are implemented in package \code{pec}.
}
\references{

Erika Graf, Claudia Schmoor, Willi Sauerbrei and Martin Schumacher
(1999),
Assessment and comparison of prognostic classification schemes for
survival data.
\emph{Statistics in Medicine} \bold{18}(17-18), 2529--2545.
} \examples{ library("survival") data("DLBCL", package = "ipred") smod <- Surv(DLBCL$time, DLBCL$cens) KM <- survfit(smod ~ 1) # integrated Brier score up to max(DLBCL$time) sbrier(smod, KM) # integrated Brier score up to time=50 sbrier(smod, KM, btime=c(0, 50)) # Brier score for time=50 sbrier(smod, KM, btime=50) # a "real" model: one single survival tree with Intern. Prognostic Index # and mean gene expression in the first cluster as predictors mod <- bagging(Surv(time, cens) ~ MGEc.1 + IPI, data=DLBCL, nbagg=1) # this is a list of survfit objects (==KM-curves), one for each observation # in DLBCL pred <- predict(mod, newdata=DLBCL) # integrated Brier score up to max(time) sbrier(smod, pred) # Brier score at time=50 sbrier(smod, pred, btime=50) # artificial examples and illustrations cleans <- function(x) { attr(x, "time") <- NULL; names(x) <- NULL; x } n <- 100 time <- rpois(n, 20) cens <- rep(1, n) # checks, Graf et al. page 2536, no censoring at all! # no information: \pi(t) = 0.5 a <- sbrier(Surv(time, cens), rep(0.5, n), time[50]) stopifnot(all.equal(cleans(a),0.25)) # some information: \pi(t) = S(t) n <- 100 time <- 1:100 mod <- survfit(Surv(time, cens) ~ 1) a <- sbrier(Surv(time, cens), rep(list(mod), n)) mymin <- mod$surv * (1 - mod$surv) cleans(a) sum(mymin)/diff(range(time)) # independent of ordering rand <- sample(1:100) b <- sbrier(Surv(time, cens)[rand], rep(list(mod), n)[rand]) stopifnot(all.equal(cleans(a), cleans(b))) \testonly{ # total information: \pi(t | X) known for every obs time <- 1:10 cens <- rep(1,10) pred <- diag(10) pred[upper.tri(pred)] <- 1 diag(pred) <- 0 # # a <- sbrier(Surv(time, cens), pred) # stopifnot(all.equal(a, 0)) # } # 2 groups at different risk time <- c(1:10, 21:30) strata <- c(rep(1, 10), rep(2, 10)) cens <- rep(1, length(time)) # no information about the groups a <- sbrier(Surv(time, cens), survfit(Surv(time, cens) ~ 1)) b <- sbrier(Surv(time, cens), rep(list(survfit(Surv(time, cens) ~1)), 20)) stopifnot(all.equal(a, b)) # risk groups known mod <- survfit(Surv(time, cens) ~ strata) b <- sbrier(Surv(time, cens), c(rep(list(mod[1]), 10), rep(list(mod[2]), 10))) stopifnot(a > b) ### GBSG2 data data("GBSG2", package = "TH.data") thsum <- function(x) { ret <- c(median(x), quantile(x, 0.25), quantile(x,0.75)) names(ret)[1] <- "Median" ret } t(apply(GBSG2[,c("age", "tsize", "pnodes", "progrec", "estrec")], 2, thsum)) table(GBSG2$menostat) table(GBSG2$tgrade) table(GBSG2$horTh) # pooled Kaplan-Meier mod <- survfit(Surv(time, cens) ~ 1, data=GBSG2) # integrated Brier score sbrier(Surv(GBSG2$time, GBSG2$cens), mod) # Brier score at 5 years sbrier(Surv(GBSG2$time, GBSG2$cens), mod, btime=1825) # Nottingham prognostic index GBSG2 <- GBSG2[order(GBSG2$time),] NPI <- 0.2*GBSG2$tsize/10 + 1 + as.integer(GBSG2$tgrade) NPI[NPI < 3.4] <- 1 NPI[NPI >= 3.4 & NPI <=5.4] <- 2 NPI[NPI > 5.4] <- 3 mod <- survfit(Surv(time, cens) ~ NPI, data=GBSG2) plot(mod) pred <- c() survs <- c() for (i in sort(unique(NPI))) survs <- c(survs, getsurv(mod[i], 1825)) for (i in 1:nrow(GBSG2)) pred <- c(pred, survs[NPI[i]]) # Brier score of NPI at t=5 years sbrier(Surv(GBSG2$time, GBSG2$cens), pred, btime=1825) } \keyword{survival} ipred/man/summary.inbagg.Rd0000644000176200001440000000101613461342153015342 0ustar liggesusers\name{summary.inbagg} \alias{summary.inbagg} \alias{print.summary.inbagg} \title{Summarising Inbagg} \description{ Summary of inbagg is returned. } \usage{ \method{summary}{inbagg}(object, ...) 
}
\arguments{
\item{object}{an object of class \code{inbagg}.}
\item{\dots}{additional arguments.}
}
\details{
A representation of an indirect bagging model (the intermediate
variables, the number of bootstrap samples, the trees) is printed.
}
\value{
none
}
\seealso{\code{\link{print.summary.inbagg}}}
\keyword{misc}
ipred/man/Smoking.Rd0000644000176200001440000000221313461342153014026 0ustar liggesusers
\name{Smoking}
\alias{Smoking}
\non_function{}
\title{Smoking Styles}
\usage{data("Smoking")}
\description{
The \code{Smoking} data frame has 55 rows and 9 columns.
}
\format{
This data frame contains the following columns:
\describe{
\item{NR}{numeric, patient number.}
\item{Sex}{factor, sex of patient.}
\item{Age}{factor, age group of patient, grouping consisting of those
in their twenties, those in their thirties and so on.}
\item{TarY}{numeric, tar yields of the cigarettes.}
\item{NicY}{numeric, nicotine yields of the cigarettes.}
\item{COY}{numeric, carbon monoxide (CO) yield of the cigarettes.}
\item{TVPS}{numeric, total volume puffed smoke.}
\item{BPNL}{numeric, blood plasma nicotine level.}
\item{COHB}{numeric, carboxyhaemoglobin level, i.e. amount of CO
absorbed by the blood stream.}
}
}
\details{
The data describe different smoking habits of probands.
}
\source{
Hand and Taylor (1987), Study F \emph{Smoking Styles}.
}
\references{
D.J. Hand and C.C. Taylor (1987),
\emph{Multivariate analysis of variance and repeated measures.} London:
Chapman & Hall, pp. 167--181.
}
\keyword{datasets}
ipred/man/predict.bagging.Rd0000644000176200001440000000612113461342153015450 0ustar liggesusers
\name{predict.classbagg}
\alias{predict.classbagg}
\alias{predict.regbagg}
\alias{predict.survbagg}
\title{ Predictions from Bagging Trees }
\description{
Predict the outcome of a new observation based on multiple trees.
}
\usage{
\method{predict}{classbagg}(object, newdata=NULL, type=c("class", "prob"),
aggregation=c("majority", "average", "weighted"), \dots)
\method{predict}{regbagg}(object, newdata=NULL, aggregation=c("average",
"weighted"), \dots)
\method{predict}{survbagg}(object, newdata=NULL,\dots)
}
\arguments{
\item{object}{object of classes \code{classbagg}, \code{regbagg}
or \code{survbagg}.}
\item{newdata}{a data frame of new observations.}
\item{type}{character string denoting the type of predicted value
returned for classification trees. Either \code{class}
(predicted classes are returned) or \code{prob}
(estimated class probabilities are returned).}
\item{aggregation}{character string specifying how to aggregate, see
below.}
\item{...}{additional arguments, currently not passed to any function.}
}
\details{
There are (at least) three different ways to aggregate the predictions
of bagging classification trees. Most famous is class majority voting
(\code{aggregation="majority"}) where the most frequent class is
returned. The second way is choosing the class with maximal averaged
class probability (\code{aggregation="average"}). The third method is
based on the "aggregated learning sample", introduced by Hothorn et al.
(2004) for survival trees. The prediction of a new observation is the
majority class, mean or Kaplan-Meier curve of all observations from the
learning sample identified by the \code{nbagg} leaves containing the
new observation. For regression trees, only averaged or weighted
predictions are possible.

By default, the out-of-bag estimate is computed if \code{newdata} is NOT
specified.
Therefore, the predictions of \code{predict(object)} are "honest" in
some way (this is not possible for combined models via \code{comb} in
\code{\link{bagging}}).
If you want to compute the predictions for the learning sample itself,
use \code{newdata} to specify your data.
}
\value{
The predicted class or estimated class probabilities are returned for
classification trees. The predicted endpoint is returned in regression
problems and the predicted Kaplan-Meier curve is returned for survival
trees.
}
\references{

Leo Breiman (1996), Bagging Predictors. \emph{Machine Learning}
\bold{24}(2), 123--140.

Torsten Hothorn, Berthold Lausen, Axel Benner and Martin
Radespiel-Troeger (2004),
Bagging Survival Trees.
\emph{Statistics in Medicine}, \bold{23}(1), 77--91.

}
\examples{

data("Ionosphere", package = "mlbench")
Ionosphere$V2 <- NULL # constant within groups

# nbagg = 10 for performance reasons here
mod <- bagging(Class ~ ., data=Ionosphere, nbagg=10)

# out-of-bag estimate

mean(predict(mod) != Ionosphere$Class)

# predictions for the first 10 observations

predict(mod, newdata=Ionosphere[1:10,])

predict(mod, newdata=Ionosphere[1:10,], type="prob")

}
\keyword{tree}
ipred/man/mypredict.lm.Rd0000644000176200001440000000137113461342153015032 0ustar liggesusers
\name{mypredict.lm}
\alias{mypredict.lm}
\title{Predictions Based on Linear Models}
\description{
Function to predict a vector of full length (number of observations),
where predictions for observations with missing explanatory values are
replaced by \code{NA}.
}
\usage{
mypredict.lm(object, newdata)
}
\arguments{
\item{object}{an object of class \code{lm}.}
\item{newdata}{matrix or data frame to be predicted according to
\code{object}.}
}
\value{
Vector of predicted values.
}
\note{\code{predict.lm} delivers a vector of reduced length, i.e. rows
where explanatory variables are missing are omitted. The full length of
the predicted observation vector is necessary in the indirect
classification approach (\code{\link{predict.inclass}}).}
\keyword{misc}
ipred/man/ipred-internal.Rd0000644000176200001440000000034213461342153015335 0ustar liggesusers
\name{ipred-internal}
\alias{getsurv}
\title{Internal ipred functions}
\description{
Internal ipred functions.
}
\usage{
getsurv(obj, times)
}
\details{
These functions are not to be called by the user.
}
\keyword{internal}
ipred/man/rsurv.Rd0000644000176200001440000000330413461342153013602 0ustar liggesusers
\name{rsurv}
\alias{rsurv}
\title{ Simulate Survival Data }
\description{
Simulation Setup for Survival Data.
}
\usage{
rsurv(N, model=c("A", "B", "C", "D", "tree"), gamma=NULL, fact=1,
pnon=10, gethaz=FALSE)
}
\arguments{
\item{N}{ number of observations. }
\item{model}{ type of model. }
\item{gamma}{simulate censoring time as runif(N, 0, gamma). Defaults to
\code{NULL} (no censoring).}
\item{fact}{scale parameter for \code{model=tree}.}
\item{pnon}{number of additional non-informative variables for the tree
model.}
\item{gethaz}{logical, indicating whether the hazard rate for each
observation should be returned.}
}
\details{
Simulation setup similar to configurations used in LeBlanc and Crowley
(1992) or Keles and Segal (2002) as well as a tree model used in
Hothorn et al. (2004). See Hothorn et al. (2004) for the details.
}
\value{
A data frame with elements \code{time}, \code{cens}, \code{X1} ...
\code{X5}. If \code{pnon} > 0, additional noninformative covariables
are added. If \code{gethaz=TRUE}, the \code{hazard} attribute returns
the hazard rates.
}
\references{

M. LeBlanc and J. Crowley (1992), Relative Risk Trees for Censored
Survival Data. \emph{Biometrics} \bold{48}, 411--425.
S. Keles and M. R. Segal (2002), Residual-based tree-structured
survival analysis. \emph{Statistics in Medicine}, \bold{21}, 313--326.

Torsten Hothorn, Berthold Lausen, Axel Benner and Martin
Radespiel-Troeger (2004),
Bagging Survival Trees.
\emph{Statistics in Medicine}, \bold{23}(1), 77--91.

}
\examples{

library("survival")
# 3*X1 + X2
simdat <- rsurv(500, model="C")
coxph(Surv(time, cens) ~ ., data=simdat)

}
\keyword{survival}
ipred/man/kfoldcv.Rd0000644000176200001440000000125213461342153014051 0ustar liggesusers
\name{kfoldcv}
\alias{kfoldcv}
\title{ Subsamples for k-fold Cross-Validation }
\description{
Computes feasible sample sizes for the k groups in k-fold cv if N/k is
not an integer.
}
\usage{
kfoldcv(k, N, nlevel=NULL)
}
\arguments{
\item{k}{ number of groups. }
\item{N}{ total sample size. }
\item{nlevel}{ a vector of sample sizes for stratified sampling.}
}
\details{
If N/k is not an integer, k-fold cv is not unique; this function
determines meaningful sample sizes for the k groups.
}
\value{
A vector of length \code{k}.
}
\examples{

# 10-fold CV with N = 91

kfoldcv(10, 91)

\testonly{
k <- sample(5:15, 1)
k
N <- sample(50:150, 1)
N
stopifnot(sum(kfoldcv(k, N)) == N)
}
}
\keyword{misc}
ipred/man/prune.bagging.Rd0000644000176200001440000000205013461342153015146 0ustar liggesusers
\name{prune.classbagg}
\alias{prune.classbagg}
\alias{prune.regbagg}
\alias{prune.survbagg}
\title{ Pruning for Bagging }
\description{
Prune each of the trees returned by \code{\link{bagging}}.
}
\usage{
\method{prune}{classbagg}(tree, cp=0.01,...)
}
\arguments{
\item{tree}{ an object returned by \code{\link{bagging}} (calling this
\code{tree} is needed by the generic function \code{prune} in package
\code{rpart}).}
\item{cp}{complexity parameter, see \code{\link[rpart]{prune.rpart}}.}
\item{...}{additional arguments to \code{\link[rpart]{prune.rpart}}.}
}
\details{
By default, \code{\link{bagging}} grows classification trees of maximal
size. One may want to prune each tree; however, it is not clear whether
this decreases the prediction error.
}
\value{
An object of the same class as \code{tree} with the trees pruned.
}
\examples{

data("Glass", package = "mlbench")
library("rpart")

mod <- bagging(Type ~ ., data=Glass, nbagg=10, coob=TRUE)
pmod <- prune(mod)
print(pmod)

}
\keyword{tree}
ipred/man/print.cvclass.Rd0000644000176200001440000000106713461342153015216 0ustar liggesusers
\name{print.cvclass}
\alias{print.cvclass}
\alias{print.cvreg}
\alias{print.cvsurv}
\alias{print.bootestclass}
\alias{print.bootestreg}
\alias{print.bootestsurv}
\title{Print Method for Error Rate Estimators}
\description{
Print objects returned by \code{\link{errorest}} in nice layout.
}
\usage{
\method{print}{cvclass}(x, digits=4, ...)
}
\arguments{
\item{x}{an object returned by \code{\link{errorest}}.}
\item{digits}{how many digits should be printed.}
\item{\dots}{further arguments to be passed to or from methods.}
}
\value{
none
}
\keyword{misc}
ipred/man/predict.slda.Rd0000644000176200001440000000144113461342153014775 0ustar liggesusers
\name{predict.slda}
\alias{predict.slda}
\title{ Predictions from Stabilised Linear Discriminant Analysis }
\description{
Predict the class of a new observation based on stabilised LDA.
}
\usage{
\method{predict}{slda}(object, newdata, ...)
}
\arguments{
\item{object}{object of class \code{slda}.}
\item{newdata}{a data frame of new observations.
}
\item{...}{additional arguments passed to
\code{\link[MASS]{predict.lda}}.}
}
\details{
This function is a method for the generic function \code{\link{predict}}
for class \code{slda}. For the details see
\code{\link[MASS]{predict.lda}}.
}
\value{
A list with components
\item{class}{the predicted class (a factor).}
\item{posterior}{posterior probabilities for the classes.}
\item{x}{the scores of test cases.}
}
\keyword{multivariate}
ipred/man/ipredknn.Rd0000644000176200001440000000260013461342153014231 0ustar liggesusers
\name{ipredknn}
\alias{ipredknn}
\title{ k-Nearest Neighbour Classification }
\description{
$k$-nearest neighbour classification with an interface compatible with
\code{\link{bagging}} and \code{\link{errorest}}.
}
\usage{
ipredknn(formula, data, subset, na.action, k=5, \dots)
}
\arguments{
\item{formula}{a formula of the form \code{lhs ~ rhs} where \code{lhs}
is the response variable and \code{rhs} a set of predictors.}
\item{data}{optional data frame containing the variables in the
model formula.}
\item{subset}{optional vector specifying a subset of observations
to be used.}
\item{na.action}{function which indicates what should happen when
the data contain \code{NA}s.}
\item{k}{number of neighbours considered, defaults to 5.}
\item{...}{additional parameters.}
}
\details{
This is a wrapper to \code{\link[class]{knn}} in order to be able to use
k-NN in \code{\link{bagging}} and \code{\link{errorest}}.
}
\value{
An object of class \code{ipredknn}. See \code{\link{predict.ipredknn}}.
}
\examples{

library("mlbench")
learn <- as.data.frame(mlbench.twonorm(300))

mypredict.knn <- function(object, newdata)
predict.ipredknn(object, newdata, type="class")

errorest(classes ~., data=learn, model=ipredknn,
predict=mypredict.knn)

}
\keyword{multivariate}
ipred/man/bagging.Rd0000644000176200001440000002330314017735326014026 0ustar liggesusers
\name{bagging}
\alias{bagging}
\alias{ipredbagg}
\alias{ipredbagg.factor}
\alias{ipredbagg.integer}
\alias{ipredbagg.numeric}
\alias{ipredbagg.Surv}
\alias{ipredbagg.default}
\alias{bagging.data.frame}
\alias{bagging.default}
\title{Bagging Classification, Regression and Survival Trees }
\description{
Bagging for classification, regression and survival trees.
}
\usage{
\method{ipredbagg}{factor}(y, X=NULL, nbagg=25, control=
rpart.control(minsplit=2, cp=0, xval=0),
comb=NULL, coob=FALSE, ns=length(y), keepX = TRUE, \dots)
\method{ipredbagg}{numeric}(y, X=NULL, nbagg=25,
control=rpart.control(xval=0),
comb=NULL, coob=FALSE, ns=length(y), keepX = TRUE, \dots)
\method{ipredbagg}{Surv}(y, X=NULL, nbagg=25,
control=rpart.control(xval=0),
comb=NULL, coob=FALSE, ns=dim(y)[1], keepX = TRUE, \dots)
\method{bagging}{data.frame}(formula, data, subset, na.action=na.rpart,
\dots)
}
\arguments{
\item{y}{the response variable: either a factor vector of class labels
(bagging classification trees), a vector of numerical values
(bagging regression trees) or an object of class
\code{\link[survival]{Surv}} (bagging survival trees).}
\item{X}{a data frame of predictor variables.}
\item{nbagg}{an integer giving the number of bootstrap replications. }
\item{coob}{a logical indicating whether an out-of-bag estimate of the
error rate (misclassification error, root mean squared error
or Brier score) should be computed.
See \code{\link{predict.classbagg}} for details.}
\item{control}{options that control details of the \code{rpart}
algorithm, see \code{\link[rpart]{rpart.control}}. It is
wise to set \code{xval = 0} in order to save computing
time.
Note that the default values depend on the class of \code{y}.}
\item{comb}{a list of additional models for model combination, see
below for some examples. Note that argument \code{method} for
double-bagging is no longer there, \code{comb} is much more flexible.}
\item{ns}{number of samples to draw from the learning sample. By
default, the usual bootstrap n out of n with replacement is
performed. If \code{ns} is smaller than \code{length(y)},
subagging (Buehlmann and Yu, 2002), i.e. sampling \code{ns} out
of \code{length(y)} without replacement, is performed.}
\item{keepX}{a logical indicating whether the data frame of predictors
should be returned. Note that the computation of the
out-of-bag estimator requires \code{keepX=TRUE}.}
\item{formula}{a formula of the form \code{lhs ~ rhs} where \code{lhs}
is the response variable and \code{rhs} a set of
predictors.}
\item{data}{optional data frame containing the variables in the model
formula.}
\item{subset}{optional vector specifying a subset of observations to be
used.}
\item{na.action}{function which indicates what should happen when the
data contain \code{NA}s. Defaults to
\code{\link[rpart]{na.rpart}}.}
\item{...}{additional parameters passed to \code{ipredbagg} or
\code{\link[rpart]{rpart}}, respectively.}
}
\details{
The random forest implementations
\code{\link[randomForest]{randomForest}} and \code{\link[party]{cforest}}
are more flexible and reliable for computing bootstrap-aggregated trees
than this function and should be used instead.

Bagging for classification and regression trees was suggested by
Breiman (1996a, 1998) in order to stabilise trees. The trees in this
function are computed using the implementation in the
\code{\link[rpart]{rpart}} package.

The generic function \code{ipredbagg} implements methods for different
responses. If \code{y} is a factor, classification trees are
constructed. For numerical vectors \code{y}, regression trees are
aggregated and if \code{y} is a survival object, bagging survival trees
(Hothorn et al, 2004) is performed.

The function \code{bagging} offers a formula based interface to
\code{ipredbagg}.

\code{nbagg} bootstrap samples are drawn and a tree is constructed for
each of them. There is no general rule for when to stop growing the
trees. The size of the trees can be controlled by the \code{control}
argument or \code{\link{prune.classbagg}}. By default, classification
trees are as large as possible whereas regression trees and survival
trees are built with the standard options of
\code{\link[rpart]{rpart.control}}. If \code{nbagg=1}, one single tree
is computed for the whole learning sample without bootstrapping.

If \code{coob} is TRUE, the out-of-bag sample (Breiman, 1996b) is used
to estimate the prediction error corresponding to \code{class(y)}.
Alternatively, the out-of-bag sample can be used for model combination;
an out-of-bag error rate estimator is not available in this case.
Double-bagging (Hothorn and Lausen, 2003) computes an LDA on the
out-of-bag sample and uses the discriminant variables as additional
predictors for the classification trees. \code{comb} is an optional
list of lists with two elements \code{model} and \code{predict}.
\code{model} is a function with arguments \code{formula} and
\code{data}. \code{predict} is a function with arguments
\code{object, newdata} only. If the estimation of the covariance matrix
in \code{\link{lda}} fails due to a limited out-of-bag sample size, one
can use \code{\link{slda}} instead. See the example section for an
example of double-bagging.
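For instance, a double-bagging combination using the stabilised LDA
instead could be specified as follows (a sketch only;
\code{\link{predict.slda}} returns the discriminant scores in its
\code{x} component):
\preformatted{
## sketch: bundle stabilised LDA with the trees via comb
comb.slda <- list(list(model = slda,
                       predict = function(object, newdata)
                           predict(object, newdata)$x))
mod <- bagging(Class ~ ., data = Ionosphere, comb = comb.slda)
}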
The methodology is not limited to a combination with LDA: bundling
(Hothorn and Lausen, 2005) can be used with arbitrary classifiers.

NOTE: Up to ipred version 0.9-0, bagging was performed using a modified
version of the original rpart function. Due to interface changes in
rpart 3.1-55, the bagging function had to be rewritten. Results of
previous versions are not exactly reproducible.
}
\value{
The class of the object returned depends on \code{class(y)}:
\code{classbagg, regbagg} and \code{survbagg}. Each is a list with
elements
\item{y}{the vector of responses.}
\item{X}{the data frame of predictors.}
\item{mtrees}{multiple trees: a list of length \code{nbagg} containing
the trees (and possibly additional objects) for each bootstrap sample.}
\item{OOB}{logical whether the out-of-bag estimate should be computed.}
\item{err}{if \code{OOB=TRUE}, the out-of-bag estimate of
misclassification or root mean squared error or the Brier score for
censored data.}
\item{comb}{logical whether a combination of models was requested.}

For each class, methods for the generics
\code{\link[rpart]{prune.rpart}}, \code{\link{print}},
\code{\link{summary}} and \code{\link{predict}} are available for
inspection of the results and prediction, for example:
\code{\link{print.classbagg}}, \code{\link{summary.classbagg}},
\code{\link{predict.classbagg}} and \code{\link{prune.classbagg}} for
classification problems.
}
\references{

Leo Breiman (1996a), Bagging Predictors. \emph{Machine Learning}
\bold{24}(2), 123--140.

Leo Breiman (1996b), Out-Of-Bag Estimation. \emph{Technical Report}
\url{https://www.stat.berkeley.edu/~breiman/OOBestimation.pdf}.

Leo Breiman (1998), Arcing Classifiers. \emph{The Annals of Statistics}
\bold{26}(3), 801--824.

Peter Buehlmann and Bin Yu (2002), Analyzing Bagging.
\emph{The Annals of Statistics} \bold{30}(4), 927--961.

Torsten Hothorn and Berthold Lausen (2003), Double-Bagging: Combining
classifiers by bootstrap aggregation. \emph{Pattern Recognition},
\bold{36}(6), 1303--1309.

Torsten Hothorn and Berthold Lausen (2005), Bundling Classifiers by
Bagging Trees. \emph{Computational Statistics & Data Analysis}, 49,
1068--1078.

Torsten Hothorn, Berthold Lausen, Axel Benner and Martin
Radespiel-Troeger (2004),
Bagging Survival Trees.
\emph{Statistics in Medicine}, \bold{23}(1), 77--91.

}
\examples{

library("MASS")
library("survival")

# Classification: Breast Cancer data

data("BreastCancer", package = "mlbench")

# Test set error bagging (nbagg = 50): 3.7\% (Breiman, 1998, Table 5)

mod <- bagging(Class ~ Cl.thickness + Cell.size
+ Cell.shape + Marg.adhesion
+ Epith.c.size + Bare.nuclei
+ Bl.cromatin + Normal.nucleoli
+ Mitoses, data=BreastCancer, coob=TRUE)
print(mod)

# Test set error bagging (nbagg=50): 7.9\% (Breiman, 1996a, Table 2)
data("Ionosphere", package = "mlbench")
Ionosphere$V2 <- NULL # constant within groups

bagging(Class ~ ., data=Ionosphere, coob=TRUE)

# Double-Bagging: combine LDA and classification trees

# predict returns the linear discriminant values, i.e. linear combinations
# of the original predictors

comb.lda <- list(list(model=lda, predict=function(obj, newdata)
predict(obj, newdata)$x))

# Note: out-of-bag estimator is not available in this situation, use
# errorest

mod <- bagging(Class ~ ., data=Ionosphere, comb=comb.lda)

predict(mod, Ionosphere[1:10,])

# Regression:

data("BostonHousing", package = "mlbench")

# Test set error (nbagg=25, trees pruned): 3.41 (Breiman, 1996a, Table 8)

mod <- bagging(medv ~ ., data=BostonHousing, coob=TRUE)
print(mod)

library("mlbench")
learn <- as.data.frame(mlbench.friedman1(200))

# Test set error (nbagg=25, trees pruned): 2.47 (Breiman, 1996a, Table 8)

mod <- bagging(y ~ ., data=learn, coob=TRUE)
print(mod)

# Survival data

# Brier score for censored data estimated by
# 10 times 10-fold cross-validation: 0.2 (Hothorn et al,
# 2004)

data("DLBCL", package = "ipred")
mod <- bagging(Surv(time,cens) ~ MGEc.1 + MGEc.2 + MGEc.3 + MGEc.4 +
MGEc.5 + MGEc.6 + MGEc.7 + MGEc.8 + MGEc.9 +
MGEc.10 + IPI, data=DLBCL, coob=TRUE)

print(mod)

}
\keyword{tree}
ipred/man/print.bagging.Rd0000644000176200001440000000076613461342153015157 0ustar liggesusers
\name{print.classbagg}
\alias{print}
\alias{print.classbagg}
\alias{print.regbagg}
\alias{print.survbagg}
\title{Print Method for Bagging Trees}
\description{
Print objects returned by \code{\link{bagging}} in nice layout.
}
\usage{
\method{print}{classbagg}(x, digits, \dots)
}
\arguments{
\item{x}{object returned by \code{\link{bagging}}.}
\item{digits}{how many digits should be printed.}
\item{\dots}{further arguments to be passed to or from methods.}
}
\value{
none
}
\keyword{tree}
ipred/man/predict.inclass.Rd0000644000176200001440000000644413461342153015512 0ustar liggesusers
\name{predict.inclass}
\alias{predict.inclass}
\title{Predictions from an Inclass Object}
\description{
Predicts the class membership of new observations through indirect
classification.
}
\usage{
\method{predict}{inclass}(object, newdata, ...)
}
\arguments{
\item{object}{ object of class \code{inclass}, see \code{\link{inclass}}.}
\item{newdata}{data frame to be classified.}
\item{\dots}{additional arguments corresponding to the predictive models
specified in \code{\link{inclass}}.}
}
\details{
Predictions of class memberships are calculated, i.e. values of the
intermediate variables are predicted and classified following
\code{cFUN}, see \code{\link{inclass}}.
}
\value{
The vector of predicted classes is returned.
}
\references{
David J. Hand, Hua Gui Li, Niall M. Adams (2001),
Supervised classification with structured class definitions.
\emph{Computational Statistics & Data Analysis} \bold{36},
209--225.

Andrea Peters, Berthold Lausen, Georg Michelson and Olaf Gefeller (2003),
Diagnosis of glaucoma by indirect classifiers.
\emph{Methods of Information in Medicine} \bold{1}, 99-103.
}
\seealso{\code{\link{inclass}}}
\examples{
\dontrun{
# Simulation model, classification rule following Hand et al.
(2001)

theta90 <- varset(N = 1000, sigma = 0.1, theta = 90, threshold = 0)
dataset <- as.data.frame(cbind(theta90$explanatory,
theta90$intermediate))
names(dataset) <- c(colnames(theta90$explanatory),
colnames(theta90$intermediate))

classify <- function(Y, threshold = 0) {
Y <- Y[,c("y1", "y2")]
z <- (Y > threshold)
resp <- as.factor(ifelse((z[,1] + z[,2]) > 1, 1, 0))
return(resp)
}

formula <- response~y1+y2~x1+x2

fit <- inclass(formula, data = dataset, pFUN = list(list(model = lm)),
cFUN = classify)

predict(object = fit, newdata = dataset)

data("Smoking", package = "ipred")

# explanatory variables are: TarY, NicY, COY, Sex, Age
# intermediate variables are: TVPS, BPNL, COHB
# response is defined by:

classify <- function(data){
data <- data[,c("TVPS", "BPNL", "COHB")]
res <- t(t(data) > c(4438, 232.5, 58))
res <- as.factor(ifelse(apply(res, 1, sum) > 2, 1, 0))
res
}

response <- classify(Smoking[ ,c("TVPS", "BPNL", "COHB")])
smoking <- cbind(Smoking, response)

formula <- response~TVPS+BPNL+COHB~TarY+NicY+COY+Sex+Age

fit <- inclass(formula, data = smoking, pFUN = list(list(model = lm)),
cFUN = classify)

predict(object = fit, newdata = smoking)
}

data("GlaucomaMVF", package = "ipred")
library("rpart")
glaucoma <- GlaucomaMVF[,(names(GlaucomaMVF) != "tension")]
# explanatory variables are derived by laser scanning image and
# intraocular pressure
# intermediate variables are: clv, cs, lora
# response is defined by

classify <- function (data) {
attach(data)
res <- ifelse((!is.na(clv) & !is.na(lora) & clv >= 5.1 & lora >=
49.23372) |
(!is.na(clv) & !is.na(lora) & !is.na(cs) & clv < 5.1 & lora >=
58.55409 & cs < 1.405) |
(is.na(clv) & !is.na(lora) & !is.na(cs) & lora >= 58.55409 & cs <
1.405) |
(!is.na(clv) & is.na(lora) & cs < 1.405), 0, 1)
detach(data)
factor (res, labels = c("glaucoma", "normal"))
}

fit <- inclass(Class~clv+lora+cs~., data = glaucoma, pFUN =
list(list(model = rpart)), cFUN = classify)

data("GlaucomaM", package = "TH.data")
predict(object = fit, newdata = GlaucomaM)
}
\keyword{misc}
ipred/man/DLBCL.Rd0000644000176200001440000000347613461342153013251 0ustar liggesusers
\name{DLBCL}
\alias{DLBCL}
\non_function{}
\title{ Diffuse Large B-Cell Lymphoma }
\usage{data("DLBCL")}
\description{
A data frame with gene expression data from diffuse large B-cell
lymphoma (DLBCL) patients.
}
\format{
This data frame contains the following columns:
\describe{
\item{DLCL.Sample}{DLBCL identifier.}
\item{Gene.Expression}{Gene expression group.}
\item{time}{survival time in months.}
\item{cens}{censoring: 0 censored, 1 dead.}
\item{IPI}{International prognostic index.}
\item{MGEc.1}{mean gene expression in cluster 1.}
\item{MGEc.2}{mean gene expression in cluster 2.}
\item{MGEc.3}{mean gene expression in cluster 3.}
\item{MGEc.4}{mean gene expression in cluster 4.}
\item{MGEc.5}{mean gene expression in cluster 5.}
\item{MGEc.6}{mean gene expression in cluster 6.}
\item{MGEc.7}{mean gene expression in cluster 7.}
\item{MGEc.8}{mean gene expression in cluster 8.}
\item{MGEc.9}{mean gene expression in cluster 9.}
\item{MGEc.10}{mean gene expression in cluster 10.}
}
}
\source{
Except for \code{MGEc.*}, the data are published at
\url{http://llmpp.nih.gov/lymphoma/data.shtml}. \code{MGEc.*} is the
mean of the gene expression in each of ten clusters derived by
agglomerative average linkage hierarchical cluster analysis (Hothorn et
al., 2004).
}
\references{
Ash A. Alizadeh et al. (2000),
Distinct types of diffuse large B-cell lymphoma identified by gene
expression profiling.
\emph{Nature}, \bold{403}, 504--509.

Torsten Hothorn, Berthold Lausen, Axel Benner and Martin
Radespiel-Troeger (2004),
Bagging Survival Trees.
\emph{Statistics in Medicine}, \bold{23}, 77--91.
}
\examples{

suppressWarnings(RNGversion("3.5.3"))
set.seed(290875)

data("DLBCL", package="ipred")
library("survival")
survfit(Surv(time, cens) ~ 1, data=DLBCL)

}
\keyword{datasets}
ipred/man/summary.bagging.Rd0000644000176200001440000000103613461342153015513 0ustar liggesusers
\name{summary.classbagg}
\alias{summary.classbagg}
\alias{summary.regbagg}
\alias{summary.survbagg}
\alias{print.summary.bagging}
\title{Summarising Bagging}
\description{
\code{summary} method for objects returned by \code{\link{bagging}}.
}
\usage{
\method{summary}{classbagg}(object, \dots)
}
\arguments{
\item{object}{object returned by \code{\link{bagging}}.}
\item{\dots}{further arguments to be passed to or from methods.}
}
\details{
A representation of all trees in the object is printed.
}
\value{
none
}
\keyword{tree}
ipred/man/varset.Rd0000644000176200001440000000323413461342153013727 0ustar liggesusers
\name{varset}
\alias{varset}
\title{Simulation Model}
\description{
Three sets of variables are calculated: explanatory, intermediate and
response variables.
}
\usage{
varset(N, sigma=0.1, theta=90, threshold=0, u=1:3)
}
\arguments{
\item{N}{number of simulated observations.}
\item{sigma}{standard deviation of the error term.}
\item{theta}{angle between two u vectors.}
\item{threshold}{cutpoint for classifying to 0 or 1.}
\item{u}{starting values.}
}
\details{
For each observation, values of two explanatory variables
\eqn{x = (x_1, x_2)^{\top}} and of two responses
\eqn{y = (y_1, y_2)^{\top}} are simulated, following the formula:
\deqn{
y = U*x+e = ({u_1^{\top} \atop u_2^{\top}})*x+e
}
where x is the realisation of a standard normal random variable and e
is generated by a normal variable with standard deviation \code{sigma}.
U is a 2*2 matrix, where
\deqn{
u_1 = ({u_{1, 1} \atop u_{1, 2}}),
u_2 = ({u_{2, 1} \atop u_{2, 2}}),
||u_1|| = ||u_2|| = 1,
}
i.e. a matrix of two normalised vectors.
}
\value{
A list containing the following arguments
\item{explanatory}{N*2 matrix of 2 explanatory variables.}
\item{intermediate}{N*2 matrix of 2 intermediate variables.}
\item{response}{response vectors with values 0 or 1.}
}
\references{
David J. Hand, Hua Gui Li, Niall M. Adams (2001),
Supervised classification with structured class definitions.
\emph{Computational Statistics & Data Analysis} \bold{36},
209--225.
}
\examples{

theta90 <- varset(N = 1000, sigma = 0.1, theta = 90, threshold = 0)
theta0 <- varset(N = 1000, sigma = 0.1, theta = 0, threshold = 0)
par(mfrow = c(1, 2))
plot(theta0$intermediate)
plot(theta90$intermediate)

}
\keyword{misc}
ipred/man/summary.inclass.Rd0000644000176200001440000000105513461342153015552 0ustar liggesusers
\name{summary.inclass}
\alias{summary.inclass}
\alias{print.summary.inclass}
\title{Summarising Inclass}
\description{
Summary of inclass is returned.
}
\usage{
\method{summary}{inclass}(object, ...)
}
\arguments{
\item{object}{an object of class \code{inclass}.}
\item{\dots}{additional arguments.}
}
\details{
A representation of an indirect classification model (the intermediate
variables, the modelling technique used and the prediction model) is
printed.
}
\value{
none
}
\seealso{\code{\link{print.summary.inclass}}}
\keyword{misc}
ipred/man/inbagg.Rd0000644000176200001440000001145113461342153013652 0ustar liggesusers
\name{inbagg}
\alias{inbagg}
\alias{inbagg.default}
\alias{inbagg.data.frame}
\title{Indirect Bagging}
\description{
Function to perform the indirect bagging and subagging.
}
\usage{
\method{inbagg}{data.frame}(formula, data, pFUN=NULL,
cFUN=list(model = NULL, predict = NULL, training.set = NULL),
nbagg = 25, ns = 0.5, replace = FALSE, ...)
}
\arguments{
\item{formula}{formula. A \code{formula} specified as
\code{y~w1+w2+w3~x1+x2+x3} describes how to model the intermediate
variables \code{w1, w2, w3} and the response variable \code{y}, if no
other formula is specified by the elements of \code{pFUN} or in
\code{cFUN}.}
\item{data}{data frame of explanatory, intermediate and response
variables.}
\item{pFUN}{list of lists, which describe models for the intermediate
variables, details are given below.}
\item{cFUN}{either a fixed function with argument \code{newdata} and
returning the class membership by default, or a list specifying a
classifying model, similar to one element of \code{pFUN}. Details are
given below.}
\item{nbagg}{number of bootstrap samples.}
\item{ns}{proportion of sample to be drawn from the learning sample.
By default, subagging with 50\% is performed, i.e. draw 0.5*n out of n
without replacement.}
\item{replace}{logical. Draw with or without replacement.}
\item{\dots}{additional arguments (e.g. \code{subset}).}
}
\details{
A given data set is subdivided into three types of variables:
explanatory, intermediate and response variables.\cr

Here, each specified intermediate variable is modelled separately
following \code{pFUN}, a list of lists with elements specifying an
arbitrary number of models for the intermediate variables and an
optional element \code{training.set = c("oob", "bag", "all")}. The
element \code{training.set} determines whether predictive models for
the intermediates are calculated based on the out-of-bag sample
(\code{"oob"}), the default, on the bag sample (\code{"bag"}) or on all
available observations (\code{"all"}). The elements of \code{pFUN},
specifying the models for the intermediate variables are lists as
described in \code{\link{inclass}}. Note that, if no formula is given
in these elements, the functional relationship of \code{formula} is
used.\cr

The response variable is modelled following \code{cFUN}.
This can either be a fixed classifying function as described in Peters
et al. (2003) or a list, which specifies the modelling technique to be
applied. The list contains the arguments \code{model} (which model to
be fitted), \code{predict} (optional, how to predict), \code{formula}
(optional, of type \code{y~w1+w2+w3+x1+x2} determines the variables the
classifying function is based on) and the optional argument
\code{training.set = c("fitted.bag", "original", "fitted.subset")}
specifying whether the classifying function is trained on the predicted
observations of the bag sample (\code{"fitted.bag"}), on the original
observations (\code{"original"}) or on the predicted observations not
included in a defined subset (\code{"fitted.subset"}). By default the
formula specified in \code{formula} determines the variables the
classifying function is based on.\cr

Note that the default of \code{cFUN = list(model = NULL, training.set =
"fitted.bag")} uses the function \code{\link[rpart]{rpart}} and the
predict function \code{predict(object, newdata, type = "class")}.
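For illustration, an explicit \code{cFUN} specification mirroring this
default could look like the following (a sketch only):
\preformatted{
## sketch: spell out the default classifying model explicitly
cFUN <- list(model = rpart,
             predict = function(object, newdata)
                 predict(object, newdata, type = "class"),
             training.set = "fitted.bag")
}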
}
\value{
An object of class \code{"inbagg"}, that is a list with elements
\item{mtrees}{a list of length \code{nbagg}, describing the prediction
models corresponding to each bootstrap sample. Each element of
\code{mtrees} is a list with elements \code{bindx} (observations of bag
sample), \code{btree} (classifying function of bag sample) and
\code{bfct} (predictive models for intermediates of bag sample).}
\item{y}{vector of response values.}
\item{W}{data frame of intermediate variables.}
\item{X}{data frame of explanatory variables.}
}
\references{
David J. Hand, Hua Gui Li, Niall M. Adams (2001),
Supervised classification with structured class definitions.
\emph{Computational Statistics & Data Analysis} \bold{36},
209--225.

Andrea Peters, Berthold Lausen, Georg Michelson and Olaf Gefeller (2003),
Diagnosis of glaucoma by indirect classifiers.
\emph{Methods of Information in Medicine} \bold{1}, 99-103.
}
\seealso{\code{\link[rpart]{rpart}}, \code{\link{bagging}},
\code{\link{lm}}}
\examples{

library("MASS")
library("rpart")
y <- as.factor(sample(1:2, 200, replace = TRUE))
W <- mvrnorm(n = 200, mu = rep(0, 3), Sigma = diag(3))
X <- mvrnorm(n = 200, mu = rep(2, 3), Sigma = diag(3))
colnames(W) <- c("w1", "w2", "w3")
colnames(X) <- c("x1", "x2", "x3")
DATA <- data.frame(y, W, X)

pFUN <- list(list(formula = w1~x1+x2, model = lm, predict =
mypredict.lm),
list(model = rpart))

inbagg(y~w1+w2+w3~x1+x2+x3, data = DATA, pFUN = pFUN)
}
\keyword{misc}
ipred/man/GlaucomaMVF.Rd0000644000176200001440000001214613461342153014526 0ustar liggesusers
\name{GlaucomaMVF}
\alias{GlaucomaMVF}
\non_function{}
\title{ Glaucoma Database }
\usage{data("GlaucomaMVF")}
\description{
The \code{GlaucomaMVF} data has 170 observations in two classes. 66
predictors are derived from a confocal laser scanning image of the
optic nerve head, from a visual field test, fundus photography and a
measurement of the intraocular pressure.
}
\format{
This data frame contains the following predictors describing the
morphology of the optic nerve head, the visual field, the intraocular
pressure and a membership variable:
\describe{
\item{ag}{area global.}
\item{at}{area temporal.}
\item{as}{area superior.}
\item{an}{area nasal.}
\item{ai}{area inferior.}
\item{eag}{effective area global.}
\item{eat}{effective area temporal.}
\item{eas}{effective area superior.}
\item{ean}{effective area nasal.}
\item{eai}{effective area inferior.}
\item{abrg}{area below reference global.}
\item{abrt}{area below reference temporal.}
\item{abrs}{area below reference superior.}
\item{abrn}{area below reference nasal.}
\item{abri}{area below reference inferior.}
\item{hic}{height in contour.}
\item{mhcg}{mean height contour global.}
\item{mhct}{mean height contour temporal.}
\item{mhcs}{mean height contour superior.}
\item{mhcn}{mean height contour nasal.}
\item{mhci}{mean height contour inferior.}
\item{phcg}{peak height contour.}
\item{phct}{peak height contour temporal.}
\item{phcs}{peak height contour superior.}
\item{phcn}{peak height contour nasal.}
\item{phci}{peak height contour inferior.}
\item{hvc}{height variation contour.}
\item{vbsg}{volume below surface global.}
\item{vbst}{volume below surface temporal.}
\item{vbss}{volume below surface superior.}
\item{vbsn}{volume below surface nasal.}
\item{vbsi}{volume below surface inferior.}
\item{vasg}{volume above surface global.}
\item{vast}{volume above surface temporal.}
\item{vass}{volume above surface superior.}
\item{vasn}{volume above surface nasal.}
\item{vasi}{volume above surface inferior.}
\item{vbrg}{volume below reference global.}
\item{vbrt}{volume below reference temporal.}
\item{vbrs}{volume below reference superior.}
\item{vbrn}{volume below reference nasal.}
\item{vbri}{volume below reference inferior.}
\item{varg}{volume above reference global.}
\item{vart}{volume above reference temporal.}
\item{vars}{volume above reference superior.}
\item{varn}{volume above reference nasal.}
\item{vari}{volume above reference inferior.}
\item{mdg}{mean depth global.}
\item{mdt}{mean depth temporal.}
\item{mds}{mean depth superior.}
\item{mdn}{mean depth nasal.}
\item{mdi}{mean depth inferior.}
\item{tmg}{third moment global.}
\item{tmt}{third moment temporal.}
\item{tms}{third moment superior.}
\item{tmn}{third moment nasal.}
\item{tmi}{third moment inferior.}
\item{mr}{mean radius.}
\item{rnf}{retinal nerve fiber thickness.}
\item{mdic}{mean depth in contour.}
\item{emd}{effective mean depth.}
\item{mv}{mean variability.}
\item{tension}{intraocular pressure.}
\item{clv}{corrected loss variance, variability of the visual field.}
\item{cs}{contrast sensitivity of the visual field.}
\item{lora}{loss of rim area, measured by fundus photography.}
\item{Class}{a factor with levels \code{glaucoma} and \code{normal}.}
}
}
\details{
Confocal laser images of the eye background are taken with the
Heidelberg Retina Tomograph and variables 1-62 are derived. Most of
these variables describe either the area or volume in certain parts of
the papilla and are measured in four sectors (temporal, superior, nasal
and inferior) as well as for the whole papilla (global). The global
measurement is, roughly, the sum of the measurements taken in the four
sectors. The perimeter `Octopus' measures the visual field variables
\code{clv} and \code{cs}; stereo optic disk photographs were taken with
a telecentric fundus camera and \code{lora} is derived.
Observations of both groups are matched by age and sex to prevent
possible confounding.
}
\source{
Andrea Peters, Berthold Lausen, Georg Michelson and Olaf Gefeller (2003),
Diagnosis of glaucoma by indirect classifiers.
\emph{Methods of Information in Medicine} \bold{1}, 99-103.
}
\note{
\code{GlaucomaMVF} overlaps in some parts with
\code{\link[TH.data]{GlaucomaM}}.
}
\examples{
\dontrun{

data("GlaucomaMVF", package = "ipred")
library("rpart")

response <- function (data) {
attach(data)
res <- ifelse((!is.na(clv) & !is.na(lora) & clv >= 5.1 & lora >=
49.23372) |
(!is.na(clv) & !is.na(lora) & !is.na(cs) & clv < 5.1 & lora >=
58.55409 & cs < 1.405) |
(is.na(clv) & !is.na(lora) & !is.na(cs) & lora >= 58.55409 & cs <
1.405) |
(!is.na(clv) & is.na(lora) & cs < 1.405), 0, 1)
detach(data)
factor (res, labels = c("glaucoma", "normal"))
}

errorest(Class~clv+lora+cs~., data = GlaucomaMVF, model=inclass,
estimator="cv", pFUN = list(list(model = rpart)), cFUN = response)
}
}
\keyword{datasets}
ipred/man/dystrophy.Rd0000644000176200001440000000403313461342153014466 0ustar liggesusers
\name{dystrophy}
\alias{dystrophy}
\non_function{}
\title{Detection of muscular dystrophy carriers.}
\usage{data(dystrophy)}
\description{
The \code{dystrophy} data frame has 209 rows and 10 columns.
}
\format{
This data frame contains the following columns:
\describe{
\item{OBS}{numeric. Observation number.}
\item{HospID}{numeric. Hospital ID number.}
\item{AGE}{numeric, age in years.}
\item{M}{numeric. Month of examination.}
\item{Y}{numeric. Year of examination.}
\item{CK}{numeric. Serum marker creatine kinase.}
\item{H}{numeric. Serum marker hemopexin.}
\item{PK}{numeric. Serum marker pyruvate kinase.}
\item{LD}{numeric. Serum marker lactate dehydrogenase.}
\item{Class}{factor with levels, \code{carrier} and \code{normal}.}
}
}
\details{
Duchenne Muscular Dystrophy (DMD) is a genetically transmitted disease,
passed from a mother to her children. Affected female offspring usually
suffer no apparent symptoms, male offspring with the disease die at a
young age. Although female carriers have no physical symptoms they tend
to exhibit elevated levels of certain serum enzymes or proteins.
\cr
The dystrophy dataset contains 209 observations of 75 female DMD
carriers and 134 female DMD non-carriers. It includes the age of the
female and the serum parameters serum marker creatine kinase (CK),
serum marker hemopexin (H), serum marker pyruvate kinase (PK) and serum
marker lactate dehydrogenase (LD). The serum markers CK and H may be
measured rather inexpensively from frozen serum, PK and LD require
fresh serum.
}
\source{
D.Andrews and A. Herzberg (1985), Data. Berlin: Springer-Verlag.
}
\references{
Robert Tibshirani and Geoffrey Hinton (1998), Coaching variables for
regression and classification. Statistics and Computing 8, 25-33.
}
\examples{
\dontrun{

data("dystrophy")
library("rpart")
errorest(Class~CK+H~AGE+PK+LD, data = dystrophy, model = inbagg,
pFUN = list(list(model = lm, predict = mypredict.lm), list(model =
rpart)), ns = 0.75, estimator = "cv")
}
}
\keyword{datasets}
ipred/man/print.inclass.Rd0000644000176200001440000000077613461342153015212 0ustar liggesusers
\name{print.inclass}
\alias{print.inclass}
\title{Print Method for Inclass Object}
\description{
Print object of class \code{inclass} in nice layout.
}
\usage{
\method{print}{inclass}(x, ...)
}
\arguments{
\item{x}{object of class \code{inclass}.}
\item{\dots}{additional arguments.}
}
\details{
An object of class \code{inclass} is printed.
Information about the number and names of the intermediate variables, the modelling technique used and the number of bootstrap samples drawn is given. } \keyword{misc} ipred/man/control.errorest.Rd0000644000176200001440000000275213461342153015753 0ustar liggesusers\name{control.errorest} \alias{control.errorest} \title{ Control Error Rate Estimators } \description{ Some parameters that control the behaviour of \code{\link{errorest}}. } \usage{ control.errorest(k = 10, nboot = 25, strat = FALSE, random = TRUE, predictions = FALSE, getmodels=FALSE, list.tindx = NULL) } \arguments{ \item{k}{integer, specify $k$ for $k$-fold cross-validation.} \item{nboot}{integer, number of bootstrap replications.} \item{strat}{logical, if \code{TRUE}, cross-validation is performed using stratified sampling (for classification problems).} \item{random}{logical, if \code{TRUE}, cross-validation is performed using a random ordering of the data.} \item{predictions}{logical, indicates whether the prediction for each observation should be returned or not (classification and regression only). For a bootstrap based estimator a matrix of size 'number of observations' times \code{nboot} is returned with predicted values of the ith out-of-bootstrap sample in column i and 'NA's for those observations not included in the ith out-of-bootstrap sample.} \item{getmodels}{logical, indicates whether a list of all models should be returned. For cross-validation only.} \item{list.tindx}{list of numeric vectors, indicating which observations are included in each bootstrap or cross-validation sample, respectively.} } \value{ A list with the same components as arguments. } \keyword{misc} ipred/man/cv.Rd0000644000176200001440000000363113461342153013034 0ustar liggesusers\name{cv} \alias{cv} \alias{cv.default} \alias{cv.factor} \alias{cv.numeric} \alias{cv.integer} \alias{cv.Surv} \title{Cross-validated Error Rate Estimators.} \description{ These functions are low-level functions used by \code{\link{errorest}} and are normally not called by users. } \usage{ \method{cv}{factor}(y, formula, data, model, predict, k=10, random=TRUE, strat=FALSE, predictions=NULL, getmodels=NULL, list.tindx = NULL, \dots) } \arguments{ \item{y}{response variable, either of class \code{factor} (classification), \code{numeric} (regression) or \code{Surv} (survival).} \item{formula}{a formula object.} \item{data}{data frame of predictors and response described in \code{formula}.} \item{model}{a function implementing the predictive model to be evaluated. The function \code{model} can either return an object representing a fitted model or a function with argument \code{newdata} which returns predicted values. In this case, the \code{predict} argument to \code{errorest} is ignored.} \item{predict}{a function with arguments \code{object} and \code{newdata} only which predicts the status of the observations in \code{newdata} based on the fitted model in \code{object}.} \item{k}{integer, $k$ for $k$-fold cross-validation.} \item{random}{logical, indicates whether a random order or the given order of the data should be used for sample splitting or not, defaults to \code{TRUE}.} \item{strat}{logical, stratified sampling or not, defaults to \code{FALSE}.} \item{predictions}{logical, return the prediction of each observation.} \item{getmodels}{logical, return a list of models for each fold.} \item{list.tindx}{list of numeric vectors, indicating which observations are included in each cross-validation sample.} \item{\dots}{additional arguments to \code{model}.} } \details{ See \code{\link{errorest}}.
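For illustration, a direct call (normally issued internally by \code{errorest}) might look like \code{cv(mydata$y, y ~ ., data = mydata, model = lm, predict = predict, k = 10)}; this is only a sketch, assuming a data frame \code{mydata} with a numeric response \code{y}.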
} \keyword{misc} ipred/man/errorest.Rd0000644000176200001440000002241213461342153014267 0ustar liggesusers\name{errorest} \alias{errorest} \alias{errorest.data.frame} \alias{errorest.default} \title{ Estimators of Prediction Error } \description{ Resampling-based estimates of prediction error: misclassification error, root mean squared error or Brier score for survival data. } \usage{ \method{errorest}{data.frame}(formula, data, subset, na.action=na.omit, model=NULL, predict=NULL, estimator=c("cv", "boot", "632plus"), est.para=control.errorest(), ...) } \arguments{ \item{formula}{a formula of the form \code{lhs ~ rhs}. Either describing the model of explanatory and response variables in the usual way (see \code{\link{lm}}) or the model between explanatory and intermediate variables in the framework of indirect classification, see \code{\link{inclass}}.} \item{data}{a data frame containing the variables in the model formula and additionally the class membership variable if \code{model = inclass}. \code{data} is required for indirect classification, otherwise \code{formula} is evaluated in the calling environment.} \item{subset}{optional vector, specifying a subset of observations to be used.} \item{na.action}{function which indicates what should happen when the data contains \code{NA}'s, defaults to \code{\link{na.omit}}.} \item{model}{function. Modelling technique whose error rate is to be estimated. The function \code{model} can either return an object representing a fitted model or a function with argument \code{newdata} which returns predicted values. In this case, the \code{predict} argument to \code{errorest} is ignored.} \item{predict}{function. Prediction method to be used. The vector of predicted values must have the same length as the number of to-be-predicted observations. Predictions corresponding to missing data must be replaced by \code{NA}. Additionally, \code{predict} has to return predicted values comparable to the responses (that is: factors for classification problems). See the example on how to ensure this for any predictor.} \item{estimator}{estimator of the misclassification error: \code{cv} cross-validation, \code{boot} bootstrap or \code{632plus} bias corrected bootstrap (classification only). } \item{est.para}{a list of additional parameters that control the calculation of the estimator, see \code{\link{control.errorest}} for details.} \item{\dots}{additional parameters to \code{model}.} } \details{ The prediction error for classification and regression models as well as predictive models for censored data using cross-validation or the bootstrap can be computed by \code{errorest}. For classification problems, the estimated misclassification error is returned. The root mean squared error is computed for regression problems and the Brier score for censored data (Graf et al., 1999) is reported if the response is censored. Any model can be specified as long as it is a function with arguments \code{model(formula, data, subset, na.action, ...)}. If a method \code{predict.model(object, newdata, ...)} is available, \code{predict} does not need to be specified. However, \code{predict} has to return predicted values in the same order and of the same length as the response. See the examples below. $k$-fold cross-validation and the usual bootstrap estimator with \code{est.para$nboot} bootstrap replications can be computed for all kinds of problems.
The bias corrected .632+ bootstrap by Efron and Tibshirani (1997) is available for classification problems only. Use \code{\link{control.errorest}} to specify additional arguments. \code{errorest} is a formula-based interface to the generic functions \code{\link{cv}} or \code{\link{bootest}} which implement methods for classification, regression and survival problems. } \value{ The class of the object returned depends on the class of the response variable and the estimator used. In each case, it is a list with an element \code{error} and additional information. \code{print} methods are available for the inspection of the results. } \references{ Brian D. Ripley (1996), \emph{Pattern Recognition and Neural Networks}. Cambridge: Cambridge University Press. Bradley Efron and Robert Tibshirani (1997), Improvements on Cross-Validation: The .632+ Bootstrap Method. \emph{Journal of the American Statistical Association} \bold{92}(438), 548--560. Erika Graf, Claudia Schmoor, Willi Sauerbrei and Martin Schumacher (1999), Assessment and comparison of prognostic classification schemes for survival data. \emph{Statistics in Medicine} \bold{18}(17-18), 2529--2545. Rosa A. Schiavo and David J. Hand (2000), Ten More Years of Error Rate Research. \emph{International Statistical Review} \bold{68}(3), 296-310. David J. Hand, Hua Gui Li, Niall M. Adams (2001), Supervised Classification with Structured Class Definitions. \emph{Computational Statistics & Data Analysis} \bold{36}, 209--225. } \examples{ # Classification data("iris") library("MASS") # force predict to return class labels only mypredict.lda <- function(object, newdata) predict(object, newdata = newdata)$class # 10-fold cv of LDA for Iris data errorest(Species ~ ., data=iris, model=lda, estimator = "cv", predict= mypredict.lda) data("PimaIndiansDiabetes", package = "mlbench") \dontrun{ # 632+ bootstrap of LDA for Diabetes data errorest(diabetes ~ ., data=PimaIndiansDiabetes, model=lda, estimator = "632plus", predict= mypredict.lda) } # cv of a fixed partition of the data list.tindx <- list(1:100, 101:200, 201:300, 301:400, 401:500, 501:600, 601:700, 701:768) errorest(diabetes ~ ., data=PimaIndiansDiabetes, model=lda, estimator = "cv", predict = mypredict.lda, est.para = control.errorest(list.tindx = list.tindx)) \dontrun{ # both bootstrap estimates based on fixed partitions list.tindx <- vector(mode = "list", length = 25) for(i in 1:25) { list.tindx[[i]] <- sample(1:768, 768, TRUE) } errorest(diabetes ~ ., data=PimaIndiansDiabetes, model=lda, estimator = c("boot", "632plus"), predict= mypredict.lda, est.para = control.errorest(list.tindx = list.tindx)) } data("Glass", package = "mlbench") # LDA has cross-validated misclassification error of # 38\% (Ripley, 1996, page 98) # Pruned trees about 32\% (Ripley, 1996, page 230) # use stratified sampling here, i.e. preserve the class proportions errorest(Type ~ ., data=Glass, model=lda, predict=mypredict.lda, est.para=control.errorest(strat=TRUE)) # force predict to return class labels mypredict.rpart <- function(object, newdata) predict(object, newdata = newdata, type="class") library("rpart") pruneit <- function(formula, ...)
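# wrapper: grow a classification tree and prune it at a fixed complexity parameter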
prune(rpart(formula, ...), cp = 0.01) errorest(Type ~ ., data=Glass, model=pruneit, predict=mypredict.rpart, est.para=control.errorest(strat=TRUE)) # compute sensitivity and specificity for stabilised LDA data("GlaucomaM", package = "TH.data") error <- errorest(Class ~ ., data=GlaucomaM, model=slda, predict=mypredict.lda, est.para=control.errorest(predictions=TRUE)) # sensitivity mean(error$predictions[GlaucomaM$Class == "glaucoma"] == "glaucoma") # specificity mean(error$predictions[GlaucomaM$Class == "normal"] == "normal") # Indirect Classification: Smoking data data(Smoking) # Set three groups of variables: # 1) explanatory variables are: TarY, NicY, COY, Sex, Age # 2) intermediate variables are: TVPS, BPNL, COHB # 3) response (resp) is defined by: resp <- function(data){ data <- data[, c("TVPS", "BPNL", "COHB")] res <- t(t(data) > c(4438, 232.5, 58)) res <- as.factor(ifelse(apply(res, 1, sum) > 2, 1, 0)) res } response <- resp(Smoking[ ,c("TVPS", "BPNL", "COHB")]) smoking <- cbind(Smoking, response) formula <- response~TVPS+BPNL+COHB~TarY+NicY+COY+Sex+Age # The leave-one-out estimate of the misclassification error is # 36.36\% (Hand et al., 2001), using indirect classification with # linear models \dontrun{ errorest(formula, data = smoking, model = inclass, estimator = "cv", pFUN = list(list(model=lm, predict = mypredict.lm)), cFUN = resp, est.para=control.errorest(k=nrow(smoking))) } # Regression data("BostonHousing", package = "mlbench") # 10-fold cv of lm for Boston Housing data errorest(medv ~ ., data=BostonHousing, model=lm, est.para=control.errorest(random=FALSE)) # the same, with "model" returning a function for prediction # instead of an object of class "lm" mylm <- function(formula, data) { mod <- lm(formula, data) function(newdata) predict(mod, newdata) } errorest(medv ~ ., data=BostonHousing, model=mylm, est.para=control.errorest(random=FALSE)) # Survival data data("GBSG2", package = "TH.data") library("survival") # prediction is the fitted Kaplan-Meier predict.survfit <- function(object, newdata) object # 5-fold cv of Kaplan-Meier for GBSG2 study errorest(Surv(time, cens) ~ 1, data=GBSG2, model=survfit, predict=predict.survfit, est.para=control.errorest(k=5)) } \keyword{misc} ipred/man/bootest.Rd0000644000176200001440000000371213461342153014103 0ustar liggesusers\name{bootest} \alias{bootest} \alias{bootest.default} \alias{bootest.factor} \alias{bootest.numeric} \alias{bootest.integer} \alias{bootest.Surv} \title{Bootstrap Error Rate Estimators} \description{ These functions are low-level functions used by \code{\link{errorest}} and are normally not called by users. } \usage{ \method{bootest}{factor}(y, formula, data, model, predict, nboot=25, bc632plus=FALSE, list.tindx = NULL, predictions = FALSE, both.boot = FALSE, \dots)} \arguments{ \item{y}{the response variable, either of class \code{factor} (classification), \code{numeric} (regression) or \code{Surv} (survival).} \item{formula}{a formula object.} \item{data}{data frame of predictors and response described in \code{formula}.} \item{model}{a function implementing the predictive model to be evaluated. The function \code{model} can either return an object representing a fitted model or a function with argument \code{newdata} which returns predicted values.
In this case, the \code{predict} argument to \code{errorest} is ignored.} \item{predict}{a function with arguments \code{object} and \code{newdata} only which predicts the status of the observations in \code{newdata} based on the fitted model in \code{object}.} \item{nboot}{number of bootstrap replications to be used.} \item{bc632plus}{logical. Should the bias corrected version of the misclassification error be computed?} \item{predictions}{logical, return a matrix of predictions. The ith column contains predictions of the ith out-of-bootstrap sample and 'NA's corresponding to the ith bootstrap sample.} \item{list.tindx}{list of numeric vectors, indicating which observations are included in each bootstrap sample.} \item{both.boot}{logical, return both (bootstrap and 632plus) estimates or only one of them.} \item{\dots}{additional arguments to \code{model}.} } \details{ See \code{\link{errorest}}. } \keyword{misc} ipred/DESCRIPTION0000644000176200001440000000211514120340711013055 0ustar liggesusersPackage: ipred Title: Improved Predictors Version: 0.9-12 Date: 2021-09-15 Authors@R: c(person("Andrea", "Peters", role = "aut"), person("Torsten", "Hothorn", role = c("aut", "cre"), email = "Torsten.Hothorn@R-project.org"), person("Brian D.", "Ripley", role = "ctb"), person("Terry", "Therneau", role = "ctb"), person("Beth", "Atkinson", role = "ctb")) Description: Improved predictive models by indirect classification and bagging for classification, regression and survival problems as well as resampling based estimators of prediction error. Depends: R (>= 2.10) Imports: rpart (>= 3.1-8), MASS, survival, nnet, class, prodlim Suggests: mvtnorm, mlbench, TH.data, randomForest, party License: GPL (>= 2) NeedsCompilation: yes Packaged: 2021-09-15 09:22:36 UTC; hothorn Author: Andrea Peters [aut], Torsten Hothorn [aut, cre], Brian D.
Ripley [ctb], Terry Therneau [ctb], Beth Atkinson [ctb] Maintainer: Torsten Hothorn Repository: CRAN Date/Publication: 2021-09-15 09:50:01 UTC ipred/build/0000755000176200001440000000000014120335534012454 5ustar liggesusersipred/build/vignette.rds0000644000176200001440000000035414120335534015015 0ustar liggesusersQ 0 N W/^D=x-.BvczLx4M/Bc pNO6%.& <0]^3ro3u(VF$RASҩR<3HRr|wq' 3`If; 7xk9L`;#ܮP츆#ga|ڕܝ_d狎U8uot?^ipred/tests/0000755000176200001440000000000014120335534012517 5ustar liggesusersipred/tests/ipred-segfault.R0000644000176200001440000001046013461342153015560 0ustar liggesuserslibrary("ipred") library("mlbench") library("MASS") library("survival") suppressWarnings(RNGversion("3.5.3")) actversion <- paste(R.version$major, R.version$minor, sep=".") thisversion <- "1.7.0" #if (compareVersion(actversion, thisversion) >= 0) { # RNGversion("1.6.2") #} set.seed(29081975) # Classification learn <- as.data.frame(mlbench.twonorm(200)) test <- as.data.frame(mlbench.twonorm(100)) # bagging mod <- bagging(classes ~ ., data=learn, coob=TRUE, nbagg=10) mod predict(mod)[1:10] # Double-Bagging comb.lda <- list(list(model=lda, predict=function(obj, newdata) predict(obj, newdata)$x)) mod <- bagging(classes ~ ., data=learn, comb=comb.lda, nbagg=10) mod predict(mod, newdata=test[1:10,]) predict(mod, newdata=test[1:10,], agg="aver") predict(mod, newdata=test[1:10,], agg="wei") predict(mod, newdata=test[1:10,], type="prob") predict(mod, newdata=test[1:10,], type="prob", agg="aver") predict(mod, newdata=test[1:10,], type="prob", agg="wei") mypredict.lda <- function(object, newdata) predict(object, newdata = newdata)$class errorest(classes ~ ., data=learn, model=lda, predict=mypredict.lda) errorest(classes ~ ., data=learn, model=lda, predict=mypredict.lda, est.para=control.errorest(k=5, random=FALSE)) lapply(errorest(classes ~ ., data=learn, model=lda, predict=mypredict.lda, est.para=control.errorest(k=5, random=FALSE, getmodels=TRUE))$models, class) errorest(classes ~ ., data=learn, model=bagging, est.para=control.errorest(k=2), nbagg=10) errorest(classes ~ ., data=learn, model=bagging, est.para=control.errorest(k=2), nbagg=10, comb=comb.lda) errorest(classes ~ ., data=learn, model=lda, predict=mypredict.lda, estimator="boot") errorest(classes ~ ., data=learn, model=lda, predict=mypredict.lda, estimator="632plus") # Regression learn <- as.data.frame(mlbench.friedman1(100)) test <- as.data.frame(mlbench.friedman1(100)) # bagging mod <- bagging(y ~ ., data=learn, coob=TRUE, nbagg=10) mod predict(mod)[1:10] predict(mod, newdata=test[1:10,]) predict(mod, newdata=test[1:10,], agg="aver") predict(mod, newdata=test[1:10,], agg="wei") errorest(y ~ ., data=learn, model=lm) errorest(y ~ ., data=learn, model=lm, est.para=control.errorest(k=5, random=FALSE)) lapply(errorest(y ~ ., data=learn, model=lm, est.para=control.errorest(k=5, random=FALSE, getmodels=TRUE))$models, class) errorest(y ~ ., data=learn, model=lm, estimator="boot") # survival learn <- rsurv(100, model="C") test <- rsurv(100, model="C") mod <- bagging(Surv(time, cens) ~ ., data=learn, nbagg=10) mod predict(mod, newdata=test[1:10,]) #errorest(Surv(time, cens) ~ ., data=learn, model=bagging, # est.para=list(k=2, random=FALSE), nbagg=5) #errorest(Surv(time, cens) ~ ., data=learn, model=bagging, # estimator="boot", nbagg=5, est.para=list(nboot=5)) #insert control.errorest errorest(Surv(time, cens) ~ ., data=learn, model=bagging, est.para=control.errorest(k=2, random=FALSE), nbagg=5) errorest(Surv(time, cens) ~ ., data=learn, model=bagging, estimator="boot", 
nbagg=5, est.para=control.errorest(nboot=5)) #lapply(errorest(Surv(time, cens) ~ ., data=learn, model=bagging, # estimator="cv", nbagg=1, est.para=list(k=2, random=FALSE, # getmodels=TRUE))$models, class) #insert control.errorest lapply(errorest(Surv(time, cens) ~ ., data=learn, model=bagging, estimator="cv", nbagg=1, est.para=control.errorest(k=2, random=FALSE, getmodels=TRUE))$models, class) # bundling for regression learn <- as.data.frame(mlbench.friedman1(100)) test <- as.data.frame(mlbench.friedman1(100)) comb <- list(list(model=lm, predict=predict.lm)) modc <- bagging(y ~ ., data=learn, nbagg=10, comb=comb) modc predict(modc, newdata=learn)[1:10] # bundling for survival while(FALSE) { data("GBSG2", package = "ipred") rcomb <- list(list(model=coxph, predict=predict)) mods <- bagging(Surv(time,cens) ~ ., data=GBSG2, nbagg=10, comb=rcomb, control=rpart.control(xval=0)) predict(mods, newdata=GBSG2[1:3,]) # test for method dispatch on integer valued responses y <- sample(1:100, 100) class(y) x <- matrix(rnorm(100*5), ncol=5) mydata <- as.data.frame(cbind(y, x)) cv(y, y ~ ., data=mydata, model=lm, predict=predict) bootest(y, y ~ ., data=mydata, model=lm, predict=predict) bagging(y ~., data=mydata, nbagg=10) } ipred/tests/ipred-bugs.Rout.save0000644000176200001440000002454313461342153016402 0ustar liggesusers R Under development (unstable) (2019-04-24 r76421) -- "Unsuffered Consequences" Copyright (C) 2019 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. 
> library(ipred) > > suppressWarnings(RNGversion("3.5.3")) > actversion <- paste(R.version$major, R.version$minor, sep=".") > thisversion <- "1.7.0" > > #if (compareVersion(actversion, thisversion) >= 0) { > # RNGversion("1.6.2") > #} > set.seed(29081975) > > data("BreastCancer", package = "mlbench") > mod <- bagging(Class ~ Cl.thickness + Cell.size + + Cell.shape + Marg.adhesion + + Epith.c.size + Bare.nuclei + + Bl.cromatin + Normal.nucleoli + + Mitoses, data=BreastCancer, coob=TRUE) > print(mod) Bagging classification trees with 25 bootstrap replications Call: bagging.data.frame(formula = Class ~ Cl.thickness + Cell.size + Cell.shape + Marg.adhesion + Epith.c.size + Bare.nuclei + Bl.cromatin + Normal.nucleoli + Mitoses, data = BreastCancer, coob = TRUE) Out-of-bag estimate of misclassification error: 0.0439 > > print(a <- predict(mod, newdata=BreastCancer)) [1] benign benign benign malignant benign malignant benign [8] benign benign benign benign benign malignant benign [15] malignant malignant benign benign malignant benign malignant [22] malignant benign malignant benign malignant benign benign [29] benign benign benign benign malignant benign benign [36] benign malignant benign malignant malignant malignant malignant [43] malignant malignant malignant benign malignant benign benign [50] malignant malignant malignant malignant malignant malignant malignant [57] malignant malignant malignant malignant malignant benign malignant [64] malignant benign malignant benign malignant malignant benign [71] benign malignant benign malignant malignant benign benign [78] benign benign benign benign benign benign benign [85] malignant malignant malignant malignant benign benign benign [92] benign benign benign benign benign benign benign [99] malignant malignant malignant malignant benign malignant malignant [106] malignant malignant malignant benign malignant benign malignant [113] malignant malignant benign benign benign malignant benign [120] benign benign benign malignant malignant malignant benign [127] malignant benign malignant benign benign benign malignant [134] benign benign benign benign benign benign benign [141] benign benign malignant benign benign benign malignant [148] benign benign malignant benign malignant malignant benign [155] benign malignant benign benign benign malignant malignant [162] benign benign benign benign benign malignant malignant [169] benign benign benign benign benign malignant malignant [176] malignant benign malignant benign malignant benign benign [183] benign malignant malignant benign malignant malignant malignant [190] benign malignant malignant benign benign benign benign [197] benign benign benign benign malignant malignant benign [204] benign benign malignant malignant benign benign benign [211] malignant malignant benign malignant malignant malignant benign [218] benign malignant benign benign malignant malignant malignant [225] malignant benign malignant malignant benign malignant malignant [232] malignant benign malignant benign benign malignant malignant [239] malignant malignant benign benign benign benign benign [246] benign malignant malignant benign benign benign malignant [253] benign malignant malignant malignant benign benign benign [260] benign malignant malignant malignant malignant malignant benign [267] malignant malignant malignant benign malignant benign malignant [274] malignant benign benign benign benign benign malignant [281] benign benign malignant malignant malignant malignant malignant [288] benign malignant malignant benign 
benign malignant malignant [295] benign malignant benign benign benign malignant malignant [302] benign malignant benign malignant malignant benign benign [309] malignant benign benign benign malignant benign benign [316] malignant malignant malignant benign benign malignant benign [323] benign malignant benign benign malignant benign malignant [330] malignant malignant benign benign malignant malignant benign [337] malignant benign benign malignant malignant benign benign [344] benign malignant benign benign benign malignant malignant [351] benign benign benign malignant benign benign malignant [358] malignant malignant malignant malignant malignant benign benign [365] benign benign malignant malignant benign benign benign [372] benign benign benign benign benign benign benign [379] benign benign benign malignant benign benign benign [386] benign malignant benign benign benign benign malignant [393] benign benign benign benign benign benign benign [400] benign malignant benign benign benign benign benign [407] benign benign benign benign benign benign malignant [414] benign malignant benign malignant benign benign benign [421] benign malignant benign benign benign malignant benign [428] malignant benign benign benign benign benign benign [435] benign malignant malignant benign benign benign malignant [442] benign benign benign benign benign benign benign [449] benign malignant benign benign benign malignant benign [456] malignant malignant malignant benign benign benign benign [463] benign benign benign malignant malignant malignant benign [470] benign benign benign benign benign benign benign [477] benign benign benign malignant benign benign malignant [484] malignant benign benign benign malignant malignant malignant [491] benign malignant benign malignant benign benign benign [498] benign benign benign benign benign benign benign [505] benign benign malignant benign benign benign benign [512] benign benign benign malignant malignant benign benign [519] benign malignant benign benign malignant malignant benign [526] benign benign benign benign benign malignant benign [533] benign benign benign benign benign benign benign [540] benign benign benign benign benign benign benign [547] malignant benign benign malignant benign benign benign [554] benign benign benign benign benign benign benign [561] benign benign benign benign benign malignant benign [568] benign malignant malignant malignant malignant benign benign [575] malignant benign benign benign benign benign benign [582] malignant malignant benign benign benign malignant benign [589] malignant benign malignant malignant malignant benign malignant [596] benign benign benign benign benign benign benign [603] benign malignant malignant malignant benign benign malignant [610] benign malignant malignant malignant benign benign benign [617] benign benign benign benign benign benign benign [624] benign benign benign malignant benign benign benign [631] benign benign benign malignant benign benign malignant [638] benign benign benign benign benign benign benign [645] benign benign benign benign malignant benign benign [652] benign benign benign benign benign benign benign [659] malignant benign benign benign benign benign benign [666] benign benign benign malignant malignant malignant benign [673] benign benign benign benign benign benign benign [680] benign malignant malignant benign benign benign benign [687] benign benign benign benign benign malignant benign [694] benign benign benign malignant malignant malignant Levels: benign 
malignant > stopifnot(length(a) == nrow(BreastCancer)) > > # bagging failed if only one predictor was specified > # by Christoph M. Friedrich , April 29th, 2002 > > X <- as.data.frame(matrix(rnorm(1000), ncol=10)) > y <- factor(ifelse(apply(X, 1, mean) > 0, 1, 0)) > learn <- cbind(y, X) > mt <- bagging(y ~ V1, data=learn, coob=TRUE) > # > # This won't work because of some difficulties with predict.lda > # mt <- bagging(y ~ V1, data=learn, method="double", coob=FALSE) > # > X <- as.data.frame(matrix(rnorm(1000), ncol=10)) > y <- apply(X, 1, mean) + rnorm(nrow(X)) > learn <- cbind(y, X) > mt <- bagging(y ~ V1, data=learn, coob=TRUE) > > # cv.numeric and bootest.numeric were broken, check for reasonable values > X <- as.data.frame(matrix(rnorm(1000), ncol=10)) > y <- apply(X, 1, mean) + rnorm(nrow(X)) > learn <- cbind(y, X) > newy <- apply(X, 1, mean) + rnorm(nrow(X)) > mod <- lm(y ~ ., data=learn) > trueerr <- sqrt(mean((newy - fitted(mod))^2)) > cverr <- rep(0,5) > for (i in 1:5) cverr[i] <- errorest(y ~., data=learn, model=lm)$error > booterr <- errorest(y ~., data=learn, model=lm, + estimator="boot",est.para=control.errorest(nboot=50))$error > print(trueerr/mean(cverr)) [1] 0.9612632 > print(trueerr/booterr) [1] 0.9073771 > > proc.time() user system elapsed 2.980 0.156 3.131 ipred/tests/ipred-smalltest.Rout.save0000644000176200001440000000306713461342153017450 0ustar liggesusers R Under development (unstable) (2019-04-24 r76421) -- "Unsuffered Consequences" Copyright (C) 2019 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R.
> > library(ipred) > suppressWarnings(RNGversion("3.5.3")) > > # check if SdiffKM computes > # > # int_start^stop (exp(-h*t) - c)^2 dt > # > # in the correct way > > # low-level interface needed > myfoo <- function(times, prob, h, window=0.0001) { + .Call("SdiffKM", as.double(c(0, times)), + as.double(c(prob[1], prob)), as.double(c(h, + window)), PACKAGE = "ipred") + } > > # to compare with > mexp <- function(start, stop, haz, c=0) { + foo <- function(t) + exp(-2*haz*t)/(-2*haz) - 2*c*exp(-haz*t)/(-haz) + c^2*t + foo(stop) - foo(start) + } > > > times <- seq(from=0.01, to=8, by=0.01) > > for (myc in c(0,0.5,0.9)) { + for (h in c(1,2,3)) { + prob <- rep(myc, length(times)) + a <- round(mexp(0, max(times), h, c=myc),7) + b <- round(myfoo(times, prob, h), 7) + stopifnot(all.equal(a,b)) + } + } > > proc.time() user system elapsed 1.696 0.168 1.862 ipred/tests/ipred-smalltest.R0000644000176200001440000000143213461342153015755 0ustar liggesusers library(ipred) suppressWarnings(RNGversion("3.5.3")) # check if SdiffKM computes # # int_start^stop (exp(-h*t) - c)^2 dt # # in the correct way # low-level interface needed myfoo <- function(times, prob, h, window=0.0001) { .Call("SdiffKM", as.double(c(0, times)), as.double(c(prob[1], prob)), as.double(c(h, window)), PACKAGE = "ipred") } # to compare with mexp <- function(start, stop, haz, c=0) { foo <- function(t) exp(-2*haz*t)/(-2*haz) - 2*c*exp(-haz*t)/(-haz) + c^2*t foo(stop) - foo(start) } times <- seq(from=0.01, to=8, by=0.01) for (myc in c(0,0.5,0.9)) { for (h in c(1,2,3)) { prob <- rep(myc, length(times)) a <- round(mexp(0, max(times), h, c=myc),7) b <- round(myfoo(times, prob, h), 7) stopifnot(all.equal(a,b)) } } ipred/tests/ipred-bugs.R0000644000176200001440000000336713461342153014716 0ustar liggesuserslibrary(ipred) suppressWarnings(RNGversion("3.5.3")) actversion <- paste(R.version$major, R.version$minor, sep=".") thisversion <- "1.7.0" #if (compareVersion(actversion, thisversion) >= 0) { # RNGversion("1.6.2") #} set.seed(29081975) data("BreastCancer", package = "mlbench") mod <- bagging(Class ~ Cl.thickness + Cell.size + Cell.shape + Marg.adhesion + Epith.c.size + Bare.nuclei + Bl.cromatin + Normal.nucleoli + Mitoses, data=BreastCancer, coob=TRUE) print(mod) print(a <- predict(mod, newdata=BreastCancer)) stopifnot(length(a) == nrow(BreastCancer)) # bagging failed if only one predictor was specified # by Christoph M. 
Friedrich , April 29th, 2002 X <- as.data.frame(matrix(rnorm(1000), ncol=10)) y <- factor(ifelse(apply(X, 1, mean) > 0, 1, 0)) learn <- cbind(y, X) mt <- bagging(y ~ V1, data=learn, coob=TRUE) # # This won't work because of some difficulties with predict.lda # mt <- bagging(y ~ V1, data=learn, method="double", coob=FALSE) # X <- as.data.frame(matrix(rnorm(1000), ncol=10)) y <- apply(X, 1, mean) + rnorm(nrow(X)) learn <- cbind(y, X) mt <- bagging(y ~ V1, data=learn, coob=TRUE) # cv.numeric and bootest.numeric were broken, check for reasonable values X <- as.data.frame(matrix(rnorm(1000), ncol=10)) y <- apply(X, 1, mean) + rnorm(nrow(X)) learn <- cbind(y, X) newy <- apply(X, 1, mean) + rnorm(nrow(X)) mod <- lm(y ~ ., data=learn) trueerr <- sqrt(mean((newy - fitted(mod))^2)) cverr <- rep(0,5) for (i in 1:5) cverr[i] <- errorest(y ~., data=learn, model=lm)$error booterr <- errorest(y ~., data=learn, model=lm, estimator="boot",est.para=control.errorest(nboot=50))$error print(trueerr/mean(cverr)) print(trueerr/booterr) ipred/src/0000755000176200001440000000000014120335534012144 5ustar liggesusersipred/src/init.c0000644000176200001440000000074113461342153013257 0ustar liggesusers #include <R.h> #include <Rinternals.h> #include <stdlib.h> // for NULL #include <R_ext/Rdynload.h> /* FIXME: Check these declarations against the C/Fortran source code. */ /* .Call calls */ extern SEXP SdiffKM(SEXP, SEXP, SEXP); static const R_CallMethodDef CallEntries[] = { {"SdiffKM", (DL_FUNC) &SdiffKM, 3}, {NULL, NULL, 0} }; void R_init_ipred(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } ipred/src/SdiffKM.c0000644000176200001440000000434213461342153013600 0ustar liggesusers/* $Id: SdiffKM.c,v 1.2 2003/03/27 13:36:02 hothorn Exp $ SdiffKM: integrated squared difference between survival curve and KM estimator */ #include <R.h> #include <Rmath.h> #include <Rinternals.h> SEXP SdiffKM(SEXP time, SEXP prob, SEXP args) { SEXP rint; double d, p, helpone, helptwo, k; double myint = 0.0; double hazard, window, tw; int i, j, n; /* check arguments */ if (!isVector(time)) error("Argument time is not a vector"); n = LENGTH(time); if (REAL(time)[0] != 0.0) error("time[1] must be zero"); if (!isVector(prob)) error("Argument prob is not a vector"); if (REAL(prob)[0] > 1.0) error("prob[1] must be less or equal 1"); if (REAL(prob)[0] < 0.0) error("prob[1] must be greater or equal 0"); if (LENGTH(prob) != n) error("prob and time differ in their length"); if (!isVector(args) || LENGTH(args) != 2) error("Argument args must be vector with two elements"); hazard = REAL(args)[0]; window = REAL(args)[1]; /* prepare for return values */ PROTECT(rint = allocVector(REALSXP, 1)); UNPROTECT(1); REAL(rint)[0] = 0.0; /* for all discrete times */ for (i = 0; i < n-1; i++) { /* get difference between times */ d = REAL(time)[i+1] - REAL(time)[i]; /* estimated survival probability at this time */ p = REAL(prob)[i]; /* if the difference is small enough */ if (d < window) { helpone = p - exp(-REAL(time)[i] * hazard); helptwo = p - exp(-REAL(time)[i+1] * hazard); /* mean of over and under sum */ myint += 0.5 * d * (helpone*helpone + helptwo*helptwo); } else { /* split up in smaller pieces */ k = ftrunc(d/window) + 1; tw = d/k; for (j = 0; j < k; j++) { helpone = p - exp(-(REAL(time)[i] + j*tw)*hazard); helptwo = p - exp(-(REAL(time)[i] + (j+1)*tw)*hazard); /* mean of over and under sum for all small windows */ myint += 0.5*tw*(helpone*helpone + helptwo*helptwo); } } } /* ok, get outa here */ REAL(rint)[0] = myint; return(rint); }
ipred/vignettes/0000755000176200001440000000000014120335534013365 5ustar liggesusersipred/vignettes/ipred.bib0000644000176200001440000000400313461342153015145 0ustar liggesusers@article{breiman:1996, key = {53}, author = {L. Breiman}, title = {Bagging Predictors}, journal = {Machine Learning}, pages = {123-140}, year = {1996}, volume = {24}, number = {2} } @article{efron:1997, key = {52}, author = {B. Efron and R. Tibshirani}, title = {Improvements on Cross-Validation: The .632+ Bootstrap Method}, journal = {Journal of the American Statistical Association}, pages = {548-560}, year = {1997}, volume = {92}, number = {438} } @article{hand:2001, key = {32}, author = {D.J. Hand and H.G. Li and N.M. Adams}, title = {Supervised classification with structured class definitions}, journal = {Computational Statistics \& Data Analysis}, pages = {209-225}, year = {2001}, volume = {36} } @inproceedings{ifcs:2001, author = {A. Peters and T. Hothorn and B. Lausen}, title = {Glaucoma diagnosis by indirect classifiers}, booktitle = {Studies in Classification, Data Analysis, and Knowledge Organization (to appear)}, organization = {Proceedings of the 8th Conference of the International Federation of Classification Societies}, year = {2002} } @techreport{out-of-bag:1996, key = {T162}, author = {Leo Breiman}, title = {Out-Of-Bag Estimation}, institution = {Statistics Department, University of California Berkeley}, year = {1996}, address = {Berkeley CA 94708} } @article{double-bag:2002, key = {247}, author = {Torsten Hothorn and Berthold Lausen}, title = {Double-Bagging: Combining classifiers by bootstrap aggregation}, journal = {Pattern Recognition}, year = {2003}, pages = {1303-1309}, volume = {36}, number = {6} } @article{Rnews:Peters+Hothorn+Lausen:2002, key = {308}, author = {Andrea Peters and Torsten Hothorn and Berthold Lausen}, title = {ipred: Improved Predictors}, journal = {R News}, year = 2002, month = {June}, volume = 2, number = 2, pages = {33--36}, url = {http://CRAN.R-project.org/doc/Rnews/} } ipred/vignettes/ipred-examples.Rnw0000644000176200001440000004270413461342153017005 0ustar liggesusers\documentclass[11pt]{article} \usepackage[round]{natbib} \usepackage{bibentry} \usepackage{amsfonts} \usepackage{hyperref} \renewcommand{\baselinestretch}{1.3} \newcommand{\ipred}{\texttt{ipred }} %\VignetteIndexEntry{Some more or less useful examples for illustration.} %\VignetteDepends{ipred} %\textwidth=6.2in %\VignetteDepends{mvtnorm,TH.data,rpart,MASS} \begin{document} \title{\ipred: Improved Predictors} \date{} \SweaveOpts{engine=R,eps=TRUE,pdf=TRUE} <<>>= options(prompt=">", width=50) set.seed(210477) @ \maketitle This short manual is heavily based on \cite{Rnews:Peters+Hothorn+Lausen:2002} and needs some improvements. \section{Introduction} In classification problems, there are several approaches to creating rules which assign future observations to certain classes. Common methods are, for example, linear discriminant analysis and classification trees. Recent developments have led to a substantial reduction of the misclassification error in many applications. Bootstrap aggregation \citep[``bagging'',][]{breiman:1996} combines classifiers trained on bootstrap samples of the original data. Another approach is indirect classification, which incorporates a priori knowledge into a classification rule \citep{hand:2001}. Since the misclassification error is the criterion used to assess classification techniques, its estimation is of central importance.
A nearly unbiased but highly variable estimator can be calculated by cross validation. \cite{efron:1997} discuss bootstrap estimates of misclassification error. As a by-product of bagging, \cite{out-of-bag:1996} proposes the out-of-bag estimator. \\ However, the computation of the desired classification models and their misclassification errors is often hampered by the different and specialized interfaces of the various procedures. We propose the \ipred package as a first attempt to create a unified interface for improved predictors and various error rate estimators. In the following, we demonstrate the functionality of the package using the example of glaucoma classification. We start with an overview of the disease and the data and review the implemented classification and estimation methods in the context of their application to glaucoma diagnosis. \section{Glaucoma} Glaucoma is a slowly progressing and irreversible disease that affects the optic nerve head. It is the second most common cause of blindness worldwide. Glaucoma is usually diagnosed based on a reduced visual field, assessed by a medical examination of perimetry, and a reduced number of intact nerve fibers at the optic nerve head. One way to examine the number of intact nerve fibers is the Heidelberg Retina Tomograph (HRT), a confocal laser scanning tomograph which performs a three-dimensional topographical analysis of the optic nerve head morphology. It produces a series of $32$ images, each of $256 \times 256$ pixels, which are converted to a single topographic image. A less complex, though less informative, examination tool is $2$-dimensional fundus photography. However, in cooperation with clinicians and by a priori analysis, we derived a diagnosis of glaucoma based on three variables only: $w_{lora}$ represents the loss of nerve fibers and is obtained by a $2$-dimensional fundus photography, $w_{cs}$ and $w_{clv}$ describe the visual field defect \citep{ifcs:2001}. \begin{center} \begin{figure}[h] \begin{center} {\small \setlength{\unitlength}{0.6cm} \begin{picture}(14.5,5) \put(5, 4.5){\makebox(2, 0.5){$w_{clv}\geq 5.1$}} \put(2.5, 3){\makebox(2, 0.5){$w_{lora}\geq 49.23$}} \put(7.5, 3){\makebox(2, 0.5){$w_{lora} \geq 58.55$}} \put(0, 1.5){\makebox(2, 0.5){$glaucoma$}} \put(3.5, 1.5){\makebox(2, 0.5){$normal$}} \put(6.5, 1.5){\makebox(2, 0.5){$w_{cs} < 1.405$}} \put(10, 1.5){\makebox(2, 0.5){$normal$}} \put(3.5, 0){\makebox(2, 0.5){$glaucoma$}} \put(6.5, 0){\makebox(2, 0.5){$normal$}} \put(6, 4.5){\vector(-3, -2){1.5}} \put(6, 4.5){\vector(3, -2){1.5}} \put(3.5, 3){\vector(3, -2){1.5}} \put(3.5, 3){\vector(-3, -2){1.5}} \put(8.5, 3){\vector(3, -2){1.5}} \put(8.5, 3){\vector(-3, -2){1.5}} \put(6.5, 1.5){\vector(3, -2){1.5}} \put(6.5, 1.5){\vector(-3, -2){1.5}} \end{picture} } \end{center} \caption{Glaucoma diagnosis. \label{diag}} \end{figure} \end{center} Figure \ref{diag} represents the diagnosis of glaucoma in terms of a medical decision tree. A complication of the disease is that damage to the optic nerve head morphology precedes a measurable visual field defect. Furthermore, early detection is of major importance, since an adequate therapy can only slow down the progression of the disease. Hence, a classification rule for detecting early damage should include morphological information, rather than visual field data only. Two example datasets are included in the package. The first one contains measurements of the eye morphology only (\texttt{GlaucomaM}), including $62$ variables for $196$ observations.
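The morphological data can be loaded and inspected directly (a minimal sketch; \texttt{GlaucomaM} ships with the \texttt{TH.data} package):
<<>>=
data("GlaucomaM", package = "TH.data")
dim(GlaucomaM)
@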
The second dataset (\texttt{GlaucomaMVF}) contains additional visual field measurements for a different set of patients. In both example datasets, the observations in the two groups are matched by age and sex to prevent any bias. \section{Bagging} Referring to the example of glaucoma diagnosis, we first demonstrate the functionality of the \texttt{bagging} function. We fit \texttt{nbagg = 25} (default) classification trees for bagging by <<>>= library("ipred") library("rpart") library("MASS") data("GlaucomaM", package="TH.data") gbag <- bagging(Class ~ ., data = GlaucomaM, coob=TRUE) @ where \texttt{GlaucomaM} contains explanatory HRT variables and the response of glaucoma diagnosis (\texttt{Class}), a factor with two levels \texttt{normal} and \texttt{glaucoma}. \texttt{print} returns information about the returned object, i.e. the number of bootstrap replications used and, as requested by \texttt{coob=TRUE}, the out-of-bag estimate of misclassification error \citep{out-of-bag:1996}. <<>>= print(gbag) @ The out-of-bag estimate uses the observations which are left out in a bootstrap sample to estimate the misclassification error at almost no additional computational cost. \cite{double-bag:2002} propose to use the out-of-bag samples for a combination of linear discriminant analysis and classification trees, called ``Double-Bagging''. For example, a combination of a stabilised linear discriminant analysis with classification trees can be computed along the following lines <<>>= scomb <- list(list(model=slda, predict=function(object, newdata) predict(object, newdata)$x)) gbagc <- bagging(Class ~ ., data = GlaucomaM, comb=scomb) @ \texttt{predict} predicts future observations according to the fitted model. <<>>= predict(gbagc, newdata=GlaucomaM[c(1:3, 99:102), ]) @ Both \texttt{bagging} and \texttt{predict} rely on the \texttt{rpart} routines. The \texttt{rpart} routine for each bootstrap sample can be controlled in the usual way. By default \texttt{rpart.control} is used with \texttt{minsplit=2} and \texttt{cp=0} and it is wise to turn cross-validation off (\texttt{xval=0}). The function \texttt{prune} can be used to prune each of the trees to an appropriate size. \section{Indirect Classification} Especially in a medical context, it often occurs that a priori knowledge about a classifying structure is given. For example, it might be known that a disease is assessed on a subgroup of the given variables or, moreover, that class memberships are assigned by a deterministically known classifying function. \cite{hand:2001} proposes the framework of indirect classification which incorporates this a priori knowledge into a classification rule. In this framework we subdivide a given data set into three groups of variables: those used to predict the class membership (explanatory), those used to define the class membership (intermediate) and the class membership variable itself (response). For future observations, an indirect classifier predicts values for the appointed intermediate variables based on explanatory variables only. The observation is then classified based on its predicted intermediate variables and a fixed classifying function. This indirect way of classification using the predicted intermediate variables offers possibilities to incorporate a priori knowledge by the subdivision of variables and by the construction of a fixed classifying function. We apply indirect classification by using the function \texttt{inclass}.
Referring to the glaucoma example, explanatory variables are HRT and anamnestic variables only, intermediate variables are $w_{lora}, \, w_{cs}$ and $w_{clv}$. The response is the diagnosis of glaucoma, which is determined by a fixed classifying function and therefore not included in the learning sample \texttt{GlaucomaMVF}. We assign the given variables to explanatory and intermediate by specifying the input formula. <<>>= data("GlaucomaMVF", package="ipred") GlaucomaMVF <- GlaucomaMVF[,-63] formula.indirect <- Class~clv + lora + cs ~ . @ The variables on the left-hand side represent the intermediate variables, modeled by the explanatory variables on the right-hand side. Almost any modeling technique can be used to predict the intermediate variables. We chose a linear model by \texttt{pFUN = list(list(model = lm))}. <<>>= classify <- function (data) { attach(data) res <- ifelse((!is.na(clv) & !is.na(lora) & clv >= 5.1 & lora >= 49.23372) | (!is.na(clv) & !is.na(lora) & !is.na(cs) & clv < 5.1 & lora >= 58.55409 & cs < 1.405) | (is.na(clv) & !is.na(lora) & !is.na(cs) & lora >= 58.55409 & cs < 1.405) | (!is.na(clv) & is.na(lora) & cs < 1.405), 0, 1) detach(data) factor (res, labels = c("glaucoma", "normal")) } fit <- inclass(formula.indirect, pFUN = list(list(model = lm)), cFUN = classify, data = GlaucomaMVF) @ \texttt{print} displays the subdivision of variables and the chosen modeling technique <<>>= print(fit) @ Furthermore, indirect classification predicts the intermediate variables based on the explanatory variables and classifies them according to a fixed classifying function in a second step; that is, a deterministically known function for the class membership has to be specified. In our example this function is given in Figure \ref{diag} and implemented in the function \texttt{classify}.\\ Prediction of future observations is now performed by <<>>= predict(object = fit, newdata = GlaucomaMVF[c(1:3, 86:88),]) @ We perform a bootstrap aggregated indirect classification approach using the function \texttt{inbagg}, specifying the number of bootstrap samples via \texttt{nbagg} \citep{ifcs:2001}. Regression or classification trees are fitted for each bootstrap sample, depending on the measurement scale of the specified intermediate variables <<>>= mypredict.rpart <- function(object, newdata) { RES <- predict(object, newdata) RET <- rep(NA, nrow(newdata)) NAMES <- rownames(newdata) RET[NAMES %in% names(RES)] <- RES[NAMES[NAMES %in% names(RES)]] RET } fit <- inbagg(formula.indirect, pFUN = list(list(model = rpart, predict = mypredict.rpart)), cFUN = classify, nbagg = 25, data = GlaucomaMVF) @ The call for the prediction of values remains unchanged. \section{Error Rate Estimation} Classification rules are usually assessed by their misclassification rate. Hence, error rate estimation is of main importance. The function \texttt{errorest} implements a unified interface to several resampling based estimators. Referring to the example, we apply a linear discriminant analysis and specify the error rate estimator by \texttt{estimator = "cv", "boot"} or \texttt{"632plus"}, respectively. A 10-fold cross validation is performed by choosing \texttt{estimator = "cv"} and \texttt{est.para = control.errorest(k = 10)}. The options \texttt{estimator = "boot"} or \texttt{estimator = "632plus"} deliver a bootstrap estimator and its bias corrected version {\sl .632+} \citep[see][]{efron:1997}; we specify the number of bootstrap samples to be drawn by \texttt{est.para = control.errorest(nboot = 50)}.
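As a sketch (not evaluated here, since it relies on the prediction wrapper \texttt{mypredict.lda} defined below), such a bootstrap estimator could be requested by
<<eval=FALSE>>=
errorest(Class ~ ., data = GlaucomaM, model = lda,
         predict = mypredict.lda, estimator = "boot",
         est.para = control.errorest(nboot = 50))
@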
Further arguments are required to particularize the classification technique. The argument \texttt{predict} represents the chosen predictive function. For a unified interface, \texttt{predict} has to be based on the arguments \texttt{object} and \texttt{newdata} only; therefore, a wrapper function \texttt{mypredict} is necessary for classifiers which require more than those arguments or do not return the predicted classes by default. For a linear discriminant analysis with \texttt{lda}, we need to specify <<>>= mypredict.lda <- function(object, newdata){ predict(object, newdata = newdata)$class } @ and calculate a 10-fold cross-validated error rate estimator for a linear discriminant analysis by calling <<>>= errorest(Class ~ ., data = GlaucomaM, model=lda, estimator = "cv", predict= mypredict.lda) @ For the indirect approach the specification of the call becomes slightly more complicated. %Again for a unified interface a wrapper %function has to be used, which incorporates the fixed classification rule The bias corrected estimator {\sl .632+} is computed by <<>>= errorest(formula.indirect, data = GlaucomaMVF, model = inclass, estimator = "632plus", pFUN = list(list(model = lm)), cFUN = classify) @ Because of the subdivision of variables and a formula describing the modeling between explanatory and intermediate variables only, the class membership variable must be supplied as well. Hence, in contrast to the function \texttt{inclass}, the data set \texttt{GlaucomaMVF} used in \texttt{errorest} must contain explanatory, intermediate and response variables. Sometimes it may be necessary to reduce the number of predictors before training a classifier. Estimating the error rate after the variable selection leads to biased estimates of the misclassification error and therefore one should estimate the error rate of the whole procedure. Within the \texttt{errorest} framework, this can be done as follows. First, we define a function which does both variable selection and training of the classifier. For illustration purposes, we select the predictors by comparing their univariate $P$-values of a two-sample $t$-test with a prespecified level and train an LDA using the selected variables only. <<>>= mymod <- function(formula, data, level=0.05) { # select all predictors that are associated with a # univariate t.test p-value of less than level sel <- which(lapply(data, function(x) { if (!is.numeric(x)) return(1) else return(t.test(x ~ data$Class)$p.value) }) < level) # make sure that the response is still there sel <- c(which(colnames(data) %in% "Class"), sel) # compute an LDA using the selected predictors only mod <- lda(formula, data=data[,sel]) # and return a function for prediction function(newdata) { predict(mod, newdata=newdata[,sel])$class } } @ Note that \texttt{mymod} does not return an object of class \texttt{lda} but a function with argument \texttt{newdata} only. Thanks to lexical scoping, this function is used for computing predicted classes instead of a function \texttt{predict} passed to \texttt{errorest} as an argument. Computing a $5$-fold cross-validated error rate estimator is now approximately a one-liner. <<>>= errorest(Class ~ ., data=GlaucomaM, model=mymod, estimator = "cv", est.para=control.errorest(k=5)) @ %%To summarize the performance of the different classification techniques in the considered example of glaucoma diagnosis, the 10-fold %%cross-validated error estimator delivers the %%results given in Table \ref{tenf}.
%%\begin{figure} %%\begin{center} %%\begin{tabular}{ rrr } %%\hline %%dataset & method & error estimate \\ %%\hline %%\texttt{GlaucomaM} & {\sl slda} & 0.168 \\ %%\texttt{GlaucomaM} & {\sl bagging} & 0.158 \\ %%\texttt{GlaucomaM} & {\sl double-bagging} & 0.153 \\ %%\texttt{GlaucomaMVF} & {\sl inclass-bagging} & 0.206 \\ %%\texttt{GlaucomaMVF} & {\sl inclass-lm} & 0.229 \\ %%\hline %%\end{tabular} %%\caption{10-fold cross-validated error estimation of %%the misclassification error for several classification %%methods: {\sl slda} - stabilised linear discriminant analysis, %%{\sl bagging} - bagging with 50 bootstrap samples, %%{\sl double-bagging} - bagging with 50 bootstrap samples, %%combined with sLDA, {\sl inclass-bagging} - %%indirect classification using bagging, %%{\sl inclass-lm} indirect classification using %%linear modeling. \label{tenf}} %%\end{center} %%\end{figure} %%Note that an estimator of the variance is available for the ordinary %%bootstrap estimator (\texttt{estimator="boot"}) only, see \cite{efron:1997}. \section{Summary} \ipred tries to implement a unified interface to some recent developments in classification and error rate estimation. It is by no means finished or perfect, and we very much appreciate comments, suggestions and criticism. Currently, the major drawback is speed. Calling \texttt{rpart} $50$ times, once for each bootstrap sample, is relatively inefficient, but the design of interfaces was our main focus rather than optimization. Besides the examples shown, \texttt{bagging} can be used to compute bagging for regression trees and \texttt{errorest} computes estimators of the mean squared error for regression models. \bibliographystyle{plainnat} \bibliography{ipred} \end{document} ipred/R/0000755000176200001440000000000014120335534011556 5ustar liggesusersipred/R/csurv.R0000644000176200001440000000376713461342153013060 0ustar liggesusers# $Id: csurv.R,v 1.6 2003/03/28 12:55:32 hothorn Exp $ csurv <- function(newdata, pred, minprob=0, window=0.0001) { N <- nrow(newdata) if (!"hazard" %in% names(attributes(newdata))) stop("hazards attribute to newdata missing") hazards <- attr(newdata, "hazard") error <- rep(0, N) # if there is only one prediction for all observations GETPROB <- TRUE if (inherits(pred, "survfit")) { times <- pred$time # get times predprob <- getsurv(pred, times) # get steps GETPROB <- FALSE } for (i in 1:N) { if (GETPROB) { times <- pred[[i]]$time # get times predprob <- getsurv(pred[[i]], times) # get steps } # compute the integrated squared difference between # KM and S(t) # minprob: stop integration when S(t) < minprob lasttime <- -(log(minprob) / hazards[i]) if (max(times) > lasttime) { thisprob <- predprob[times <= lasttime] thistimes <- times[times <= lasttime] } else { thisprob <- predprob thistimes <- times } error[i] <- .Call(SdiffKM, as.double(c(0,thistimes)), as.double(c(1,thisprob)), as.double(c(hazards[i], window))) # adjust for time scale by last event error[i] <- error[i]/max(thistimes) if (length(unique(hazards)) == 1) { error <- error[i] break } } error <- mean(error) error } foo <- function (time, prob, hazard, window) { myint <- 0 time <- c(0, time) s <- exp(-time * hazard) prob <- c(1, prob) for (i in 1:(length(time)-1)) { d <- time[i+1] - time[i] if (d < window) { myint <- myint + 0.5 * d * ((prob[i] - s[i])^2 + (prob[i] - s[i + 1])^2) } else { k <- ceiling(d/window) wi <- d/k for (j in 1:k) myint <- myint + 0.5 * wi * ((prob[i] - exp(-(time[i] + (j - 1) * wi) * hazard))^2 + (prob[i] - exp(-(time[i] + j * wi) * hazard))^2) } } myint
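  # myint: integrated squared difference between the step function
  # defined by (time, prob) and the curve exp(-hazard * t)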
} ipred/R/print.R0000644000176200001440000001204413461342153013040 0ustar liggesusers#$Id: print.R,v 1.4 2004/02/09 08:08:21 peters Exp $ print.classbagg <- function(x, digits=4, ...) { cat("\n") B <- length(x$mtrees) if (B > 1) method <- paste("Bagging classification trees with", B, "bootstrap replications") else method <- "Classification tree" cat(method, "\n") if (!is.null(x$call)) { cat("\nCall: ") print(x$call) cat("\n") } if (x$OOB) { cat("Out-of-bag estimate of misclassification error: ", round(x$err, digits), "\n") } cat("\n") } print.regbagg <- function(x, digits=4, ...) { cat("\n") B <- length(x$mtrees) if (B > 1) method <- paste("Bagging regression trees with", B, "bootstrap replications") else method <- "Regression tree" cat(method, "\n") if (!is.null(x$call)) { cat("\nCall: ") print(x$call) cat("\n") } if (x$OOB) cat("Out-of-bag estimate of root mean squared error: ", round(x$err, digits), "\n") cat("\n") } print.survbagg <- function(x, digits=4, ...) { cat("\n") B <- length(x$mtrees) if (B > 1) method <- paste("Bagging survival trees with", B, "bootstrap replications") else method <- "Survival tree" cat(method, "\n") if (!is.null(x$call)) { cat("\nCall: ") print(x$call) cat("\n") } if (x$OOB) cat("Out-of-bag estimate of Brier's score: ", round(x$err, digits), "\n") cat("\n") } summary.classbagg <- function(object, ...) { print(object, ...) class(object) <- "summary.bagging" object } summary.regbagg <- function(object, ...) { print(object, ...) class(object) <- "summary.bagging" object } summary.survbagg <- function(object, ...) { print(object, ...) class(object) <- "summary.bagging" object } print.summary.bagging <- function(x, digits = max(3, getOption("digits")-3), ...) { cat("Trees: \n") print(x$mtrees) invisible(x$mtrees) } print.cvclass <- function(x, digits=4, ...) { cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } cat("\t", paste(x$k, "-fold", sep=""), "cross-validation estimator of misclassification error \n") cat("\n") cat("Misclassification error: ", round(x$error, digits), "\n") cat("\n") } print.bootestclass <- function(x, digits=4, ...) { if(all(names(x)[names(x)!="call"] %in% c("boot", "632plus"))) { XX <- x for(i in c("boot", "632plus")) { x <- XX[[i]] x$call <- XX[["call"]] cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } if (x$bc632plus) { cat("\t", ".632+ Bootstrap estimator of misclassification error \n") } else { cat("\t", "Bootstrap estimator of misclassification error \n") } cat("\t with" , x$nboot, "bootstrap replications\n") cat("\n") cat("Misclassification error: ", round(x$error, digits), "\n") if (!x$bc632plus) cat("Standard deviation:", round(x$sd, digits), "\n") cat("\n") } } else { # if(!all(names(x) %in% c("boot", "632plus"))){ cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } if (x$bc632plus) cat("\t", ".632+ Bootstrap estimator of misclassification error \n") else cat("\t", "Bootstrap estimator of misclassification error \n") cat("\t with" , x$nboot, "bootstrap replications\n") cat("\n") cat("Misclassification error: ", round(x$error, digits), "\n") if (!x$bc632plus) cat("Standard deviation:", round(x$sd, digits), "\n") cat("\n") } } print.cvreg <- function(x, digits=4, ...) 
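# print method for cross-validated root mean squared error estimates; a
# hedged sketch of a call producing such an object (data set and response
# names are hypothetical, mypredict.lm is exported by ipred):
#   errorest(y ~ ., data = mydata, model = lm, predict = mypredict.lm,
#            estimator = "cv", est.para = control.errorest(k = 10))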
{ cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } cat("\t", paste(x$k, "-fold", sep=""), "cross-validation estimator of root mean squared error\n") cat("\n") cat("Root mean squared error: ", round(x$error, digits), "\n") cat("\n") } print.bootestreg <- function(x, digits=4, ...) { cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } cat("\t", "Bootstrap estimator of root mean squared error \n") cat("\t with" , x$nboot, "bootstrap replications\n") cat("\n") cat("Root mean squared error: ", round(x$error, digits), "\n") cat("\n") } print.cvsurv <- function(x, digits=4, ...) { cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } cat("\t", paste(x$k, "-fold", sep=""), "cross-validation estimator of Brier's score\n") cat("\n") cat("Brier's score: ", round(x$error, digits), "\n") cat("\n") } print.bootestsurv <- function(x, digits=4, ...) { cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } cat("\t", "Bootstrap estimator of Brier's score\n") cat("\t with" , x$nboot, "bootstrap replications\n") cat("\n") cat("Brier's score: ", round(x$error, digits), "\n") cat("\n") } ipred/R/inclass.R0000644000176200001440000001423313461342153013342 0ustar liggesusers# $Id: inclass.R,v 1.33 2008/08/04 08:18:41 hothorn Exp $ inclass <- function(formula, data, ...) UseMethod("inclass", data) inclass.default <- function(formula, data, ...) { stop(paste("Do not know how to handle objects of class", class(data))) } inclass.data.frame <- function(formula, data, pFUN = NULL, cFUN = NULL, ...) { ##check formula if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1)) stop("formula missing or incorrect") m <- match.call(expand.dots = FALSE) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) ##editing formula ###main formula if(length(formula[[2]])==3) { if(is.function(cFUN)) y.formula <- formula[[2]] else y.formula <- cFUN$formula w.formula <- XX~YY w.formula[[2]] <- formula[[2]][[3]] w.formula[[3]] <- formula[[3]] response <- paste(formula[[2]][[2]]) w.names <- attr(terms(as.formula(formula[[2]]), data = data), "term.labels") x.names <- attr(terms(as.formula(formula), data = data), "term.labels") if(x.names[1] == ".") x.names <- colnames(data)[!(colnames(data) %in% c(response, w.names))] } else { stop(paste("Specified formula has to be of type y~w~x")) } if(is.null(w.formula)) stop("no formula for prediction model specified") formula.list <- vector(mode = "list", length= length(w.names)) names(formula.list) <- w.names P <- length(pFUN) Qi <- length(w.names) for(j in 1:Qi) { res <- list() res$formula <- w.formula res$formula[[2]] <- as.name(attr(terms(res$formula[-3], data = data), "term.labels")[j]) if(res$formula[[3]] == ".") { res$formula <- as.formula(paste(res$formula[[2]], "~", paste(x.names, collapse= "+"))) } for(i in 1:P) { if(is.null(pFUN[[i]]$formula)) { if(is.null(formula.list[[w.names[j]]]$formula)) formula.list[[w.names[j]]]$formula <- res$formula if(is.null(formula.list[[w.names[j]]]$model)) formula.list[[w.names[j]]]$model <- pFUN[[i]]$model if(is.null(formula.list[[w.names[j]]]$predict)) formula.list[[w.names[j]]]$predict <- pFUN[[i]]$predict } else { QQ <- attr(terms(pFUN[[i]]$formula[-3], data = data), "term.labels") for(k in QQ) { if(w.names[j] == k) { res$formula[[3]] <- pFUN[[i]]$formula[[3]] if(paste(pFUN[[i]]$formula[[3]]) == ".") { res$formula[[3]] <- as.formula(paste(w.names[j], "~", paste(x.names, collapse= "+"))) }
formula.list[[w.names[j]]]$formula <- pFUN[[i]]$formula formula.list[[w.names[j]]]$model <- pFUN[[i]]$model formula.list[[w.names[j]]]$predict <- pFUN[[i]]$predict } } } } } if(!is.function(cFUN)) { cFUN$formula <- y.formula if(is.null(cFUN$training.set)) cFUN$training.set <- "original" } result <- workhorse.inclass(object = formula.list, data = data, cFUN = cFUN, ...) return(result) } workhorse.inclass <- function(object, data, cFUN, subset, na.action, ...) { formula.list <- object q <- length(formula.list) result <- list() namen <- c() ##model fitting for(i in 1:q) { formula <- formula.list[[i]]$formula ##check necessary?? > if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1) || (length(attr(terms(formula[-3], data = data), "term.labels")) != 1)) stop("formula missing or incorrect") ## check necessary?? < m <- match.call(expand.dots= FALSE) res <- formula.list[[i]]$model(formula = formula, data = data) namen <- c(namen, as.character(formula[[2]])) result <- c(result, list(res)) } names(result) <- namen if(!is.function(cFUN)) { ###cFUN can be trained on original intermediate variables or on fitted values or on the subset if(!is.null(m$subset) && cFUN$training.set == "subset") dataresp <- data[!subset, ] if(cFUN$training.set == "original") dataresp <- data if(cFUN$training.set == "fitted") { dataresp <- data for(i in 1:q){ if(!is.null(formula.list[[i]]$predict)){ dataresp[,namen[i]] <- formula.list[[i]]$predict(result[[i]], newdata = data)} else { dataresp[,namen[i]] <- predict(result[[i]], newdata = data) } } } model.response <- cFUN$model(as.formula(cFUN$formula), data = dataresp, ...) } else { model.response <- cFUN } ###predict specifications are not delivered result <- list("model.intermediate" = result, "model.response" = model.response, "para.intermediate" = object, "para.response" = cFUN) class(result) <- "inclass" return(result) } print.inclass <- function(x, ...) { x <- x$model.intermediate q <- length(x) intermediates <- attr(x, "names") classes <- c() for(i in 1:q) { classes <- c(classes, class(x[[i]])) } text.intermediates <- paste("Indirect classification, with", q, "intermediate variables:") if(length(unique(classes)) == 1) { predictive <- paste("Predictive model per intermediate is", unique(classes)) } else { predictive <- paste("Predictive model per intermediate is \n", paste(intermediates, ": ", classes, "\n ", collapse = "")) } cat("\n", text.intermediates, "\n", intermediates, "\n", "\n", predictive, "\n") } summary.inclass <- function(object, ...) { class(object) <- "summary.inclass" object } print.summary.inclass <- function(x, ...)
{ x <- x$model.intermediate q <- length(x) intermediates <- attr(x, "names") classes <- c() for(i in 1:q) { classes <- c(classes, class(x[[i]])) } text.intermediates <- paste("Indirect classification, with", q, "intermediate variables:") if(length(unique(classes)) == 1) { predictive <- paste("Predictive model per intermediate is", unique(classes)) } else { predictive <- paste("Predictive model per intermediate is", "\n ", paste(intermediates, ": ", classes, "\n ", collapse = "")) } cat("\n", text.intermediates, "\n", intermediates, "\n", "\n", predictive, "\n", "\n", "Models:", "\n") print(x) } ipred/R/errorest.R0000644000176200001440000001302313461342153013547 0ustar liggesusers# $Id: errorest.R,v 1.25 2005/06/29 08:50:28 hothorn Exp $ control.errorest <- function(k= 10, nboot = 25, strat=FALSE, random=TRUE, predictions=FALSE, getmodels=FALSE, list.tindx = NULL) { if (k < 1) { warning("k < 1, using k=10") k <- 10 } if (nboot < 1) { warning("nboot < 1, using nboot=25") nboot <- 25 } if (!is.logical(strat)) { warning("strat is not a logical, using strat=FALSE") strat <- FALSE } if (!is.logical(random)) { warning("random is not a logical, using random=TRUE") random <- TRUE } if (!is.logical(predictions)) { warning("predictions is not a logical, using predictions=FALSE") predictions <- FALSE } if (!is.logical(getmodels)) { warning("getmodels is not a logical, using getmodels=FALSE") getmodels <- FALSE } RET <- list(k=k, nboot=nboot, strat=strat, random=random, predictions=predictions, getmodels=getmodels, list.tindx = list.tindx) return(RET) } errorest <- function(formula, data, ...) UseMethod("errorest", data) errorest.default <- function(formula, data, ...) stop(paste("Do not know how to handle objects of class", class(data))) errorest.data.frame <- function(formula, data, subset, na.action=na.omit, model=NULL, predict=NULL, estimator = c("cv", "boot", "632plus"), est.para = control.errorest(), ...) { cl <- match.call() m <- match.call(expand.dots = FALSE) if (length(grep("inclass", paste(m$model))) > 0 || length(grep("inbagg", paste(m$model))) > 0) { RET <- errorestinclass(formula, data=data, subset, na.action, model, predict, estimator, est.para, ...) RET$call <- cl } else { if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-3], data = data), "term.labels")) != 1)) stop("formula missing or incorrect") NOPRED <- (length(attr(terms(formula[-2], data = data), "term.labels")) < 1) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) m[[1]] <- as.name("model.frame") m$... <- NULL m$model <- NULL m$predict <- NULL m$estimator <- NULL m$est.para <- NULL mf <- eval(m, parent.frame()) response <- attr(attr(mf, "terms"), "response") # just extract the data.frame, NA handling here # make sure to leave the time and censoring variable here # for "Surv(time, cens) ~ ."
formulas # delete terms attribute attr(mf, "terms") <- NULL y <- mf[,response] if (!NOPRED & !is.Surv(y)) data <- mf else data <- data[complete.cases(data),] if(all(estimator %in% c("boot", "632plus")) & all(c("boot", "632plus") %in% estimator)) { estimator <- paste(sort(estimator), collapse = "_") } else { if(length(estimator) > 1) { estimator <- estimator[1] # warning(paste("Multiple choice of estimators, only", estimator, "is performed")) } else { estimator <- match.arg(estimator) } } if(is.null(model)) stop("no model specified") switch(estimator, "cv" = { RET <- cv(y, formula, data, model=model, predict=predict, k=est.para$k, random=est.para$random, predictions=est.para$predictions, strat=est.para$strat, getmodels=est.para$getmodels, list.tindx = est.para$list.tindx, ...) }, "boot" = { RET <- bootest(y, formula, data, model=model, predict=predict, nboot=est.para$nboot, list.tindx = est.para$list.tindx, predictions = est.para$predictions, ...) }, "632plus" = { RET <- bootest(y, formula, data, model=model, predict=predict, nboot=est.para$nboot, bc632plus=TRUE, list.tindx = est.para$list.tindx, predictions = est.para$predictions, ...) }, "632plus_boot" = { RET <- bootest(y, formula, data, model=model, predict=predict, nboot=est.para$nboot, bc632plus = TRUE, list.tindx = est.para$list.tindx, predictions = est.para$predictions, both.boot = TRUE, ...) } ) } RET$call <- cl return(RET) } errorestinclass <- function(formula, data, subset=NULL, na.action=NULL, model=NULL, predict=NULL, estimator = c("cv", "boot", "632plus"), est.para = control.errorest(), ...) { if (is.null(data)) stop("data argument required but not given") # if (is.null(iclass)) # stop("no class membership variable for indirect classification given") iclass <- paste(formula[[2]][[2]]) if (!(iclass %in% colnames(data))) stop("membership variable not in given data") # # data <- data[complete.cases(data),] # iclassindx <- which(colnames(data) == iclass) y <- data[,iclassindx] if (!is.factor(y)) stop("iclass is not a factor") # X <- data[,-iclassindx] X <- data if(is.null(model)) stop("no classifier specified") switch(estimator, "cv" = { RET <- cv(y, formula, data=X, model=model, predict=predict, k=est.para$k, random=est.para$random, list.tindx = est.para$list.tindx, ...) }, "boot" = { RET <- bootest(y, formula, data=X, model=model, predict=predict, nboot=est.para$nboot, list.tindx = est.para$list.tindx, ...) }, "632plus" = { RET <- bootest(y, formula, data=X, model=model, predict=predict, nboot=est.para$nboot, bc632plus=TRUE, list.tindx = est.para$list.tindx, ...) 
}) RET } ipred/R/sbrier.R0000644000176200001440000001162413461342153013175 0ustar liggesusers# $Id: sbrier.R,v 1.5 2009/03/27 16:18:38 hothorn Exp $ getsurv <- function(obj, times) { # get the survival probability for times from KM curve `obj' if (!inherits(obj, "survfit")) stop("obj is not of class survfit") # class(obj) <- NULL # lt <- length(times) nsurv <- times # if the times are the same, return the km-curve if(length(times) == length(obj$time)) { if (all(times == obj$time)) return(obj$surv) } # otherwise get the km-value for every element of times separately inside <- times %in% obj$time for (i in (1:lt)) { if (inside[i]) nsurv[i] <- obj$surv[obj$time == times[i]] else { less <- obj$time[obj$time < times[i]] if (length(less) == 0) nsurv[i] <- 1 else nsurv[i] <- obj$surv[obj$time == max(less)] } } nsurv } sbrier <- function(obj, pred, btime = range(obj[,1])) { if(!inherits(obj, "Surv")) stop("obj is not of class Surv") # check for right censoring # class(obj) <- NULL # if (attr(obj, "type") != "right") stop("only right-censoring allowed") N <- nrow(obj) # get the times and censoring of the data, order them with resp. to time time <- obj[,1] ot <- order(time) cens <- obj[ot,2] time <- time[ot] # get the times to compute the (integrated) Brier score over if (is.null(btime)) stop("btime not given") if (length(btime) < 1) stop("btime not given") if (length(btime) == 2) { if (btime[1] < min(time)) warning("btime[1] is smaller than min(time)") if (btime[2] > max(time)) warning("btime[2] is larger than max(time)") btime <- time[time >= btime[1] & time <= btime[2]] } ptype <- class(pred) # S3 workaround if (is.null(ptype)) { if (is.vector(pred)) ptype <- "vector" if (is.list(pred)) ptype <- "list" } # if (ptype == "numeric" && is.vector(pred)) ptype <- "vector" survs <- NULL switch(ptype, survfit = { survs <- getsurv(pred, btime) survs <- matrix(rep(survs, N), nrow=length(btime)) }, list = { if (!inherits(pred[[1]], "survfit")) stop("pred is not a list of survfit objects") if (length(pred) != N) stop("pred must be of length(time)") pred <- pred[ot] survs <- matrix(unlist(lapply(pred, getsurv, times = btime)), nrow=length(btime), ncol=N) }, vector = { if (length(pred) != N) stop("pred must be of length(time)") if (length(btime) != 1) stop("cannot compute integrated Brier score with pred") survs <- pred[ot] }, matrix = { # if (all(dim(pred) == c(length(btime), N))) survs <- pred[,ot] else stop("wrong dimensions of pred") # }) if (is.null(survs)) stop("unknown type of pred") # reverse Kaplan-Meier: estimate censoring distribution ### deal with ties hatcdist <- prodlim(Surv(time, cens) ~ 1,reverse = TRUE) csurv <- predict(hatcdist, times = time, type = "surv") csurv[csurv == 0] <- Inf # hatcdist <- survfit(Surv(time, 1 - cens) ~ 1) # csurv <- getsurv(hatcdist, time) # csurv[csurv == 0] <- Inf # conditional survival for new timepoints csurv_btime <- predict(hatcdist, times = btime, type = "surv") csurv_btime[is.na(csurv_btime)] <- min(csurv_btime, na.rm = TRUE) csurv_btime[csurv_btime == 0] <- Inf bsc <- rep(0, length(btime)) # compute Lebesgue-integrated Brier score if (length(btime) > 1) { for (j in 1:length(btime)) { help1 <- as.integer(time <= btime[j] & cens == 1) help2 <- as.integer(time > btime[j]) bsc[j] <- mean((0 - survs[j,])^2*help1*(1/csurv) + (1-survs[j,])^2*help2*(1/csurv_btime[j])) } ### apply trapezoid rule idx <- 2:length(btime) RET <- diff(btime) %*% ((bsc[idx - 1] + bsc[idx]) / 2) RET <- RET / diff(range(btime)) ### previously was #diffs <- c(btime[1], btime[2:length(btime)] -
btime[1:(length(btime)-1)]) #RET <- sum(diffs*bsc)/max(btime) names(RET) <- "integrated Brier score" attr(RET, "time") <- range(btime) # compute Brier score at one single time `btime' } else { help1 <- as.integer(time <= btime & cens == 1) help2 <- as.integer(time > btime) cs <- predict(hatcdist, times=btime, type = "surv") ### cs <- getsurv(hatcdist, btime) if (cs == 0) cs <- Inf RET <- mean((0 - survs)^2*help1*(1/csurv) + (1-survs)^2*help2*(1/cs)) names(RET) <- "Brier score" attr(RET, "time") <- btime } RET } ipred/R/bootest.R0000644000176200001440000002136213461342153013366 0ustar liggesusers# $Id: bootest.R,v 1.18 2004/02/09 08:08:21 peters Exp $ bootest <- function(y, ...) { if(is.null(class(y))) class(y) <- data.class(y) UseMethod("bootest", y) } bootest.default <- function(y, ...) { stop(paste("Do not know how to handle objects of class", class(y))) } bootest.integer <- function(y, ...) { bootest.numeric(y, ...) } bootest.factor <- function(y, formula, data, model, predict, nboot=25, bc632plus = FALSE, list.tindx = NULL, predictions = FALSE, both.boot = FALSE, ...) { # bootstrap estimator of misclassification error N <- length(y) nindx <- 1:N if(!is.null(list.tindx)) nboot <- length(list.tindx) bootindx <- matrix(NA, ncol=nboot, nrow=N) if(predictions) { BOOTINDX <- data.frame(matrix(NA, ncol=nboot, nrow=N)) } classes <- levels(y) USEPM <- FALSE if(!is.data.frame(data)) stop("data is not a data.frame") if(nboot <=2) stop("too small number of bootstrap replications") if(is.null(nboot)) stop("number of bootstrap replications is missing") if(!is.null(list.tindx) & length(list.tindx) != nboot) stop(paste("List of selected observations per bootstrap sample has to be of length", nboot)) for(i in 1:nboot) { if(!is.null(list.tindx)) { tindx <- list.tindx[[i]] if(length(tindx) > N) warning("number of selected observations is larger than the sample size") } else { tindx <- sample(nindx, N, replace = TRUE) } mymodel <- model(formula, data = data[tindx,], ...) # check if mymodel is a function which should be used instead of # predict if (is.function(mymodel)) { if(!is.null(predict) & i == 1) warning("model returns a function and predict is specified, using model's output") predict <- mymodel USEPM <- TRUE } if (USEPM) pred <- predict(newdata=data) else pred <- predict(mymodel, newdata = data) if (!is.factor(pred)) stop("predict does not return factor values") pred <- factor(pred, levels=classes)[-tindx] if (length(pred) != length(y[-tindx])) stop("different length of data and prediction") if(predictions) { BOOTINDX[,i] <- factor(BOOTINDX[,i],levels = classes) BOOTINDX[-tindx, i] <- pred } bootindx[-tindx, i] <- (pred != y[-tindx]) } fun <- function(x) ifelse(all(is.na(x)), NA, mean(as.integer(x), na.rm = TRUE)) one <- mean(apply(bootindx, 1, fun), na.rm = TRUE) if (bc632plus) { full.model <- model(formula, data = data, ...)
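    # the .632+ estimator below combines the resubstitution error of the
    # model fitted to the complete sample (`resubst') with the out-of-bag
    # bootstrap error `one'; `gamma' estimates the no-information error
    # rate and `r' the relative overfitting rate (Efron & Tibshirani, 1997)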
# check if full.model is a function which should be used instead of # predict if (is.function(full.model)) { predict <- full.model USEPM <- TRUE } if (USEPM) full.pred <- predict(newdata=data) else full.pred <- predict(full.model, newdata = data) resubst <- mean(full.pred != y, na.rm = TRUE) err632 <- 0.368*resubst + 0.632*one y <- y[!is.na(y) & !is.na(full.pred)] full.pred <- full.pred[!is.na(y) & !is.na(full.pred)] gamma <- sum(outer(y, full.pred, function(x, y) ifelse(x==y, 0, 1) ))/ (length(y)^2) r <- (one - resubst)/(gamma - resubst) r <- ifelse(one > resubst & gamma > resubst, r, 0) errprime <- min(one, gamma) # weight <- .632/(1-.368*r) # err <- (1-weight)*resubst + weight*one err <- err632 + (errprime - resubst)*(0.368*0.632*r)/(1-0.368*r) if(predictions) RET <- list(error = err, nboot = nboot, bc632plus = TRUE, predictions = BOOTINDX) else RET <- list(error = err, nboot=nboot, bc632plus = TRUE) if(both.boot){ bc632plus <- FALSE RETbc <- RET } } if(!bc632plus) { err <- one expb <- rep(0, nboot) for(i in 1:nboot) expb[i] <- mean(apply(bootindx[,-i], 1, fun), na.rm = TRUE) sdint <- sqrt( ((nboot - 1)/nboot)*sum((expb - mean(expb))^2) ) if(predictions) RET <- list(error = err, sd = sdint, bc632plus = FALSE, nboot = nboot, predictions = BOOTINDX) else RET <- list(error = err, sd=sdint, bc632plus=FALSE, nboot=nboot) if(both.boot){ RET <- list("boot" = RET, "632plus" = RETbc) } } class(RET) <- "bootestclass" RET } bootest.numeric <- function(y, formula, data, model, predict, nboot=25, bc632plus=FALSE, list.tindx = NULL, predictions = FALSE, ...) { # bootstrap estimator of root of mean squared error if (bc632plus) stop("cannot compute 632+ estimator of mean squared error") if(!is.null(list.tindx)) nboot <- length(list.tindx) if (nboot <=2) stop("too small number of bootstrap replications") ##FIX: nrow = N <- length(y) nindx <- 1:N bootindx <- matrix(NA, ncol=nboot, nrow=N) if(predictions) BOOTINDX <- matrix(NA, ncol=nboot, nrow=N) USEPM <- FALSE if (!is.data.frame(data)) stop("data is not a data.frame") if(is.null(nboot)) stop("number of bootstrap replications is missing") if(!is.null(list.tindx) & length(list.tindx) != nboot) stop(paste("List of selected observations per bootstrap sample has to be of length", nboot)) for(i in 1:nboot) { if(!is.null(list.tindx)) { tindx <- list.tindx[[i]] if(length(tindx) > N) warning("number of selected observations is larger than the sample size") } else { tindx <- sample(nindx, N, replace = TRUE) } # tindx <- ifelse(!is.null(list.tindx), list.tindx[[i]], sample(nindx, N, replace = TRUE)) mymodel <- model(formula, data = data[tindx,], ...)
outbootdata <- subset(data, !(nindx %in% tindx)) # check if mymodel is a function which should be used instead of # predict if (is.function(mymodel)) { if(!is.null(predict) & i == 1) warning("model returns a function and predict is specified, using model's output") predict <- mymodel USEPM <- TRUE } if (USEPM) pred <- predict(newdata=outbootdata) else pred <- predict(mymodel, newdata = outbootdata) if (!is.numeric(pred)) stop("predict does not return numerical values") if (length(pred) != length(y[-tindx])) stop("different length of data and prediction") if(predictions) BOOTINDX[-tindx, i] <- pred bootindx[-tindx, i] <- (pred - y[-tindx])^2 } fun <- function(x) ifelse(all(is.na(x)), NA, mean(x, na.rm = TRUE)) err <- sqrt(mean(apply(bootindx, 1, fun), na.rm = TRUE)) if(predictions) RET <- list(error = err, nboot = nboot, predictions = BOOTINDX) else RET <- list(error = err, nboot=nboot) class(RET) <- "bootestreg" RET } bootest.Surv <- function(y, formula, data=NULL, model, predict, nboot=25, bc632plus=FALSE, list.tindx = NULL, predictions = FALSE, ...) { # bootstrap estimator of Brier's score if (bc632plus) stop("cannot compute 632+ estimator of Brier's score") N <- dim(y)[1] if(!is.null(list.tindx)) nboot <- length(list.tindx) nindx <- 1:N bootindx <- matrix(NA, ncol=nboot, nrow=N) if(predictions) BOOTINDX <- matrix(NA, ncol=nboot, nrow=N) USEPM <- FALSE if(is.null(nboot)) stop("number of bootstrap replications is missing") if (nboot <=2) stop("too small number of bootstrap replications") if (is.null(data)) data <- as.data.frame(rep(1, N)) if (!is.data.frame(data)) stop("data is not a data.frame") if(!is.null(list.tindx)) nboot <- length(list.tindx) for(i in 1:nboot) { if(!is.null(list.tindx)) { tindx <- list.tindx[[i]] if(length(tindx) > N) warning("number of selected observations is larger than the sample size") } else { tindx <- sample(nindx, N, replace = TRUE) } #tindx <- ifelse(!is.null(list.tindx), list.tindx[[i]], sample(nindx, N, replace = TRUE)) #tindx <- sample(nindx, N, replace = TRUE) mymodel <- model(formula, data=data[tindx,], ...) outbootdata <- subset(data, !(nindx %in% tindx)) # check if mymodel is a function which should be used instead of # predict if (is.function(mymodel)) { if(!is.null(predict) & i == 1) warning("model returns a function and predict is specified, using model's output") predict <- mymodel USEPM <- TRUE } if (USEPM) pred <- predict(newdata=outbootdata) else pred <- predict(mymodel, newdata = outbootdata) if (is.list(pred)) { if (!inherits(pred[[1]], "survfit") && !inherits(pred, "survfit")) stop("predict does not return a list of survfit objects") } else { stop("predict does not return a list of survfit objects") } if(predictions) BOOTINDX[-tindx, i] <- sbrier(y[-tindx], pred) ###??? bootindx[-tindx, i] <- sbrier(y[-tindx], pred) } fun <- function(x) ifelse(all(is.na(x)), NA, mean(x, na.rm = TRUE)) err <- mean(apply(bootindx, 1, fun), na.rm = TRUE) if(predictions) RET <- list(error = err, nboot = nboot, predictions = BOOTINDX) else RET <- list(error = err, nboot=nboot) class(RET) <- "bootestsurv" RET } ipred/R/ipredbagg.R0000644000176200001440000002043713461342153013635 0ustar liggesusers#$Id: ipredbagg.R,v 1.13 2003/06/11 10:40:17 peters Exp $ workhorse <- function(y, X, control, comb, bcontrol, thisclass, ...)
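# A hedged sketch of how the `comb' argument handled below is expected to
# look for double-bagging with stabilised LDA (the data frame `mydata' with
# factor response `Class' in column 1 is hypothetical):
#   comb.lda <- list(list(model = slda,
#                         predict = function(obj, newdata)
#                                     predict(obj, newdata)$x))
#   mod <- ipredbagg(mydata$Class, mydata[, -1], comb = comb.lda)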
{ # This is double-bagging (comb is lda) or bundling (any arbitrary # model in comb) if (!is.data.frame(X)) X <- as.data.frame(X) # check user supplied functions if (!is.list(comb)) stop("comb not a list") N <- nrow(X) mydata <- cbind(data.frame(y), X) mtrees <- vector(mode="list", length=bcontrol$nbagg) for (i in 1:bcontrol$nbagg) { # double-bagging or bundling # comb is a list of lists, each of them having two elements: # model and predict bindx <- sample(1:N, bcontrol$ns, replace=bcontrol$replace) objs <- vector(mode="list", length=length(comb)) addclass <- function() { myindx <- 1:length(comb) for (k in 1:length(comb)) { # put the user supplied models into a try statement # if this fails, simply ignore it. # options(show.error.messages = FALSE) oX <- mydata[-bindx,] foo <- try(comb[[k]]$model(y ~ ., data=oX)) if (inherits(foo, "try-error")) { warning("could not build model:") print(foo[1]) foo <- NA myindx <- myindx[-k] } objs[[k]] <- foo # options(show.error.messages = TRUE) } fct <- function(newdata) { # use lexical scoping: return this function for the computation of # the additional predictors if (!is.data.frame(newdata)) newdata <- as.data.frame(newdata) addpred <- c() # the user supplied model failed, ignore it here. if (length(myindx) < 1) { RET <- NULL } else { # compute additional predictors for user supplied models for (k in myindx) addpred <- cbind(addpred, comb[[k]]$predict(objs[[k]], newdata)) # : more informative names??? colnames(addpred) <- paste("addpred", 1:ncol(addpred), sep="") # RET <- addpred } RET } if (length(myindx) < 1) return(NULL) else return(fct) } bfct <- addclass() # may have failed if (!is.null(bfct)) { # grow a tree using the original predictors # from the bootstrap sample and the additional predictors computed on # the bootstrap sample. oX <- cbind(mydata, bfct(X))[bindx,] btree <- rpart(y ~., data=oX, control = control,...) # return this object this <- list(bindx = bindx, btree = btree, bfct=bfct) } else { # return a simple tree if the user supplied model failed. oX <- mydata[bindx,] btree <- rpart(y ~., data=oX, control = control,...) this <- list(bindx = bindx, btree = btree) } class(this) <- thisclass mtrees[[i]] <- this } mtrees } ipredbagg <- function(y, ...) { if(is.null(class(y))) class(y) <- data.class(y) # UseMethod("ipredbagg", y, ...) UseMethod("ipredbagg", y) } ipredbagg.default <- function(y, ...) { stop(paste("Do not know how to handle objects of class", class(y))) } ipredbagg.integer <- function(y, ...) { ipredbagg.numeric(y,...) } ipredbagg.factor <- function(y, X=NULL, nbagg=25, control= rpart.control(minsplit=2, cp=0, xval=0), comb=NULL, coob=FALSE, ns=length(y), keepX = TRUE, ...) { # bagging classification trees if (!is.null(comb) && coob) stop("cannot compute out-of-bag estimate for combined models") if (nbagg == 1 && coob) stop("cannot compute out-of-bag estimate for single tree") # check nbagg if (nbagg < 1) stop("nbagg is not a positive integer") # bagging only if nbagg greater 1, else use the whole sample, i.e. one # simple tree if (nbagg == 1) { REPLACE <- FALSE } else { if (ns < length(y)) { # this is "subagging", i.e.
sampling ns out of length(y) WITHOUT # replacement REPLACE <- FALSE } else { # the usual bootstrap: n out of n with replacement REPLACE <- TRUE } } if (!is.null(comb)) { # this is rather slow but we need to be as general as possible # with respect to classifiers as well as outcome of prediction (classes, # linear discriminant functions, conditional class probabilities, random # noise, if you like) mtrees <- workhorse(y, X, control, comb, bcontrol=list(nbagg=nbagg, ns=ns, replace=REPLACE), thisclass="sclass") } else { # use an optimized version mydata <- cbind(data.frame(y), X) mtrees <- irpart(y ~ ., data=mydata, control=control, bcontrol=list(nbagg=nbagg, ns=ns, replace=REPLACE)) } # always keep response and predictors as well as a list of nbagg objects # of class "sclass" if (keepX) RET <- list(y=y, X=X, mtrees=mtrees, OOB=coob, comb=!is.null(comb)) else RET <- list(y=y, X=NULL, mtrees=mtrees, OOB=coob, comb=!is.null(comb)) class(RET) <- "classbagg" if (coob) { pred <- predict(RET) ae <- all.equal(levels(pred), levels(RET$y)) if (is.logical(ae) && ae) RET$err <- mean(pred != RET$y, na.rm=TRUE) else RET$err <- mean(as.character(pred) != as.character(RET$y), na.rm=TRUE) } RET } ipredbagg.numeric <- function(y, X=NULL, nbagg=25, control= rpart.control(xval=0), comb=NULL, coob=FALSE, ns=length(y), keepX = TRUE, ...) { # is control meaningful here??? # bagging regression trees if (!is.null(comb) && coob) stop("cannot compute out-of-bag estimate for combined models") if (nbagg == 1 && coob) stop("cannot compute out-of-bag estimate for single tree") # check nbagg if (nbagg < 1) stop("nbagg is not a positive integer") # only bagg if nbagg greater 1, else use the whole sample if (nbagg == 1) { REPLACE <- FALSE } else { if (ns < length(y)) { # this is "subagging", i.e. sampling ns out of length(y) WITHOUT # replacement REPLACE <- FALSE } else { # the usual bootstrap: n out of n with replacement REPLACE <- TRUE } } if (!is.null(comb)) { mtrees <- workhorse(y, X, control, comb, bcontrol=list(nbagg=nbagg, ns=ns, replace=REPLACE), thisclass="sreg") } else { mydata <- cbind(data.frame(y), X) mtrees <- irpart(y ~ ., data=mydata, control=control, bcontrol=list(nbagg=nbagg, ns=ns, replace=REPLACE)) } if (keepX) RET <- list(y=y, X=X, mtrees=mtrees, OOB=coob, comb=!is.null(comb)) else RET <- list(y=y, X=NULL, mtrees=mtrees, OOB=coob, comb=!is.null(comb)) class(RET) <- "regbagg" if (coob) RET$err <- sqrt(mean((predict(RET) - RET$y)^2, na.rm=TRUE)) RET } ipredbagg.Surv <- function(y, X=NULL, nbagg=25, control= rpart.control(xval=0), comb=NULL, coob=FALSE, ns=dim(y)[1], keepX = TRUE, ...) { # is control meaningful here??? # bagging survival trees if (!is.null(comb) && coob) stop("cannot compute out-of-bag estimate for combined models") if (nbagg == 1 && coob) stop("cannot compute out-of-bag estimate for single tree") # check nbagg if (nbagg < 1) stop("nbagg is not a positive integer") # only bagg if nbagg greater 1, else use the whole sample if (nbagg == 1) { REPLACE <- FALSE } else { if (ns < dim(y)[1]) { # this is "subagging", i.e. 
sampling ns out of length(y) WITHOUT # replacement REPLACE <- FALSE } else { # the usual bootstrap: n out of n with replacement REPLACE <- TRUE } } if (!is.null(comb)) { mtrees <- workhorse(y, X, control, comb, bcontrol=list(nbagg=nbagg, ns=ns, replace=REPLACE), thisclass="ssurv") } else { mydata <- cbind(data.frame(y), X) mtrees <- irpart(y ~ ., data=mydata, control=control, bcontrol=list(nbagg=nbagg, ns=ns, replace=REPLACE)) } if (keepX) RET <- list(y=y, X=X, mtrees=mtrees, OOB=coob, comb=!is.null(comb)) else RET <- list(y=y, X=NULL, mtrees=mtrees, OOB=coob, comb=!is.null(comb)) class(RET) <- "survbagg" if (coob) RET$err <- sbrier(RET$y, predict(RET)) RET } ipred/R/predict.irpart.R0000644000176200001440000000347513461342153014646 0ustar liggesusers# # a modified version of `predict.rpart.s' from the rpart package # see COPYRIGHTS for details. # predict.irpart <- function(object, newdata = list(), type = c("vector", "prob", "class", "matrix"), ...) { if(!inherits(object, "rpart")) stop("Not legitimate tree") mtype <- missing(type) type <- match.arg(type) if(missing(newdata)) where <- object$where else { if(is.null(attr(newdata, "terms")) & !inherits(newdata, "rpart.matrix")) { Terms <- delete.response(object$terms) act <- (object$call)$na.action if (is.null(act)) act<- na.rpart newdata <- model.frame(Terms, newdata, na.action = act, xlev=attr(object, "xlevels")) newdata <- getFromNamespace("rpart.matrix", ns = "rpart")(newdata) } where <- getFromNamespace("pred.rpart", ns = "rpart")(object, newdata) } frame <- object$frame method <- object$method ylevels <- attr(object, "ylevels") nclass <- length(ylevels) if(mtype && nclass > 0) type <- "prob" if(type == "vector" || (type=="matrix" && is.null(frame$yval2))) { pred <- frame$yval[where] names(pred) <- names(where) } else if (type == "matrix") { pred <- frame$yval2[where,] dimnames(pred) <- list(names(where), NULL) } else if(type == "class" && nclass > 0) { pred <- factor(ylevels[frame$yval[where]], levels=ylevels) names(pred) <- names(where) } else if (type == "prob" && nclass > 0) { pred <- frame$yval2[where, 1 + nclass + 1:nclass] dimnames(pred) <- list(names(where), ylevels) } else stop("Invalid prediction for rpart object") # Expand out the missing values in the result # But only if operating on the original dataset if (missing(newdata) && !is.null(object$na.action)) pred <- naresid(object$na.action, pred) pred } ipred/R/kfoldcv.R0000644000176200001440000000120313461342153013331 0ustar liggesusers# $Id: kfoldcv.R,v 1.3 2002/09/12 08:56:42 hothorn Exp $ kfoldcv <- function(k,N, nlevel=NULL) { if (is.null(nlevel)) { # no stratification if (k > N) return(c(rep(1, N), rep(0, k-N))) fl <- floor(N/k) ce <- ceiling(N/k) if (fl == ce) return(rep(fl, k)) else return(c(rep(ce, round((N/k - fl)*k)), rep(fl, round((1 - (N/k - fl))*k)))) } else { # stratification # if (!is.integer(nlevel)) stop("nlevel is not a vector of integers") kmat <- matrix(0, ncol=k, nrow=length(nlevel)) for (i in 1:length(nlevel)) kmat[i,] <- kfoldcv(k, nlevel[i]) return(kmat) } } ipred/R/predict.inclass.R0000644000176200001440000000241113461342153014766 0ustar liggesusers# $Id: predict.inclass.R,v 1.19 2003/03/31 08:44:16 peters Exp $ # Additional option type ="class", if intermediate is nominal predict.inclass <- function(object, newdata, ...)
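# prediction for indirect classification proceeds in two stages: each fitted
# intermediate model predicts its intermediate variable on `newdata', then
# the classifying function (or its predict method) maps the explanatory and
# predicted intermediate variables to the response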
{ if(!is.data.frame(newdata)) newdata <- as.data.frame(newdata) q <- length(object$model.intermediate) # number of intermediates namen <- names(object$model.intermediate) intermediate <- is.vector(NULL, mode = "NULL") for(i in 1:q) { if(!is.null(object$para.intermediate[[i]][["predict"]])) { RET <- object$para.intermediate[[i]][["predict"]](object$model.intermediate[[i]], newdata = newdata, ...) } else { RET <- predict(object$model.intermediate[[i]], newdata = newdata, ...) } intermediate <- data.frame(intermediate, RET) } intermediate <- intermediate[,-1] names(intermediate) <- namen intermediate <- data.frame(newdata[,!(names(newdata) %in% names(intermediate))], intermediate) if(!is.function(object$para.response)) { if(!is.null(object$para.response[["predict"]])) { RET <- object$para.response[["predict"]](object$model.response, newdata = intermediate, ...) } else { RET <- predict(object$model.response, newdata = intermediate, ...) } } else { RET <- object$para.response(intermediate) } return(RET) } ipred/R/varset.R0000644000176200001440000000143713461342153013214 0ustar liggesusers# $Id: varset.R,v 1.2 2002/03/26 16:29:15 hothorn Exp $ varset <- function(N, sigma = 0.1, theta = 90, threshold = 0, u = 1:3) { # create U U <- matrix(rep(0, 4), ncol = 2) U[1, 1] <- u[1] U[1, 2] <- u[2] U[2, 1] <- u[3] U[2, 2] <- (theta-u[1]*u[3])/u[2] lambda <- sqrt(U[1, 1]^2 + U[1, 2]^2) U[1, ] <- U[1, ]/lambda lambda <- sqrt(U[2, 1]^2 + U[2, 2]^2) U[2, ] <- U[2, ]/lambda e <- matrix(rnorm(2*N, sd = sigma), ncol = 2, byrow = TRUE) expl <- matrix(rnorm(2*N), ncol = 2, byrow = TRUE) inter <- t(U %*%t(expl) + t(e)) z <- (inter > threshold) resp <- as.factor(ifelse((z[,1] + z[,2]) > 1, 1, 0)) colnames(expl) <- c("x1", "x2") colnames(inter) <- c("y1", "y2") result <- list(explanatory = expl, intermediate = inter, response = resp) return(result) } ipred/R/cv.R0000644000176200001440000001642413461342153012322 0ustar liggesusers#$Id: cv.R,v 1.21 2004/02/11 09:13:51 peters Exp $ cv <- function(y, ...) { if(is.null(class(y))) class(y) <- data.class(y) UseMethod("cv", y) } cv.default <- function(y, ...) { stop(paste("Do not know how to handle objects of class", class(y))) } cv.integer <- function(y, ...) { cv.numeric(y, ...) } cv.factor <- function(y, formula, data, model, predict, k=10, random=TRUE, strat=FALSE, predictions=NULL, getmodels=NULL, list.tindx = NULL, ...) { # k-fold cross-validation of misclassification error if (!is.data.frame(data)) stop("data is not of class data.frame") N <- length(y) classes <- levels(y) if (is.null(k)) k <- 10 if (is.null(random)) random <- TRUE if (is.null(strat)) strat <- FALSE if (is.null(predictions)) predictions <- FALSE if (is.null(getmodels)) getmodels <- FALSE USEPM <- FALSE if(!is.null(list.tindx)) k <- length(list.tindx) if(!is.null(list.tindx)) { random <- FALSE } # to reproduce results, either use `set.seed' or a fixed partition of # the samples if (random) myindx <- sample(1:N, N) else myindx <- 1:N y <- y[myindx] data <- data[myindx,] # determine an appropriate splitting for the sample size into # k roughly equally sized parts mysplit <- ssubset(y, k, strat=strat) allpred <- vector(mode="character", length=N) fu <- function(x) levels(x)[as.integer(x)] nindx <- 1:N if (getmodels) models <- vector(k, mode="list") for(i in 1:k) { if(!is.null(list.tindx)) { tindx <- list.tindx[[i]] } else { tindx <- mysplit[[i]] } folddata <- subset(data, !(nindx %in% tindx)) mymodel <- model(formula, data=folddata, ...) 
if (getmodels) models[[i]] <- mymodel # check if mymodel is a function which should be used instead of # predict if (is.function(mymodel)) { if(!is.null(predict) & i == 1) warning("model returns a function and predict is specified, using model's output") predict <- mymodel USEPM <- TRUE } # we assume predict to return factor levels if (USEPM) pred <- predict(newdata=data) else pred <- predict(mymodel, newdata = data) if (!is.factor(pred)) stop("predict does not return factor values") pred <- factor(pred, levels=classes) # # there is no c() for factors which preserves the levels, so # use characters allpred[tindx] <- fu(pred[tindx]) # } allpred <- factor(allpred, levels=classes) allpred <- allpred[order(myindx)] err <- mean(allpred != y[order(myindx)], na.rm = TRUE) if (predictions) RET <- list(error = err, k = k, predictions=allpred) else RET <- list(error = err, k = k) if (getmodels) RET <- c(RET, models=list(models)) class(RET) <- "cvclass" RET } cv.numeric <- function(y, formula, data, model, predict, k=10, random=TRUE, predictions=NULL, strat=NULL, getmodels=NULL, list.tindx = NULL, ...) { # k-fold cross-validation of mean squared error if (!is.data.frame(data)) stop("data is not of class data.frame") if(!is.null(list.tindx)) k <- length(list.tindx) N <- length(y) if (is.null(k)) k <- 10 if (is.null(random)) random <- TRUE if (is.null(predictions)) predictions <- FALSE if (is.null(getmodels)) getmodels <- FALSE USEPM <- FALSE # determine an appropriate splitting for the sample size into # k roughly equally sized parts # if(is.null(list.tindx)) { a <- kfoldcv(k, N) # to reproduce results, either use `set.seed' or a fixed partition of # the samples if (random) myindx <- sample(1:N, N) else myindx <- 1:N nindx <- 1:N # } if (getmodels) models <- vector(k, mode="list") allpred <- rep(0, N) for(i in 1:k) { if(!is.null(list.tindx)) { tindx <- list.tindx[[i]] } else { if (i > 1) tindx <- myindx[(sum(a[1:(i-1)])+1):sum(a[1:i])] else tindx <- myindx[1:a[1]] } folddata <- subset(data, !(nindx %in% tindx)) mymodel <- model(formula, data=folddata, ...) if (getmodels) models[[i]] <- mymodel # check if mymodel is a function which should be used instead of # predict if (is.function(mymodel)) { if(!is.null(predict) & i == 1) warning("model returns a function and predict is specified, using model's output") predict <- mymodel USEPM <- TRUE } outfolddata <- subset(data, nindx %in% tindx) if (USEPM) pred <- predict(newdata=outfolddata) else pred <- predict(mymodel, newdata = outfolddata) if (!is.numeric(pred)) stop("predict does not return numerical values") allpred[sort(tindx)] <- pred } err <- sqrt(mean((allpred - y)^2, na.rm = TRUE)) if (predictions) RET <- list(error = err, k = k, predictions=allpred) else RET <- list(error = err, k = k) if (getmodels) RET <- c(RET, models=list(models)) class(RET) <- "cvreg" RET } cv.Surv <- function(y, formula, data=NULL, model, predict, k=10, random=TRUE, predictions=FALSE, strat=FALSE, getmodels=NULL, list.tindx = NULL, ...)
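# k-fold cross-validation of the Brier score for censored data: `predict'
# (or the function returned by `model') must return survfit objects, which
# are scored on each hold-out fold via sbrier()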
{ # k-fold cross-validation of Brier's score if (is.null(predictions)) predictions <- FALSE if(is.null(random)) random <- TRUE if (is.null(strat)) strat <- FALSE if (is.null(getmodels)) getmodels <- FALSE USEPM <- FALSE N <- length(y[,1]) nindx <- 1:N if(is.null(k)) k <- 10 if (is.null(data)) data <- rep(1, N) if(!is.null(list.tindx)) k <- length(list.tindx) # determine an appropriate splitting for the sample size into # k roughly equally sized parts # if(is.null(list.tindx)) { a <- kfoldcv(k, N) # to reproduce results, either use `set.seed' or a fixed partition of # the samples if (random) myindx <- sample(1:N, N) else myindx <- 1:N # } if (getmodels) models <- vector(k, mode="list") cverr <- c() for(i in 1:k) { if(!is.null(list.tindx)) { tindx <- list.tindx[[i]] } else { if (i > 1) tindx <- myindx[(sum(a[1:(i-1)])+1):sum(a[1:i])] else tindx <- myindx[1:a[1]] } folddata <- subset(data, !(nindx %in% tindx)) mymodel <- model(formula, data=folddata, ...) if (getmodels) models[[i]] <- mymodel # check if mymodel is a function which should be used instead of # predict if (is.function(mymodel)) { if(!is.null(predict) & i == 1) warning("model returns a function and predict is specified, using model's output") predict <- mymodel USEPM <- TRUE } outfolddata <- subset(data, (nindx %in% tindx)) if (USEPM) pred <- predict(newdata=outfolddata) else pred <- predict(mymodel, newdata = outfolddata) if (is.list(pred)) { if (!inherits(pred[[1]], "survfit") && !inherits(pred, "survfit")) stop("predict does not return a list of survfit objects") } else { stop("predict does not return a list of survfit objects") } err <- sbrier(y[sort(tindx)], pred) cverr <- c(cverr,rep(err, length(tindx))) } RET <- list(error = mean(cverr), k=k) if (getmodels) RET <- c(RET, models=list(models)) class(RET) <- "cvsurv" RET } ipred/R/inbagg.R0000644000176200001440000002153613461342153013141 0ustar liggesusersworkhorse.inbagg <- function(object, y, X, W, cFUN, w.training.set, y.training.set, bcontrol, control, ...)
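# for each of the nbagg bootstrap samples: fit one pFUN model per
# intermediate variable (on the out-of-bag sample, the bag or the complete
# sample, depending on `w.training.set') and train the classifying function
# cFUN on the explanatory variables plus the predicted intermediates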
{ formula.list <- object data <- data.frame(y, X, W) mtrees <- vector(mode="list", length=bcontrol$nbagg) if(w.training.set[1] == "all") fit.vals <- 1:length(y) for (i in 1:bcontrol$nbagg) { bindx <- sample(1:length(y), bcontrol$ns, replace=bcontrol$replace) if(w.training.set[1] == "oob") fit.vals <- (-bindx) if(w.training.set[1] == "bag") fit.vals <- bindx objs <- vector(mode="list", length=length(formula.list)) #prediction models for intermediate variables names(objs) <- names(formula.list) addclass <- function() { ##START addclass <- function() for (j in 1:length(formula.list)) { ##Fitting prediction models for intermediates oX <- data[fit.vals, c(paste(formula.list[[j]]$formula[[2]]), attr(terms(formula.list[[j]]$formula, data = data), "term.labels"))] foo <- try(formula.list[[j]]$model(formula.list[[j]]$formula, data = oX)) objs[[j]] <- foo } fct <- function(newdata) { ##START fct <- function(newdata) if (!is.data.frame(newdata)) newdata <- as.data.frame(newdata) add.predictors <- rep(0, nrow(newdata)) for (j in 1:length(formula.list)){ ## predict additional intermediates using fitted models oXnewdata <- newdata[,attr(terms(formula.list[[j]]$formula, data = data), "term.labels")] if(is.null(formula.list[[j]]$predict)) { res <- try(predict(objs[[j]], newdata = oXnewdata)) } else { res <- try(formula.list[[j]]$predict(objs[[j]], newdata = oXnewdata)) } ###FIX: action for class(res) == "try-error" add.predictors <- data.frame(add.predictors, res) } add.predictors <- add.predictors[,-1] if(is.null(dim(add.predictors))) add.predictors <- matrix(add.predictors, ncol = 1) colnames(add.predictors) <- names(formula.list) add.predictors } ##END fct <- function(newdata) return(fct) } ##END addclass <- function() bfct <- addclass() ###bfct is a function (addclass) if (!is.null(bfct)) { expl.cFUN <- attr(terms(as.formula(cFUN$formula), data = data), "term.labels") if(!is.null(cFUN$fixed.function)) { btree <- cFUN } else { W.new <- bfct(X) W.new.names <- sub(".[0-9]$", "", colnames(W.new)) if(y.training.set[1] == "fitted.bag") { ###construct on bag oX <- data.frame(y, X, W.new)[bindx,] right.side <- paste(c(expl.cFUN[!(expl.cFUN %in% W.new.names)], colnames(W.new)[W.new.names %in% expl.cFUN]), collapse = "+") cFUN$formula <- as.formula(paste(cFUN$formula[[2]], "~", right.side)) } if(y.training.set[1] == "original") { ###construct on original variables if(length(W.new.names)> length(colnames(W))) stop("If classifying function is trained on original intermediate, only one predictive model per intermediate can be constructed.") oX <- data.frame(y, X, W[,W.new.names]) names(oX)[(ncol(oX)-ncol(W)+1):ncol(oX)] <- colnames(W.new) } if(y.training.set[1] == "fitted.subset") { ###construct on subset oX <- data.frame(y, X, W.new)[!subset,] right.side <- paste(c(expl.cFUN[!(expl.cFUN %in% W.new.names)], colnames(W.new)[W.new.names %in% expl.cFUN]), collapse = "+") cFUN$formula <- as.formula(paste(cFUN$formula[[2]], "~", right.side)) } names(oX)[names(oX) == "y"] <- paste(cFUN$formula[[2]]) btree <- cFUN$model(cFUN$formula, data = oX, ...) btree <- list(model = btree, predict = cFUN$predict) } this <- list(bindx = bindx, btree = btree, bfct=bfct) } else { stop("Predictive function for intermediates not executable: Classifying function can not be applied.") } class(this) <- "thisclass" mtrees[[i]] <- this } mtrees } inbagg <- function(formula, data, ...) UseMethod("inbagg", data) inbagg.default <- function(formula, data,...)
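# A hedged usage sketch of the y ~ w ~ x interface dispatched below (data
# and variable names hypothetical; w1, w2 are intermediates, x1, x2
# explanatory variables):
#   inbagg(y ~ w1 + w2 ~ x1 + x2, data = mydata,
#          pFUN = list(list(model = lm)), nbagg = 25)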
{ stop(paste("Do not know how to handle objects of class", class(data))) } inbagg.data.frame <- function(formula, data, pFUN=NULL, cFUN=list(model = NULL, predict = NULL, training.set = NULL), nbagg = 25, ns = 0.5, replace = FALSE, ...) { if(!is.function(cFUN)) { if(is.null(cFUN$model)) { cFUN$model <- function(formula, data) rpart(formula, data, control = rpart.control(minsplit=2, cp=0, xval=0)) if(is.null(cFUN$predict)) cFUN$predict <- function(object, newdata) predict(object, newdata, type = "class") if(is.null(cFUN$training.set)) cFUN$training.set <- "fitted.bag" } } ##check formula if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1)) stop("formula missing or incorrect") m <- match.call(expand.dots = FALSE) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) ##editing formula if(length(formula[[2]])==3) { if(!is.function(cFUN)) { if (is.null(cFUN$formula)) y.formula <- as.formula(formula[[2]]) else y.formula <- cFUN$formula } w.formula <- XX~YY w.formula[[2]] <- formula[[2]][[3]] w.formula[[3]] <- formula[[3]] response <- paste(formula[[2]][[2]]) w.names <- attr(terms(as.formula(formula[[2]]), data = data), "term.labels") x.names <- attr(terms(as.formula(formula), data = data), "term.labels") if((length(x.names) == 1) && x.names == ".") x.names <- colnames(data)[!(colnames(data) %in% c(response, w.names))] y <- data[, response] X <- data[, x.names] W <- data[, w.names] if(is.null(dim(X))) X <- matrix(X, ncol = 1, dimnames = list(rownames(W), x.names)) if(is.null(dim(W))) W <- matrix(W, ncol = 1, dimnames = list(rownames(X), w.names)) if(is.function(cFUN)) { y.formula <- as.formula(paste(formula[[2]][[2]], "~", paste(c(x.names, w.names), collapse = "+"))) fixed.function <- cFUN cFUN <- list() cFUN$fixed.function <- fixed.function } cFUN$formula <- y.formula } else { stop(paste("Specified formula has to be of type y~w~x")) } ##remove settings of training.set if(is.null(pFUN$training.set)) w.training.set <- "oob" else w.training.set <- pFUN$training.set[1] pFUN$training.set <- NULL if(is.null(cFUN$training.set)) y.training.set <- "fitted.bag" else y.training.set <- cFUN$training.set[1] cFUN$training.set <- NULL bcontrol <- list(nbagg = nbagg, ns = length(y)*ns, replace = replace) if(is.null(w.formula)) stop("no formula for prediction model specified") ##formula.list : list of lists which specify an arbitrary number of models for intermediate variables: ##w1.1, w2.1, w3.1, ...., w2.1, w2.2, w3.1, ....
where 'w*' is the variable and '.*' describes the model P <- length(pFUN) number.models <- c() for(i in 1:P) { if(is.null(pFUN[[i]]$formula)) pFUN[[i]]$formula <- w.formula number.models <- c(number.models, paste(attr(terms(pFUN[[i]]$formula[-3], data = data), "term.labels"), ".", i, sep = "")) } formula.list <- vector(mode = "list", length= length(number.models)) names(formula.list) <- paste(number.models) for(i in 1:P) { res <- list() Qi <- length(attr(terms(pFUN[[i]]$formula[-3], data = data), "term.labels")) for(j in 1:Qi) { res$formula <- w.formula res$formula[[2]] <- as.name(attr(terms(res$formula[-3], data = data), "term.labels")[j]) res$formula[[3]] <- pFUN[[i]]$formula[[3]] if(res$formula[[3]] == ".") res$formula <- as.formula(paste(res$formula[[2]], "~", paste(x.names, collapse= "+"))) res$model <- pFUN[[i]]$model res$predict <- pFUN[[i]]$predict formula.list[[paste(res$formula[[2]], ".", i, sep = "")]] <- res } } ##apply res <- workhorse.inbagg(object = formula.list, y = y, X = X, W = W, cFUN = cFUN, w.training.set = w.training.set, y.training.set = y.training.set, bcontrol = bcontrol, ...) RET <- list(mtrees = res, y = y, W = W, X = X) class(RET) <- "inbagg" RET } print.inbagg <- function(x, ...) { q <- length(x$mtrees) intermediates <- attr(x$W, "names") text.intermediates <- paste("Indirect bagging, with", q, "bootstrap samples and intermediate variables: \n", paste(intermediates, collapse = " ")) cat("\n", text.intermediates, "\n") } summary.inbagg <- function(object, ...) { class(object) <- "summary.inbagg" object } print.summary.inbagg <- function(x, ...) { q <- length(x$mtrees) intermediates <- attr(x$W, "names") text.intermediates <- paste("Indirect bagging, with", q, "bootstrap samples and intermediate variables:", paste(intermediates, collapse = " ")) cat("\n", text.intermediates, "\n") for(i in 1:length(x)) { print(x$mtrees[[i]]) } } ipred/R/rsurv.R0000644000176200001440000000374413461342153013074 0ustar liggesusers# $Id: rsurv.R,v 1.5 2003/03/31 08:44:16 peters Exp $ rsurv <- function(N, model=c("A", "B", "C", "D", "tree"), gamma=NULL, fact=1, pnon=10, gethaz=FALSE) { model <- match.arg(model) X <- matrix(runif(N*5), ncol=5) colnames(X) <- paste("X", 1:ncol(X), sep="") switch(model, "A" = { time <- rexp(N) haz <- rep(1, N) }, "B" = { hazard <- as.numeric(X[,1] <= 0.5 & X[,2] > 0.5) time <- rexp(N) time[hazard == 1] <- rexp(sum(hazard==1), exp(3)) haz <- rep(1, N) haz[hazard == 1] <- exp(3) }, "C" = { hazard <- 3*X[,1] + X[,2] haz <- exp(hazard) time <- sapply(haz, rexp, n=1) }, "D" = { hazard <- 3*X[,1] - 3*X[,2] + 4*X[,3] - 2*X[,4] haz <- exp(hazard) time <- sapply(haz, rexp, n=1) }, "tree" = { hazard <- rep(0, nrow(X)) hazard[(X[,1] <= 0.5 & X[,2] <= 0.5)] <- 0 hazard[(X[,1] <= 0.5 & X[,2] > 0.5 & X[,4] <= 0.5)] <- 1 hazard[(X[,1] <= 0.5 & X[,2] > 0.5 & X[,4] > 0.5)] <- 0 hazard[(X[,1] > 0.5 & X[,3] <= 0.5 & X[,5] <= 0.3)] <- 1 hazard[(X[,1] > 0.5 & X[,3] <= 0.5 & X[,5] > 0.3)] <- 2 hazard[(X[,1] > 0.5 & X[,3] > 0.5 & X[,4] <= 0.7)] <- 2 hazard[(X[,1] > 0.5 & X[,3] > 0.5 & X[,4] > 0.7)] <- 3 hazard <- hazard * fact haz <- exp(hazard) time <- sapply(haz, rexp, n=1) if (pnon > 0) X <- cbind(X, matrix(runif(N*pnon), ncol=pnon)) colnames(X) <- paste("X", 1:ncol(X), sep="") }) if (!is.null(gamma)) censtime <- runif(N, min=0, max=gamma) else censtime <- Inf cens <- as.numeric(time <= censtime) time <- pmin(time, censtime) simdat <- as.data.frame(cbind(time, cens, X)) if (gethaz) attr(simdat, "hazard") <- haz return(simdat) } 
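# A hedged usage sketch for rsurv() above (not run at package load):
# simulate data from hazard model "C" and fit bagged survival trees
#   simdat <- rsurv(100, model = "C")
#   mod <- bagging(Surv(time, cens) ~ ., data = simdat, nbagg = 10)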
ipred/R/checkfunArgs.R0000644000176200001440000000152013461342153014304 0ustar liggesusers# $Id: checkfunArgs.R,v 1.1 2003/02/17 09:49:31 hothorn Exp $ checkfunArgs <- function(fun, type=c("model", "predict")) { # check for appropriate arguments of user-supplied function "fun" # this will not work for generics in R < 1.7.0 and is therefore not used # for now type <- match.arg(type) if (!is.function(fun)) { warning("fun is not a function") return(FALSE) } funargs <- formals(fun) switch(type, "model"={ if (!all(names(funargs)[1:2] %in% c("formula", "data"))) { warning("fun is not a function with at least 'formula' and 'data' arguments") return(FALSE) } else { return(TRUE) } }, "predict"={ if (length(funargs) < 2) { warning("fun is not a function with at least 'object' and 'newdata' arguments") return(FALSE) } else { return(TRUE) } }) } ipred/R/predict.inbagg.R0000644000176200001440000000231213461342153014561 0ustar liggesuserspredict.inbagg <- function(object, newdata, ...) { if(!is.data.frame(newdata)) newdata <- as.data.frame(newdata) if(any(names(object$W) %in% names(newdata))) newdata <- newdata[!(names(newdata) %in% names(object$W))] NBAGG <- length(object$mtrees) N <- nrow(newdata) classes <- levels(object$y) vote <- matrix(0, nrow=N, ncol=length(classes)) for(i in 1:NBAGG) { intermed <- object$mtrees[[i]]$bfct(newdata) # XX <- data.frame(newdata, intermed) if(!is.null(object$mtrees[[i]]$btree$fixed.function)) { names(intermed) <- sub(".[0-9]$", "", names(intermed)) XX <- data.frame(newdata, intermed) # names(XX)[(ncol(XX)-ncol(intermed)+1):ncol(XX)] <- sub(".[0-9]$", "", names(intermed)) res <- object$mtrees[[i]]$btree$fixed.function(XX) } else { XX <- data.frame(newdata, intermed) if(is.null(object$mtrees[[i]]$btree$predict)) { res <- try(predict(object$mtrees[[i]]$btree$model, newdata = XX, ...)) } else { res <- try(object$mtrees[[i]]$btree$predict(object$mtrees[[i]]$btree$model, newdata = XX, ...)) } } res <- cbind(1:N, res) vote[res] <- vote[res] +1 } RET <- factor(classes[apply(vote, 1, uwhich.max)]) RET } ipred/R/slda.R0000644000176200001440000000744613461342153012635 0ustar liggesusers# $Id: slda.R,v 1.9 2005/06/29 08:50:28 hothorn Exp $ # stabilized linear discriminant analysis according to Laeuter & Kropf slda <- function(y, ...) UseMethod("slda") slda.default <- function(y, ...) stop(paste("Do not know how to handle objects of class", class(y))) slda.formula <- function(formula, data, subset, na.action=na.rpart, ...) { cl <- match.call() if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1) || (length(attr(terms(formula[-3], data = data), "term.labels")) != 1)) stop("formula missing or incorrect") m <- match.call(expand.dots = FALSE) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) m[[1]] <- as.name("model.frame") m$... <- NULL m <- eval(m, parent.frame()) Terms <- attr(m, "terms") grouping <- model.extract(m, "response") x <- model.matrix(Terms, m) xvars <- as.character(attr(Terms, "variables"))[-1] if ((yvar <- attr(Terms, "response")) > 0) xvars <- xvars[-yvar] xlev <- if (length(xvars) > 0) { xlev <- lapply(m[xvars], levels) xlev[!sapply(xlev, is.null)] } xint <- match("(Intercept)", colnames(x), nomatch=0) if(xint > 0) x <- x[, -xint, drop=FALSE] RET <- slda(y=grouping, X=x, ...)
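  # the formula method only builds the design matrix and delegates to
  # slda.factor; terms, contrasts and factor levels are stored below so
  # that predict.slda can reconstruct the model matrix from newdata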
RET$terms <- Terms RET$call <- match.call() RET$contrasts <- attr(x, "contrasts") RET$xlevels <- xlev attr(RET, "na.message") <- attr(m, "na.message") if(!is.null(attr(m, "na.action"))) RET$na.action <- attr(m, "na.action") RET } slda.factor <- function(y, X, q=NULL, ...) { p <- ncol(X) # substract global mean Xnull <- X - apply(X, 2, mean) if (!is.null(q)) { if (floor(q) != q) stop("q is not an integer") if (q > p) { q <- p warning("q is greater ncol(X), using q = ncol(X)") } if (q < 1) { q <- 1 warning("q is less than 1, using q = 1") } } # this is S_0 in Kropf (2000) Snull <- cov(Xnull) ewp <- svd(solve(diag(diag(Snull), ncol = ncol(Snull)))%*%Snull) if (!is.complex(ewp$d)) { # determine q by the number of eigenvalues > 1 if (is.null(q)) q <- sum(ewp$d > 1) D <- ewp$v[,1:q] if (q == 1) D <- as.matrix(D) # Xstab is still spherically distributed (Fang & Zhang, Laeuter, Kropf & # Glimm)! } else { D <- diag(p) } Xstab <- as.matrix(X) %*% D colnames(Xstab) <- paste("V", 1:ncol(Xstab), sep="") mylda <- lda(Xstab, grouping = y, ...) RET <- list(scores = D, mylda = mylda) class(RET) <- "slda" RET } predict.slda <- function(object, newdata, ...) { if(!inherits(object, "slda")) stop("object not of class slda") if(!is.null(Terms <- object$terms)) { # # formula fit (only) if(missing(newdata)) newdata <- model.frame(object) else { newdata <- model.frame(as.formula(delete.response(Terms)), newdata, na.action=function(x) x, xlev = object$xlevels) } x <- model.matrix(delete.response(Terms), newdata, contrasts = object$contrasts) xint <- match("(Intercept)", colnames(x), nomatch=0) if(xint > 0) x <- x[, -xint, drop=FALSE] } else { stop("object has no terms element") } if(ncol(x) != nrow(object$scores)) stop("wrong number of variables") # : check for variable names! # if(length(colnames(x)) > 0 && # any(colnames(x) != dimnames(object$means)[[2]])) # warning("Variable names in newdata do not match those in object") # X <- x %*% object$scores if (inherits(object$mylda, "lda")) return(predict(object$mylda, newdata=as.data.frame(X), ...)) else stop(paste("Do not know how to predict from objects of class", class(object$mylda))) } ipred/R/ssubset.R0000644000176200001440000000177113461342153013401 0ustar liggesusers ssubset <- function(y, k, strat=TRUE) { if (!is.factor(y)) stop("y is not of class factor") N <- length(y) nlevel <- table(y) nindx <- list() indx <- 1:N outindx <- list() if (strat) { for (j in 1:length(nlevel)) nindx <- c(nindx, list(indx[which(y == levels(y)[j])])) kmat <- kfoldcv(k, N, nlevel) for (i in 1:k) { sset <- kmat[,i] kindx <- c() for (j in 1:length(nlevel)) { if (i > 1) kindx <- c(kindx, nindx[[j]][(sum(kmat[j, 1:(i-1)])+1):sum(kmat[j,1:i])]) else kindx <- c(kindx, nindx[[j]][1:kmat[j,1]]) } kindx <- kindx[!is.na(kindx)] outindx <- c(outindx, list(kindx)) } return(outindx) } else { kmat <- kfoldcv(k, N) nindx <- indx for (i in 1:k) { if (i > 1) outindx <- c(outindx, list(nindx[(sum(kmat[1:(i-1)])+1):sum(kmat[1:i])])) else outindx <- c(outindx, list(nindx[1:kmat[1]])) } } return(outindx) } ipred/R/irpart.R0000644000176200001440000000302213461342153013201 0ustar liggesusers# # use update to fit multiple trees to bootstrap samples # irpart <- function(formula, data=NULL, weights, subset, na.action=na.rpart, method, model=FALSE, x=FALSE, y=TRUE, parms, control, cost, bcontrol, ...) { mc <- match.call() mc$bcontrol <- NULL mc[[1]] <- as.name("rpart") m <- match.call(expand.dots=FALSE) m$model <- m$method <- m$control <- m$bcontrol <- NULL m$x <- m$y <- m$parms <- m$... 
<- NULL m$cost <- NULL m$na.action <- na.action m[[1]] <- as.name("model.frame.default") m <- eval(m, parent.frame()) init_tree <- eval(mc, parent.frame()) nobs <- length(init_tree$where) if (missing(weights)) { weights <- rep(1.0, nobs) } else { warning("weights argument ignored in irpart") } yclasses <- c(class = "sclass", exp = "ssurv", anova = "sreg", poisson = "sreg") # # Bagging: repeat this several times! # if (is.null(bcontrol)) stop("bcontrol not given") mod <- vector(mode="list", length=bcontrol$nbagg) for (b in 1:bcontrol$nbagg) { if (bcontrol$nbagg > 1) bindx <- sample(1:nobs, bcontrol$ns, replace=bcontrol$replace) else bindx <- 1:nobs tab <- tabulate(bindx, nbins = nobs) mc$data <- m[bindx,,drop = FALSE] ### tab * weights ans <- eval(mc, parent.frame()) # return the appropriate class this <- list(bindx = bindx, btree = ans) class(this) <- yclasses[init_tree$method] mod[[b]] <- this } mod } ipred/R/prune.bagging.R0000644000176200001440000000105713461342153014434 0ustar liggesusers# $Id: prune.bagging.R,v 1.2 2002/09/12 08:59:13 hothorn Exp $ prune.classbagg <- function(tree, cp=0.01,...) { for(i in 1:length(tree$mtrees)) tree$mtrees[[i]]$btree <- prune( tree$mtrees[[i]]$btree, cp=cp, ...) tree } prune.regbagg <- function(tree, cp=0.01,...) { for(i in 1:length(tree$mtrees)) tree$mtrees[[i]]$btree <- prune( tree$mtrees[[i]]$btree, cp=cp, ...) tree } prune.survbagg <- function(tree, cp=0.01,...) { for(i in 1:length(tree$mtrees)) tree$mtrees[[i]]$btree <- prune( tree$mtrees[[i]]$btree, cp=cp, ...) tree } ipred/R/ipredknn.R0000644000176200001440000000520713461342153013521 0ustar liggesusers# $Id: ipredknn.R,v 1.5 2005/06/29 08:50:28 hothorn Exp $ # k-NN compatible with the fit(formula) - predict(object) framework ipredknn <- function(formula, data, subset, na.action, k=5, ...) { cl <- match.call() if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1) || (length(attr(terms(formula[-3], data = data), "term.labels")) != 1)) stop("formula missing or incorrect") m <- match.call(expand.dots = FALSE) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) m[[1]] <- as.name("model.frame") m$... <- NULL m$k <- NULL m <- eval(m, parent.frame()) Terms <- attr(m, "terms") y <- model.extract(m, "response") x <- model.matrix(Terms, m) xvars <- as.character(attr(Terms, "variables"))[-1] if ((yvar <- attr(Terms, "response")) > 0) xvars <- xvars[-yvar] xlev <- if (length(xvars) > 0) { xlev <- lapply(m[xvars], levels) xlev[!sapply(xlev, is.null)] } xint <- match("(Intercept)", colnames(x), nomatch=0) if(xint > 0) x <- x[, -xint, drop=FALSE] RET <- list(learn=list(y=y, X=x)) RET$k <- k RET$terms <- Terms RET$call <- match.call() RET$contrasts <- attr(x, "contrasts") RET$xlevels <- xlev attr(RET, "na.message") <- attr(m, "na.message") if(!is.null(attr(m, "na.action"))) RET$na.action <- attr(m, "na.action") class(RET) <- "ipredknn" RET } predict.ipredknn <- function(object, newdata, type=c("prob", "class"), ...) 
{ type <- match.arg(type) if(!inherits(object, "ipredknn")) stop("object not of class ipredknn") if(!is.null(Terms <- object$terms)) { # # formula fit (only) if(missing(newdata)) newdata <- model.frame(object) else { newdata <- model.frame(as.formula(delete.response(Terms)), newdata, na.action=function(x) x, xlev = object$xlevels) } x <- model.matrix(delete.response(Terms), newdata, contrasts = object$contrasts) xint <- match("(Intercept)", colnames(x), nomatch=0) if(xint > 0) x <- x[, -xint, drop=FALSE] } else { stop("object has no terms element") } # : check for variable names # if(length(colnames(x)) > 0 && # any(colnames(x) != dimnames(object$means)[[2]])) # warning("Variable names in newdata do not match those in object") # RET <- knn(object$learn$X, x, object$learn$y, k=object$k, prob=TRUE) if (type=="prob") return(attr(RET, "prob")) else return(RET) } ipred/R/predict.bagging.R0000644000176200001440000002106614120334207014731 0ustar liggesusers# $Id: predict.bagging.R,v 1.17 2009/03/27 16:18:38 hothorn Exp $ uwhich.max <- function(x) { # need to determine all maxima in order to sample from them wm <- (1:length(x))[x == max(x)] if (length(wm) > 1) wm <- wm[sample(length(wm), 1)] wm } predict.classbagg <- function(object, newdata=NULL, type=c("class", "prob"), aggregation=c("majority", "average", "weighted"), ...) { type <- match.arg(type) agg <- match.arg(aggregation) if (missing(newdata)) { if (length(object$mtrees) < 10) stop("cannot compute out-of-bag predictions for small number of trees") OOB <- TRUE if (!is.null(object$X)) newdata <- object$X else stop("cannot compute out-of-bag predictions without object$X!") } else { OOB <- FALSE } if (!is.data.frame(newdata)) newdata <- as.data.frame(newdata) N <- nrow(newdata) if (!object$comb) { tree <- object$mtrees[[1]]$btree Terms <- delete.response(tree$terms) act <- (tree$call)$na.action if (is.null(act)) act<- na.rpart newdata <- model.frame(Terms, newdata, na.action = act, xlev=attr(tree, "xlevels")) newdata <- getFromNamespace("rpart.matrix", ns = "rpart")(newdata) } classes <- levels(object$y) switch(agg, "majority" = { vote <- matrix(0, nrow=N, ncol=length(classes)) for (i in 1:length(object$mtrees)) { if (OOB) { bindx <- object$mtrees[[i]]$bindx if (!is.null(object$mtrees[[i]]$bfct)) stop("cannot compute out-of-bag estimate for combined models!") pred <- predict(object$mtrees[[i]], newdata, type="class") tindx <- cbind((1:N), pred)[-bindx,] } else { tindx <- cbind(1:N, predict(object$mtrees[[i]], newdata, type="class")) } vote[tindx] <- vote[tindx] + 1 } if (type=="class") { RET <- factor(classes[apply(vote, 1, uwhich.max)], levels = classes, labels = classes) } else { RET <- vote/apply(vote, 1, sum) colnames(RET) <- classes } }, "average" = { cprob <- matrix(0, nrow=N, ncol=length(classes)) if (OOB) ncount <- rep(0,N) else ncount <- length(object$mtrees) for (i in 1:length(object$mtrees)) { if (OOB) { bindx <- object$mtrees[[i]]$bindx pred <- predict(object$mtrees[[i]], newdata, type="prob")[-bindx,] tindx <- (1:N)[-bindx] ncount[tindx] <- ncount[tindx] + 1 } else { pred <- predict(object$mtrees[[i]], newdata, type="prob") tindx <- 1:N } cprob[tindx,] <- cprob[tindx,] + pred } switch(type, "class" = { RET <- factor(classes[apply(cprob, 1, uwhich.max)], levels = classes, labels = classes) }, "prob" = { ncount[ncount < 1] <- NA RET <- cprob / ncount colnames(RET) <- classes }) }, "weighted" = { agglsample <- matrix(0, ncol=length(classes), nrow=N) for (i in 1:length(object$mtrees)) { bdata <- object$y[object$mtrees[[i]]$bindx] 
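      # 'weighted' aggregation: bdata holds the bootstrap responses used to
      # grow the i-th tree; each new observation is dropped down that tree
      # and the class frequencies of the bootstrap observations in its
      # terminal node are accumulated over all trees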
newpart <- getpartition(object$mtrees[[i]], newdata) oldpart <- object$mtrees[[i]]$btree$where if (OOB) tindx <- (1:N)[-object$mtrees[[i]]$bindx] else tindx <- 1:N for (j in tindx) { aggobs <- table(bdata[oldpart == newpart[j]]) agglsample[j,] <- agglsample[j,] + aggobs } } switch(type, "class" = { RET <- c() for (j in 1:N) RET <- c(RET, uwhich.max(agglsample[j,])) RET <- factor(classes[RET], levels = classes, labels = classes) }, "prob" = { RET <- agglsample / apply(agglsample, 1, sum) colnames(RET) <- classes }) }) RET } predict.sclass <- function(object, newdata=NULL, type=c("class", "prob"), ...) { if (!is.null(object$bfct)) newdata <- cbind(newdata, object$bfct(newdata)) pred <- predict.irpart(object$btree, newdata, type=type) RET <- pred if (type == "class") RET <- as.integer(pred) if (type == "prob" && is.vector(pred)) RET <- cbind(pred, 1 - pred) RET } predict.regbagg <- function(object, newdata=NULL, aggregation=c("average", "weighted"), ...) { agg <- match.arg(aggregation) if (missing(newdata)) { if (length(object$mtrees) < 10) stop("cannot compute out-of-bag predictions for small number of trees") OOB <- TRUE if (!is.null(object$X)) newdata <- object$X else stop("cannot compute out-of-bag predictions without object$X!") } else { OOB <- FALSE } if (!is.data.frame(newdata)) newdata <- as.data.frame(newdata) N <- nrow(newdata) if (!object$comb) { tree <- object$mtrees[[1]]$btree Terms <- delete.response(tree$terms) act <- (tree$call)$na.action if (is.null(act)) act<- na.rpart newdata <- model.frame(Terms, newdata, na.action = act, xlev=attr(tree, "xlevels")) newdata <- getFromNamespace("rpart.matrix", ns = "rpart")(newdata) } switch(agg, "average" = { cprob <- rep(0, N) if (OOB) ncount <- rep(0,N) else ncount <- length(object$mtrees) for (i in 1:length(object$mtrees)) { if (OOB) { bindx <- object$mtrees[[i]]$bindx if (!is.null(object$mtrees[[i]]$bfct)) stop("cannot compute out-of-bag estimate for combined models!") pred <- predict(object$mtrees[[i]], newdata)[-bindx] tindx <- (1:N)[-bindx] ncount[tindx] <- ncount[tindx] + 1 } else { pred <- predict(object$mtrees[[i]], newdata) tindx <- 1:N } cprob[tindx] <- cprob[tindx] + pred } ncount[ncount < 1] <- NA RET <- cprob / ncount }, "weighted" = { agglsample <- rep(0, N) ncount <- rep(0, N) for (i in 1:length(object$mtrees)) { bdata <- object$y[object$mtrees[[i]]$bindx] newpart <- getpartition(object$mtrees[[i]], newdata) oldpart <- object$mtrees[[i]]$btree$where if (OOB) tindx <- (1:N)[-object$mtrees[[i]]$bindx] else tindx <- 1:N for (j in tindx) { aggobs <- bdata[oldpart == newpart[j]] agglsample[j] <- agglsample[j] + sum(aggobs) ncount[j] <- ncount[j] + length(aggobs) } } ncount[ncount < 1] <- NA RET <- agglsample / ncount }) RET } predict.sreg <- function(object, newdata=NULL, ...) { if (!is.null(object$bfct)) newdata <- cbind(newdata, object$bfct(newdata)) predict.irpart(object$btree, newdata) } predict.survbagg <- function(object, newdata=NULL, ...) 
{ if (missing(newdata)) { if (length(object$mtrees) < 10) stop("cannot compute out-of-bag predictions for small number of trees") OOB <- TRUE if (!is.null(object$X)) newdata <- object$X else stop("cannot compute out-of-bag predictions without object$X!") } else { OOB <- FALSE } if (!is.data.frame(newdata)) newdata <- as.data.frame(newdata) N <- nrow(newdata) if (!object$comb) { tree <- object$mtrees[[1]]$btree Terms <- delete.response(tree$terms) act <- (tree$call)$na.action if (is.null(act)) act<- na.rpart newdata <- model.frame(Terms, newdata, na.action = act, xlev=attr(tree, "xlevels")) newdata <- getFromNamespace("rpart.matrix", ns = "rpart")(newdata) } agglsample <- list() aggcens <- list() for (j in 1:N) { agglsample <- c(agglsample, list(c())) aggcens <- c(aggcens, list(c())) } for (i in 1:length(object$mtrees)) { bdata <- object$y[object$mtrees[[i]]$bindx] newpart <- getpartition(object$mtrees[[i]], newdata) oldpart <- object$mtrees[[i]]$btree$where if (OOB) { if (!is.null(object$mtrees[[i]]$bfct)) stop("cannot compute out-of-bag estimate for combined models!") tindx <- (1:N)[-object$mtrees[[i]]$bindx] } else { tindx <- 1:N } for (j in tindx) { aggobs <- bdata[oldpart == newpart[j],1] agglsample[[j]] <- c(agglsample[[j]], aggobs) aggobs <- bdata[oldpart == newpart[j],2] aggcens[[j]] <- c(aggcens[[j]], aggobs) } } RET <- list() for (j in 1:N) RET <- c(RET, list(survfit(Surv(agglsample[[j]], aggcens[[j]]) ~ 1))) RET } getpartition <- function(object, newdata=NULL) { if (!is.null(object$bfct)) { newdata <- cbind(newdata, object$bfct(newdata)) Terms <- delete.response(object$btree$terms) act <- (object$btree$call)$na.action if (is.null(act)) act<- na.rpart newdata <- model.frame(Terms, newdata, na.action = act, xlev=attr(object$btree, "xlevels")) newdata <- getFromNamespace("rpart.matrix", ns = "rpart")(newdata) } getFromNamespace("pred.rpart", ns = "rpart")(object$btree, newdata) } ipred/R/mypredict.lm.R0000644000176200001440000000126413461342153014315 0ustar liggesusers# $Id: mypredict.lm.R,v 1.7 2003/04/02 11:22:49 peters Exp $ mypredict.lm <- function(object, newdata) { xn <- as.data.frame(newdata) test <- attr(terms(object), "term.labels") xn <- xn[,test] if (!is.null(nrow(xn))) { pred <- rep(NA, nrow(xn)) names(pred) <- row.names(xn) } else { pred <- NA names(pred) <- "1" } # evaluate na.omit (delete lines containing NA) xnn <- na.omit(xn) # attr(xnn, "na.action") returns which na.action is # evaluated, lines and corresponding row.name where NAs occur if(is.null(attr(xnn, "na.action"))) pred <- predict(object, xnn) else pred[-attr(xnn, "na.action")] <- predict(object, xnn) pred } ipred/R/bagging.R0000644000176200001440000000225513461342153013305 0ustar liggesusers# $Id: bagging.R,v 1.19 2005/06/29 08:50:28 hothorn Exp $ bagging <- function(formula, data, ...) UseMethod("bagging", data) bagging.default <- function(formula, data, ...) stop(paste("Do not know how to handle objects of class", class(data))) bagging.data.frame <- function(formula, data, subset, na.action=na.rpart, ...) { cl <- match.call() if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1) || (length(attr(terms(formula[-3], data = data), "term.labels")) != 1)) stop("formula missing or incorrect") m <- match.call(expand.dots = FALSE) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) m[[1]] <- as.name("model.frame") m$... 
<- NULL mf <- eval(m, parent.frame()) response <- attr(attr(mf, "terms"), "response") # just extract the data.frame, no handling of contrasts or NA's here. # this is done by rpart or the user supplied methods DATA <- list(y = mf[,response], X = mf[,-response, drop = FALSE]) names(DATA) <- c("y", "X") y <- do.call("ipredbagg", c(DATA, list(...))) y$call <- cl return(y) } ipred/MD50000644000176200001440000000761714120340712011672 0ustar liggesusers4f917ef1d256ef4a813073b4c3dd7fb2 *DESCRIPTION 15b9a39f2d45f6a431b66ad752f2be5a *NAMESPACE bd957f8c9597e280c5bc486db1b50fed *R/bagging.R c55f9e65194e69cac7b3f6a14819f7ee *R/bootest.R d8b949efde7413d8d639a64be6528403 *R/checkfunArgs.R 85963ba4287c3ffd4c84dbe8a48d9c28 *R/csurv.R 16fcd24ff394cdec7d929f8acacf5823 *R/cv.R b17e241d115e8e9491c5e07e612295fc *R/errorest.R f3f9979d3d1de170acc34bb1cfd15ef6 *R/inbagg.R 9bc7ff831e43ae443c3afd0015f9b45e *R/inclass.R 96677befab2b4535bc23d755ad5f6c92 *R/ipredbagg.R d22859a559e876b2e478d366b705d0a1 *R/ipredknn.R a98cc646bc7f7a9173d511917bc2db34 *R/irpart.R 6100af39d915430ab680a3889d52d4ec *R/kfoldcv.R 7d64c8043f1426fe3f41e324dad74be6 *R/mypredict.lm.R 51cb58bde04a793a35b2caf233517f43 *R/predict.bagging.R 1714695c8d4dd8439fc6d1e3f7968e1c *R/predict.inbagg.R 301f8e08bee09b9817e8ffdce0a4c398 *R/predict.inclass.R 8b4b9b3039c4fb86d2c96555b32872a8 *R/predict.irpart.R 5427f43c2b21d2e7f3c6ff2fc521c33a *R/print.R 7fff7c589cfae0b7e57f6af2bf6974f4 *R/prune.bagging.R ea8bb0575e5ee83b6a4d595212906f83 *R/rsurv.R 065dbafb0e228351e7b2a4e25eccfb0d *R/sbrier.R bb92fccd557c101dc3f52a3c7a7a1961 *R/slda.R 51c9a057a29e6be4477aaf23bcce4000 *R/ssubset.R 860d6ab9df7627b349a2fd9ac71237fe *R/varset.R 98dba5e745b93df373d7874c362fd5f8 *build/vignette.rds f83cb6bdc4e6265a64be0914da7979f6 *cleanup b77f49ce74f90948e4d09122c5fac724 *data/DLBCL.rda b07616370b51419752d4219f1f4f9f55 *data/GlaucomaMVF.rda 1f87b4f0d914888b1be71028fef8d316 *data/Smoking.rda e54b730797d71b1483cc26bfb3ea552b *data/dystrophy.rda 45a8a599f130fd94e2bf0ccea723a290 *inst/COPYRIGHTS 16d74f104d9025e5965e12a54797cbfe *inst/NEWS 82eeec327b400aadc3c7fe0905483d8a *inst/doc/ipred-examples.R 72c8610d330afc2376692437ffd0b5e0 *inst/doc/ipred-examples.Rnw fe1a811bb710a60158ab0a0884f1af53 *inst/doc/ipred-examples.pdf ef01004837ec42585a18b1292cc00952 *man/DLBCL.Rd 3cf72f2749b7029a0b8b685461d99d3c *man/GlaucomaMVF.Rd c7a9f9b81fb440185fccab54c289864e *man/Smoking.Rd 716e562e6d5449abc3bbf954ee18a135 *man/bagging.Rd f4694ae7448c30a49814bc23fee777a7 *man/bootest.Rd 920bf08095b01ae4d943fbff21eedb57 *man/control.errorest.Rd b9874254d1a440ce7d1373ddb88ed18b *man/cv.Rd 54ef878e658ab5d41e3a54642ca9a451 *man/dystrophy.Rd ebb22088953b88f1a62151d295fc2ebd *man/errorest.Rd ca29c817b98efbef404f5b8467c796c3 *man/inbagg.Rd ceeaae7b39770e87036bd86c4b899a92 *man/inclass.Rd d34608ac849321130774fc768f28c475 *man/ipred-internal.Rd 86ea0f01231c482541c1ae86fa6f1652 *man/ipredknn.Rd 37eee681bff7ad10df28bb18eab6cf2e *man/kfoldcv.Rd 941c6c8d91a74ae8a046811346664b0b *man/mypredict.lm.Rd d1705bf690f5e92abf7123ca0e0ad2b7 *man/predict.bagging.Rd b3dbc86c0c9cab4b549b162b75248c31 *man/predict.inbagg.Rd b80ad2198613b405333aef1e6e3cc504 *man/predict.inclass.Rd 0da9ab0fcef3c03dc6f04309c8830b83 *man/predict.ipredknn.Rd d45eeb09998f42d52f180a46296b9744 *man/predict.slda.Rd d02437884a49f5b9937791ed6d07c53b *man/print.bagging.Rd 755e22b8d9799ff3d55f7364be332931 *man/print.cvclass.Rd b1ae5acecd41d145898b794567a38231 *man/print.inbagg.Rd 0e78f227c282e6474a5b504a74b97fe2 *man/print.inclass.Rd 8903ad93aa5f9f092faab0db112733bd 
*man/prune.bagging.Rd d88328ca9e52b501e01d1b291de8f16d *man/rsurv.Rd f617299850199477617f8c80f9967fae *man/sbrier.Rd ac34c8ccf9d10e1e15e7a2138c14b2cb *man/slda.Rd 9000c36afc63d72103df0e5c41dfffc5 *man/summary.bagging.Rd dd36ca065d305401e0326680b5cda910 *man/summary.inbagg.Rd 8ce9a1f1379d492df5aea55687b6b95c *man/summary.inclass.Rd 79fed002bac6fba4b49458b352873e8c *man/varset.Rd 0ac59b9f3655966e0fb52ca8a8b2b27a *src/SdiffKM.c 4c8242f0f0243ec116d9d1dd8ed99150 *src/init.c 48d16c5ed9e2eebd086e1244a0839308 *tests/ipred-bugs.R ee8dd469906916e9c86b5580f51b8bc0 *tests/ipred-bugs.Rout.save 6f1c14a02814a289c65b56125e576d3d *tests/ipred-segfault.R c28fb98cadd385924acb0c03856e4149 *tests/ipred-smalltest.R 2d85b750ceb36f266749f63921c26fa1 *tests/ipred-smalltest.Rout.save 72c8610d330afc2376692437ffd0b5e0 *vignettes/ipred-examples.Rnw c642c366927d8bf2522e3c078f6e34a2 *vignettes/ipred.bib ipred/inst/0000755000176200001440000000000014120335534012332 5ustar liggesusersipred/inst/doc/0000755000176200001440000000000014120335534013077 5ustar liggesusersipred/inst/doc/ipred-examples.R0000644000176200001440000001121414120335533016137 0ustar liggesusers### R code from vignette source 'ipred-examples.Rnw' ################################################### ### code chunk number 1: preliminaries ################################################### options(prompt=">", width=50) set.seed(210477) ################################################### ### code chunk number 2: bagging ################################################### library("ipred") library("rpart") library("MASS") data("GlaucomaM", package="TH.data") gbag <- bagging(Class ~ ., data = GlaucomaM, coob=TRUE) ################################################### ### code chunk number 3: print-bagging ################################################### print(gbag) ################################################### ### code chunk number 4: double-bagging ################################################### scomb <- list(list(model=slda, predict=function(object, newdata) predict(object, newdata)$x)) gbagc <- bagging(Class ~ ., data = GlaucomaM, comb=scomb) ################################################### ### code chunk number 5: predict.bagging ################################################### predict(gbagc, newdata=GlaucomaM[c(1:3, 99:102), ]) ################################################### ### code chunk number 6: indirect.formula ################################################### data("GlaucomaMVF", package="ipred") GlaucomaMVF <- GlaucomaMVF[,-63] formula.indirect <- Class~clv + lora + cs ~ . 
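###################################################
### sketch (not generated from the vignette): reading the two-part formula
###################################################
## The formula above separates the variable groups used by inclass() and
## inbagg(): the response Class is defined through the intermediates clv,
## lora and cs, which in turn are modeled by all remaining (explanatory)
## variables; an equivalent construction would be
##   formula.indirect <- as.formula("Class ~ clv + lora + cs ~ .")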
###################################################
### code chunk number 7: indirect.fit
###################################################
classify <- function (data) {
  attach(data)
  res <- ifelse((!is.na(clv) & !is.na(lora) & clv >= 5.1 & lora >=
        49.23372) | (!is.na(clv) & !is.na(lora) & !is.na(cs) &
        clv < 5.1 & lora >= 58.55409 & cs < 1.405) | (is.na(clv) &
        !is.na(lora) & !is.na(cs) & lora >= 58.55409 & cs < 1.405) |
        (!is.na(clv) & is.na(lora) & cs < 1.405), 0, 1)
  detach(data)
  factor (res, labels = c("glaucoma", "normal"))
}
fit <- inclass(formula.indirect, pFUN = list(list(model = lm)),
  cFUN = classify, data = GlaucomaMVF)


###################################################
### code chunk number 8: print.indirect
###################################################
print(fit)


###################################################
### code chunk number 9: predict.indirect
###################################################
predict(object = fit, newdata = GlaucomaMVF[c(1:3, 86:88),])


###################################################
### code chunk number 10: bagging.indirect
###################################################
mypredict.rpart <- function(object, newdata) {
  RES <- predict(object, newdata)
  RET <- rep(NA, nrow(newdata))
  NAMES <- rownames(newdata)
  RET[NAMES %in% names(RES)] <- RES[NAMES[NAMES %in% names(RES)]]
  RET
}
fit <- inbagg(formula.indirect, pFUN = list(list(model = rpart, predict =
mypredict.rpart)), cFUN = classify, nbagg = 25, data = GlaucomaMVF)


###################################################
### code chunk number 11: plda
###################################################
mypredict.lda <- function(object, newdata){
  predict(object, newdata = newdata)$class
}


###################################################
### code chunk number 12: cvlda
###################################################
errorest(Class ~ ., data= GlaucomaM,
  model=lda, estimator = "cv", predict= mypredict.lda)


###################################################
### code chunk number 13: cvindirect
###################################################
errorest(formula.indirect, data = GlaucomaMVF,
  model = inclass, estimator = "632plus",
  pFUN = list(list(model = lm)), cFUN = classify)


###################################################
### code chunk number 14: varsel-def
###################################################
mymod <- function(formula, data, level=0.05) {
  # select all predictors that are associated with a
  # univariate t.test p-value of less than level
  sel <- which(lapply(data, function(x) {
    if (!is.numeric(x))
      return(1)
    else
      return(t.test(x ~ data$Class)$p.value)
    }) < level)
  # make sure that the response is still there
  sel <- c(which(colnames(data) %in% "Class"), sel)
  # compute a LDA using the selected predictors only
  mod <- lda(formula , data=data[,sel])
  # and return a function for prediction
  function(newdata) {
    predict(mod, newdata=newdata[,sel])$class
  }
}


###################################################
### code chunk number 15: varsel-comp
###################################################
errorest(Class ~ . , data=GlaucomaM, model=mymod, estimator = "cv",
est.para=control.errorest(k=5))
ipred/inst/doc/ipred-examples.pdf0000644000176200001440000024005214120335534016514 0ustar liggesusers[binary PDF vignette (compressed PDF streams) omitted; see inst/doc/ipred-examples.Rnw below for the source]
ipred/inst/doc/ipred-examples.Rnw0000644000176200001440000004270413461342153016517 0ustar liggesusers\documentclass[11pt]{article}
\usepackage[round]{natbib}
\usepackage{bibentry}
\usepackage{amsfonts}
\usepackage{hyperref}
\renewcommand{\baselinestretch}{1.3}
\newcommand{\ipred}{\texttt{ipred }}
%\VignetteIndexEntry{Some more or less useful examples for illustration.}
%\VignetteDepends{ipred}
%\textwidth=6.2in
%\VignetteDepends{mvtnorm,TH.data,rpart,MASS}
\begin{document}

\title{\ipred: Improved Predictors}
\date{}

\SweaveOpts{engine=R,eps=TRUE,pdf=TRUE}

<<preliminaries, echo=FALSE>>=
options(prompt=">", width=50)
set.seed(210477)
@

\maketitle

This short manual is heavily based on \cite{Rnews:Peters+Hothorn+Lausen:2002}
and needs some improvements.

\section{Introduction}

In classification problems, there are several attempts to create rules
which assign future observations to certain classes.
Common methods are, for example, linear discriminant analysis and
classification trees. Recent developments have led to a substantial
reduction of the misclassification error in many applications. Bootstrap
aggregation \citep[``bagging'',][]{breiman:1996} combines classifiers
trained on bootstrap samples of the original data. Another approach is
indirect classification, which incorporates a priori knowledge into a
classification rule \citep{hand:2001}. Since the misclassification error
is a criterion to assess classification techniques, its estimation is of
main importance. A nearly unbiased but highly variable estimator can be
calculated by cross validation. \cite{efron:1997} discuss bootstrap
estimates of misclassification error. As a by-product of bagging,
\cite{out-of-bag:1996} proposes the out-of-bag estimator. \\
However, the computation of the desired classification models and their
misclassification errors is often hampered by the differing and
specialized interfaces of the various procedures. We propose the \ipred
package as a first attempt to create a unified interface for improved
predictors and various error rate estimators.

In the following we demonstrate the functionality of the package using
the example of glaucoma classification. We start with an overview of the
disease and the data and review the implemented classification and
estimation methods in the context of their application to glaucoma
diagnosis.

\section{Glaucoma}

Glaucoma is a slowly progressing and irreversible disease that affects
the optic nerve head. It is the second most common cause of blindness
worldwide. Glaucoma is usually diagnosed based on a reduced visual
field, assessed by a medical examination of perimetry, and a reduced
number of intact nerve fibers at the optic nerve head.

One way to examine the amount of intact nerve fibers is the Heidelberg
Retina Tomograph (HRT), a confocal laser scanning tomograph that
performs a three-dimensional topographical analysis of the optic nerve
head morphology. It produces a series of $32$ images, each of
$256 \times 256$ pixels, which are converted to a single topographic
image. A less complex, though also less informative, examination tool is
$2$-dimensional fundus photography. However, in cooperation with
clinicians and based on a priori analyses we derived a diagnosis of
glaucoma from three variables only: $w_{lora}$ represents the loss of
nerve fibers and is obtained from a $2$-dimensional fundus photography,
$w_{cs}$ and $w_{clv}$ describe the visual field defect
\citep{ifcs:2001}.

\begin{center}
\begin{figure}[h]
\begin{center}
{\small
\setlength{\unitlength}{0.6cm}
\begin{picture}(14.5,5)
\put(5, 4.5){\makebox(2, 0.5){$w_{clv}\geq 5.1$}}
\put(2.5, 3){\makebox(2, 0.5){$w_{lora}\geq 49.23$}}
\put(7.5, 3){\makebox(2, 0.5){$w_{lora} \geq 58.55$}}
\put(0, 1.5){\makebox(2, 0.5){$glaucoma$}}
\put(3.5, 1.5){\makebox(2, 0.5){$normal$}}
\put(6.5, 1.5){\makebox(2, 0.5){$w_{cs} < 1.405$}}
\put(10, 1.5){\makebox(2, 0.5){$normal$}}
\put(3.5, 0){\makebox(2, 0.5){$glaucoma$}}
\put(6.5, 0){\makebox(2, 0.5){$normal$}}
\put(6, 4.5){\vector(-3, -2){1.5}}
\put(6, 4.5){\vector(3, -2){1.5}}
\put(3.5, 3){\vector(3, -2){1.5}}
\put(3.5, 3){\vector(-3, -2){1.5}}
\put(8.5, 3){\vector(3, -2){1.5}}
\put(8.5, 3){\vector(-3, -2){1.5}}
\put(6.5, 1.5){\vector(3, -2){1.5}}
\put(6.5, 1.5){\vector(-3, -2){1.5}}
\end{picture}
}
\end{center}
\caption{Glaucoma diagnosis. \label{diag}}
\end{figure}
\end{center}

Figure \ref{diag} represents the diagnosis of glaucoma in terms of a
medical decision tree.
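
In R, the rule of Figure \ref{diag} can be written down directly. The
following sketch (not evaluated here; it ignores missing values, which
the function \texttt{classify} introduced below handles explicitly)
returns the diagnosis for given values of the three variables:

<<diag-sketch, eval=FALSE>>=
diagnose <- function(clv, lora, cs) {
    glaucoma <- (clv >= 5.1 & lora >= 49.23) |
                (clv < 5.1 & lora >= 58.55 & cs < 1.405)
    factor(ifelse(glaucoma, "glaucoma", "normal"),
           levels = c("glaucoma", "normal"))
}
@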
A complication of the disease is that damage to the optic nerve head
morphology precedes a measurable visual field defect. Furthermore, early
detection is of main importance, since an adequate therapy can only slow
down the progression of the disease. Hence, a classification rule for
detecting early damage should include morphological information, rather
than visual field data only. Two example datasets are included in the
package. The first one contains measurements of the eye morphology only
(\texttt{GlaucomaM}), including $62$ variables for $196$ observations.
The second dataset (\texttt{GlaucomaMVF}) contains additional visual
field measurements for a different set of patients. In both example
datasets, the observations in the two groups are matched by age and sex
to prevent any bias.

\section{Bagging}

Referring to the example of glaucoma diagnosis we first demonstrate the
functionality of the \texttt{bagging} function. We fit
\texttt{nbagg = 25} (default) classification trees for bagging by

<<bagging>>=
library("ipred")
library("rpart")
library("MASS")
data("GlaucomaM", package="TH.data")
gbag <- bagging(Class ~ ., data = GlaucomaM, coob=TRUE)
@

where \texttt{GlaucomaM} contains explanatory HRT variables and the
response of glaucoma diagnosis (\texttt{Class}), a factor at two levels
\texttt{normal} and \texttt{glaucoma}. \texttt{print} returns
information about the returned object, i.e. the number of bootstrap
replications used and, as requested by \texttt{coob=TRUE}, the
out-of-bag estimate of misclassification error \citep{out-of-bag:1996}.

<<print-bagging>>=
print(gbag)
@

The out-of-bag estimate uses the observations which are left out in a
bootstrap sample to estimate the misclassification error at almost no
additional computational cost. \cite{double-bag:2002} propose to use the
out-of-bag samples for a combination of linear discriminant analysis and
classification trees, called ``Double-Bagging''. For example, a
combination of a stabilised linear discriminant analysis with
classification trees can be computed along the following lines

<<double-bagging>>=
scomb <- list(list(model=slda, predict=function(object, newdata)
predict(object, newdata)$x))
gbagc <- bagging(Class ~ ., data = GlaucomaM, comb=scomb)
@

\texttt{predict} predicts future observations according to the fitted
model.

<<predict.bagging>>=
predict(gbagc, newdata=GlaucomaM[c(1:3, 99:102), ])
@

Both \texttt{bagging} and \texttt{predict} rely on the \texttt{rpart}
routines. The \texttt{rpart} routine for each bootstrap sample can be
controlled in the usual way. By default \texttt{rpart.control} is used
with \texttt{minsize=2} and \texttt{cp=0}, and it is wise to turn
cross-validation off (\texttt{xval=0}). The function \texttt{prune} can
be used to prune each of the trees to an appropriate size.

\section{Indirect Classification}

Especially in a medical context it often occurs that a priori knowledge
about a classifying structure is given. For example, it might be known
that a disease is assessed on a subgroup of the given variables or,
moreover, that class memberships are assigned by a deterministically
known classifying function. \cite{hand:2001} proposes the framework of
indirect classification which incorporates this a priori knowledge into
a classification rule. In this framework we subdivide a given data set
into three groups of variables: those to be used for predicting the
class membership (explanatory), those to be used for defining the class
membership (intermediate) and the class membership variable itself
(response).
For future observations, an indirect classifier predicts values for the appointed intermediate variables based on the explanatory variables only. Each observation is then classified based on its predicted intermediate variables and a fixed classifying function. This indirect way of classification offers the possibility to incorporate a priori knowledge via the subdivision of variables and the construction of the fixed classifying function.

We apply indirect classification using the function \texttt{inclass}. Referring to the glaucoma example, the explanatory variables are HRT and anamnestic variables only, the intermediate variables are $w_{lora}, \, w_{cs}$ and $w_{clv}$. The response is the diagnosis of glaucoma, which is determined by a fixed classifying function and is therefore not included in the learning sample \texttt{GlaucomaMVF}. We assign the given variables to the explanatory and intermediate groups by specifying the input formula.
<<>>=
data("GlaucomaMVF", package = "ipred")
GlaucomaMVF <- GlaucomaMVF[, -63]
formula.indirect <- Class ~ clv + lora + cs ~ .
@
The variables on the left-hand side represent the intermediate variables, modeled by the explanatory variables on the right-hand side. Almost any modeling technique can be used to predict the intermediate variables. We choose a linear model via \texttt{pFUN = list(list(model = lm))}.
<<>>=
classify <- function (data) {
    attach(data)
    res <- ifelse((!is.na(clv) & !is.na(lora) & clv >= 5.1 & lora >= 49.23372) |
                  (!is.na(clv) & !is.na(lora) & !is.na(cs) &
                   clv < 5.1 & lora >= 58.55409 & cs < 1.405) |
                  (is.na(clv) & !is.na(lora) & !is.na(cs) &
                   lora >= 58.55409 & cs < 1.405) |
                  (!is.na(clv) & is.na(lora) & cs < 1.405), 0, 1)
    detach(data)
    factor(res, labels = c("glaucoma", "normal"))
}
fit <- inclass(formula.indirect, pFUN = list(list(model = lm)),
               cFUN = classify, data = GlaucomaMVF)
@
\texttt{print} displays the subdivision of variables and the chosen modeling technique.
<<>>=
print(fit)
@
Indirect classification predicts the intermediate variables based on the explanatory variables and, in a second step, classifies them according to the fixed classifying function. This means that a deterministic, known function for the class membership has to be specified. In our example this function is given in Figure \ref{diag} and implemented in the function \texttt{classify}.\\
Prediction of future observations is now performed by
<<>>=
predict(object = fit, newdata = GlaucomaMVF[c(1:3, 86:88), ])
@
We perform bootstrap aggregated indirect classification with the function \texttt{inbagg}, specifying the number of bootstrap samples via \texttt{nbagg} \citep{ifcs:2001}. Regression or classification trees are fitted for each bootstrap sample, depending on the measurement scale of the specified intermediate variables.
<<>>=
mypredict.rpart <- function(object, newdata) {
    RES <- predict(object, newdata)
    RET <- rep(NA, nrow(newdata))
    NAMES <- rownames(newdata)
    RET[NAMES %in% names(RES)] <- RES[NAMES[NAMES %in% names(RES)]]
    RET
}
fit <- inbagg(formula.indirect,
              pFUN = list(list(model = rpart, predict = mypredict.rpart)),
              cFUN = classify, nbagg = 25, data = GlaucomaMVF)
@
The call for the prediction of future observations remains unchanged.

\section{Error Rate Estimation}

Classification rules are usually assessed by their misclassification rate. Hence, error rate estimation is of central importance. The function \texttt{errorest} implements a unified interface to several resampling-based estimators.
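Schematically, every estimator is available through a single call pattern; the following sketch uses the placeholder names \texttt{fitFUN} and \texttt{predictFUN} (our names, not package objects) for the arguments explained next.
<<errorestscheme, eval = FALSE>>=
## schematic errorest call with placeholder names:
errorest(formula, data,
         model = fitFUN,        # fitting function, e.g., lda or bagging
         predict = predictFUN,  # wrapper returning predicted classes
         estimator = "cv",      # alternatively "boot" or "632plus"
         est.para = control.errorest())
@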
Referring to the example, we apply a linear discriminant analysis and specify the error rate estimator via \texttt{estimator = "cv"}, \texttt{"boot"} or \texttt{"632plus"}, respectively. A 10-fold cross-validation is performed by choosing \texttt{estimator = "cv"} and \texttt{est.para = control.errorest(k = 10)}. The options \texttt{estimator = "boot"} and \texttt{estimator = "632plus"} deliver a bootstrap estimator and its bias-corrected version {\sl .632+} \citep[see][]{efron:1997}; the number of bootstrap samples to be drawn is specified by \texttt{est.para = control.errorest(nboot = 50)}. Further arguments are required to specify the classification technique. The argument \texttt{predict} represents the chosen prediction function. For a unified interface, \texttt{predict} has to be a function of the arguments \texttt{object} and \texttt{newdata} only; therefore, a wrapper function \texttt{mypredict} is necessary for classifiers which require more than those arguments or do not return the predicted classes by default. For a linear discriminant analysis with \texttt{lda}, we need to specify
<<>>=
mypredict.lda <- function(object, newdata) {
    predict(object, newdata = newdata)$class
}
@
and calculate a 10-fold cross-validated error rate estimator for a linear discriminant analysis by calling
<<>>=
errorest(Class ~ ., data = GlaucomaM, model = lda,
         estimator = "cv", predict = mypredict.lda)
@
For the indirect approach the specification of the call becomes slightly more complicated.
%Again for a unified interface a wrapper
%function has to be used, which incorporates the fixed classification rule
The bias-corrected estimator {\sl .632+} is computed by
<<>>=
errorest(formula.indirect, data = GlaucomaMVF, model = inclass,
         estimator = "632plus", pFUN = list(list(model = lm)),
         cFUN = classify)
@
Because of the subdivision of variables, the formula describes the relation between explanatory and intermediate variables only; the true class memberships, however, are needed to estimate the error rate. Hence, in contrast to the function \texttt{inclass}, the data set \texttt{GlaucomaMVF} used in \texttt{errorest} must contain explanatory, intermediate and response variables.

Sometimes it may be necessary to reduce the number of predictors before training a classifier. Estimating the error rate after the variable selection leads to biased estimates of the misclassification error; therefore, one should estimate the error rate of the whole procedure. Within the \texttt{errorest} framework, this can be done as follows. First, we define a function which does both variable selection and training of the classifier. For illustration purposes, we select the predictors by comparing their univariate two-sample $t$-test $P$-values with a prespecified level and train an LDA using the selected variables only.
<<>>=
mymod <- function(formula, data, level = 0.05) {
    # select all predictors that are associated with a
    # univariate t-test p-value of less than level
    sel <- which(lapply(data, function(x) {
        if (!is.numeric(x)) return(1)
        else return(t.test(x ~ data$Class)$p.value)
    }) < level)
    # make sure that the response is still there
    sel <- c(which(colnames(data) %in% "Class"), sel)
    # compute an LDA using the selected predictors only
    mod <- lda(formula, data = data[, sel])
    # and return a function for prediction
    function(newdata) {
        predict(mod, newdata = newdata[, sel])$class
    }
}
@
Note that \texttt{mymod} does not return an object of class \texttt{lda} but a function with argument \texttt{newdata} only.
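To make this mechanism explicit, \texttt{mymod} could be called directly (for illustration only, not run):
<<mymoddirect, eval = FALSE>>=
## mymod returns a prediction function, not a fitted model:
pred <- mymod(Class ~ ., data = GlaucomaM)  # a closure over `mod' and `sel'
pred(newdata = GlaucomaM[1:3, ])            # predicted classes
@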
Thanks to lexical scoping, the returned function has access to \texttt{mod} and \texttt{sel} and is used for computing predicted classes instead of a function \texttt{predict} passed to \texttt{errorest} as an argument. Computing a $5$-fold cross-validated error rate estimator is now essentially a one-liner.
<<>>=
errorest(Class ~ ., data = GlaucomaM, model = mymod,
         estimator = "cv", est.para = control.errorest(k = 5))
@
%%To summarize the performance of the different classification
%%techniques in the considered example of glaucoma diagnosis, the 10-fold
%%cross-validated error estimator delivers the
%%results given in Table \ref{tenf}.
%%\begin{figure}
%%\begin{center}
%%\begin{tabular}{ rrr }
%%\hline
%%dataset & method & error estimate \\
%%\hline
%%\texttt{GlaucomaM} & {\sl slda} & 0.168 \\
%%\texttt{GlaucomaM} & {\sl bagging} & 0.158 \\
%%\texttt{GlaucomaM} & {\sl double-bagging} & 0.153 \\
%%\texttt{GlaucomaMVF} & {\sl inclass-bagging} & 0.206 \\
%%\texttt{GlaucomaMVF} & {\sl inclass-lm} & 0.229 \\
%%\hline
%%\end{tabular}
%%\caption{10-fold cross-validated error estimation of
%%the misclassification error for several classification
%%methods: {\sl slda} - stabilised linear discriminant analysis,
%%{\sl bagging} - bagging with 50 bootstrap samples,
%%{\sl double-bagging} - bagging with 50 bootstrap samples,
%%combined with sLDA, {\sl inclass-bagging} -
%%indirect classification using bagging,
%%{\sl inclass-lm} indirect classification using
%%linear modeling. \label{tenf}}
%%\end{center}
%%\end{figure}
%%Note that an estimator of the variance is available for the ordinary
%%bootstrap estimator (\texttt{estimator="boot"}) only, see \cite{efron:1997}.

\section{Summary}

\ipred tries to implement a unified interface to some recent developments in classification and error rate estimation. It is by no means finished nor perfect, and we very much appreciate comments, suggestions and criticism. Currently, the major drawback is speed. Calling \texttt{rpart} $50$ times, once for each bootstrap sample, is relatively inefficient, but the design of the interfaces, rather than optimization, was our main focus. Besides the examples shown, \texttt{bagging} can be used to compute bagged regression trees and \texttt{errorest} computes estimators of the mean squared error for regression models.

\bibliographystyle{plainnat}
\bibliography{ipred}

\end{document}
ipred/inst/COPYRIGHTS0000644000176200001440000000107513461342153013755 0ustar liggesusers
COPYRIGHT STATUS
----------------

The bulk of this code is
Copyright (C) 2002-2012 Andrea Peters and Torsten Hothorn
except the code in

.R/irpart.R
.R/predict.irpart.R

which are modifications from the files rpart.s and predict.rpart.s
from package `rpart', version 3.1-8, which is

Copyright (C) 2000 Mayo Foundation for Medical Education and Research
with modifications for R by Brian D. Ripley.

All code is subject to the GNU General Public License, Version 2.
See the file COPYING for the exact conditions under which you may
redistribute it.
ipred/inst/NEWS0000644000176200001440000001525514120335223013034 0ustar liggesusers
# $Id: CHANGES,v 1.48 2009/09/09 15:40:28 hothorn Exp $

0.9-12 (15.09.2021)
    predict(, newdata) returned factors with set of levels depending
    on newdata, spotted by Max Kuhn

0.9-11 (12.03.2021)
    suggest party

0.9-10 (03.03.2021)
    suggest randomForest
    RNGversion("3.5.3")

0.9-9 (29.04.2019)
    RNGversion("3.5.3")
    inbagg potentially treated the y ~ .
      case incorrectly

0.9-8 (05.11.2018)
    test output update

0.9-7 (17.08.2018)
    sbrier returned incorrect answers for unseen time points;
    reported and fixed by Marvin Wright

0.9-6 (01.03.2017)
    register C routines

0.9-5 (28.07.2015)
    fix NAMESPACE

0.9-4 (20.02.2015)
    register predict.ipredknn

0.9-3 (20.12.2013)
    use trapezoid rule to compute integrated Brier score in sbrier

0.9-2 (02.09.2013)
    NAMESPACE issues, TH.data

0.9-0 (22.10.2012)
    Due to interface changes in rpart 3.1-55, the bagging function
    had to be rewritten. Results of previous versions are not
    exactly reproducible.

0.8-13 (21.02.2012)
    import(survival)

0.8-12 (20.02.2012)
    use prodlim to compute censoring distributions in sbrier
    (makes a difference for tied survival times)
    GPL (>= 2) and no require in .onLoad

0.8-11 (08.02.2011)
    depends R >= 2.10

0.8-10 (02.02.2011)
    compress data files

0.8-9 (27.01.2011)
    fix nrow problem in sbrier, spotted by Phil Boonstra
    avoid partial matches of function arguments

0.8-8 (09.09.2009)
    documentation fixes

0.8-7 (27.03.2009)
    survival fixes

0.8-6 (28.07.2008)
    make R devel happy ($ is no longer allowed)

0.8-4 (09.10.2007)
    change maintainer

0.8-3 (29.06.2005)
    terms(formula, data) needs `data' argument (suggested by BDR).

0.8-2 (09.12.2004)
    - slda: correct for one explanatory variable:
      ewp <- svd(solve(diag(diag(Snull), ncol = ncol(Snull)))%*%Snull)
                       ^^^^^^^^^^^^^

0.8-1 (25.11.2004)
    - change #!/bin/bash -> #!/bin/sh

0.8-0 (02.06.2004)
    - correction of NAMESPACES

0.7-9 (13.05.2004)
    - description file, insert suggests: mvtnorm

0.7-8 (21.04.2004)
    - don't run selected examples and ipred-tests.R

0.7-7 (02.02.2004)
    - return predicted values for error estimations "boot" and
      "632plus" if required
    - optional argument determining which observations are included
      in each sample within 'errorest'
    - "boot" and "632plus" can be computed simultaneously

0.7-6 (16.01.2004)
    fix tests/ipred-segfault

0.7-5 (19.12.2003)
    examples of inbagg and predict.inbagg (don't use mvtnorm)

0.7-4 (16.12.2003)
    R-1.9.0 fixes

0.7-3 (03.11.2003)
    fix documentation bugs found by `codoc'

0.7-2 (29.10.2003)
    `rpart' is sensitive to compilers / optimization flags: the
    results we compare `ipred's tests with are produced with an
    optimized version of `rpart' (gcc -O2).
    `eigen' in `slda' replaced by `svd'

0.7-1 (08.08.2003)
    adapt to changes in R-devel and lda (package MASS)

0.7-0 (08.08.2003)
    add namespaces

0.6-15 (----)
    new argument "getmodels=TRUE" to cv: the returned object has an
    element "models", a list which contains the models for each fold.
    new interface for inclass and adding method inbagg.

0.6-14 (13.03.2003)
    clean up bagging.Rd

0.6-12 (12.03.2003)
    methods for "integer" for the generics "bagging", "cv" and
    "bootest" do not call methods of generics directly, since they
    may be hidden (because not exported: predict.lda)

0.6-11 (05.03.2003)
    632plus was false when the no-information error rate was less
    than the raw bootstrap estimator (eq. 29 was used instead of
    eq. 32 in Efron & Tibshirani, 1997). Thanks to Ramon Diaz for
    reporting.
    changed the RNGkind to RNGkind("Wichmann-Hill", "Kinderman-Ramage")
    or RNGversion("1.6.2") making the regression tests pass R CMD
    check with R-devel (1.7.0)
    ipred is allowed to import rpart.{anova, class, exp, poisson,
    matrix} from package rpart, thanks to BDR.

0.6-9 (25.02.2003)
    the terms attribute of data in errorest.data.frame may cause
    problems with some predict methods -> deleted

0.6-7 (17.02.2003)
    use a formula / data framework in cv and bootest.
"model" now deals with the original variable names (and formula) instead of "y" and "X". "model" is now allowed to return a function with newdata argument for prediction. This is especially useful for estimating the error of both variable selection and model building simultaneously, the vignette gives a simple example. cv.numeric and bootest.numeric were broken and gave faulty estimates of MSE, both problems fixed if the maximum of votes for any class is not unique, the class is choosen by random in predict.classbagg now. Formerly, the class with lowest level was choosen by mistake. 0.6-6 (06.02.2003) fixes required by attached "methods" package 0.6-4 (18.12.2002) R CMD build problems 0.6-3 (03.12.2002) cv in errorest did faultly use all observations for estimating the error which lead to over optimistic results 0.6-2 (18.10.2002) documentation updates and copyright status added 0.6-1 (02.10.2002) documentation fixes 0.6-0 (27.09.2002) added vignette documentation updates 0.5-7 (23.09.2002) add internal functions irpart and predict.irpart for speeding up standard bagging use error.control for the specification of control parameters cv can be used to caculcate an "honest" prediction for each observation 0.5-6 (12.09.2002) code factors in GBSG2 data as factors. Documentation update. Add keepX argument to ipredbagg 0.5-5 (10.09.2002) set rpart.control(..., xval=0) by default 0.5-4 (05.08.2002) added k-NN with formula interface and stabilized LDA 0.5-3 (01.08.2002) use rpart.control() for regression and survival new documentation for bagging and friends 0.5-2 (30.07.2002) new low-level functions cv and bootest for error rate estimators (misclassification, mse, brier score) 0.5-1 (25.07.2002) bagging code completely rewritten 0.4-6 (27.06.2002) out-of-bag error for regression trees fixed. 0.4-5 (17.06.2002) use "minsplit = 2" in `rpart.control' passed to `bagging' 0.4-4 (17.05.2002) use predict.lda in bagging and predict.bagging bagging(..., method="double") did not work for factors. 0.4-3 (07.05.2002) bugfix in bagging (in models with one regressor), changes in documentation errorest 0.4-2 (10.04.2002) predict.bagging much faster, OOB much faster 0.4-1 (08.04.2002) bugfix in print.inclass, predict.inclass 0.4-0 (26.03.2002) pre-release for CRAN/devel ipred/cleanup0000755000176200001440000000045014120335534012731 0ustar liggesusers#!/bin/sh for f in ./R/*~; do rm -f $f done for f in ./tests/*~; do rm -f $f done for f in .*~; do rm -f $f done for f in ./man/*~; do rm -f $f done for f in ./data/*~; do rm -f $f done for f in *~; do rm -f $f done find . -name "DEADJOE" -exec rm -f {} \; exit 0