ipred/NAMESPACE

useDynLib(ipred, .registration = TRUE)

### exported functions
export(getsurv,
       bagging, ipredbagg,
       control.errorest, errorest,
       bootest, cv,
       inbagg, inclass,
       ipredknn, predict.ipredknn, kfoldcv, mypredict.lm,
       rsurv, sbrier, varset, slda)

importFrom("stats", "as.formula", "complete.cases", "cov",
           "delete.response", "model.extract", "model.frame",
           "model.matrix", "na.omit", "naresid", "predict", "rexp",
           "rnorm", "runif", "terms")
importFrom("utils", "data", "getFromNamespace")

### register S3 methods
#S3method(print, bagging)
S3method(slda, default)
S3method(slda, formula)
S3method(slda, factor)
S3method(bagging, default)
S3method(bagging, data.frame)
S3method(errorest, default)
S3method(errorest, data.frame)
S3method(bootest, default)
S3method(bootest, integer)
S3method(bootest, factor)
S3method(bootest, numeric)
S3method(bootest, Surv)
S3method(inbagg, default)
S3method(inbagg, data.frame)
S3method(inclass, default)
S3method(inclass, data.frame)
S3method(cv, default)
S3method(cv, integer)
S3method(cv, factor)
S3method(cv, numeric)
S3method(cv, Surv)
S3method(ipredbagg, Surv)
S3method(ipredbagg, factor)
S3method(ipredbagg, numeric)
S3method(prune, classbagg)
S3method(prune, regbagg)
S3method(prune, survbagg)
S3method(print, classbagg)
S3method(print, regbagg)
S3method(print, survbagg)
S3method(summary, classbagg)
S3method(summary, regbagg)
S3method(summary, survbagg)
S3method(print, cvclass)
S3method(print, bootestclass)
S3method(print, cvreg)
S3method(print, bootestreg)
S3method(print, cvsurv)
S3method(print, bootestsurv)
S3method(print, inbagg)
S3method(summary, inbagg)
S3method(print, inclass)
S3method(summary, inclass)
S3method(predict, inbagg)
S3method(predict, inclass)
S3method(predict, classbagg)
S3method(predict, regbagg)
S3method(predict, survbagg)
S3method(predict, slda)
S3method(predict, ipredknn)
#S3method(predict, sclass)
#S3method(predict, sreg)
S3method("ipredbagg", "default")
S3method("ipredbagg", "integer")

## imported functions
importFrom(class, knn)
importFrom(rpart, rpart, prune, na.rpart, rpart.control)
importFrom(MASS, lda)
importFrom(survival, Surv, is.Surv, survfit)
importFrom(nnet, multinom)
importFrom(prodlim, prodlim)

ipred/data/DLBCL.rda  [binary R data file; contents omitted]
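The .rda files above and below ship the example data sets documented in the man pages of this package; they are loaded with data(), for example (mirroring the example in DLBCL.Rd):

  library("survival")
  data("DLBCL", package = "ipred")
  survfit(Surv(time, cens) ~ 1, data = DLBCL)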
ipred/data/Smoking.rda  [binary R data file; contents omitted]

ipred/data/GlaucomaMVF.rda  [binary R data file; contents omitted]
ipred/data/dystrophy.rda  [binary R data file; contents omitted]

ipred/man/inclass.Rd  [apparently this file; its header and beginning were lost with the binary data above, only the tail of the example section is recoverable]

classify <- function(data) {
  data <- data[, c("TVPS", "BPNL", "COHB")]
  res <- t(t(data) > c(4438, 232.5, 58))
  res <- as.factor(ifelse(apply(res, 1, sum) > 2, 1, 0))
  res
}

response <- classify(Smoking[ ,c("TVPS", "BPNL", "COHB")])
smoking <- data.frame(Smoking, response)

formula <- response~TVPS+BPNL+COHB~TarY+NicY+COY+Sex+Age

inclass(formula, data = smoking, pFUN = list(list(model = lm,
        predict = mypredict.lm)), cFUN = classify)
}
\keyword{misc}

ipred/man/sbrier.Rd

\name{sbrier}
\alias{sbrier}
\title{ Model Fit for Survival Data }
\description{
  Model fit for survival data: the integrated Brier score for censored
  observations.
}
\usage{
sbrier(obj, pred, btime = range(obj[,1]))
}
\arguments{
  \item{obj}{an object of class \code{Surv}.}
  \item{pred}{predicted values. Either a probability or a list of
    \code{survfit} objects.}
  \item{btime}{numeric vector of times, the integrated Brier score is
    computed if this is of \code{length > 1}. The Brier score at
    \code{btime} is returned otherwise.}
}
\details{
  There is no obvious criterion of model fit for censored data. The Brier
  score for censoring as well as its integrated version were suggested by
  Graf et al. (1999).

  The integrated Brier score is always computed over a subset of the
  interval given by the range of the time slot of the survival object
  \code{obj}.
}
\value{
  The (integrated) Brier score with attribute \code{time} is returned.
}
\seealso{
  More measures for the validation of predicted survival probabilities
  are implemented in package \code{pec}.
}
\references{
  Erika Graf, Claudia Schmoor, Willi Sauerbrei and Martin Schumacher
  (1999), Assessment and comparison of prognostic classification schemes
  for survival data. \emph{Statistics in Medicine} \bold{18}(17-18),
  2529--2545.
}
\examples{
library("survival")
data("DLBCL", package = "ipred")
smod <- Surv(DLBCL$time, DLBCL$cens)

KM <- survfit(smod ~ 1)
# integrated Brier score up to max(DLBCL$time)
sbrier(smod, KM)

# integrated Brier score up to time=50
sbrier(smod, KM, btime=c(0, 50))

# Brier score for time=50
sbrier(smod, KM, btime=50)

# a "real" model: one single survival tree with Intern. Prognostic Index
# and mean gene expression in the first cluster as predictors
mod <- bagging(Surv(time, cens) ~ MGEc.1 + IPI, data=DLBCL, nbagg=1)

# this is a list of survfit objects (==KM-curves), one for each observation
# in DLBCL
pred <- predict(mod, newdata=DLBCL)

# integrated Brier score up to max(time)
sbrier(smod, pred)

# Brier score at time=50
sbrier(smod, pred, btime=50)

# artificial examples and illustrations

cleans <- function(x) { attr(x, "time") <- NULL; names(x) <- NULL; x }

n <- 100
time <- rpois(n, 20)
cens <- rep(1, n)

# checks, Graf et al. page 2536, no censoring at all!
# no information: \pi(t) = 0.5

a <- sbrier(Surv(time, cens), rep(0.5, n), time[50])
stopifnot(all.equal(cleans(a), 0.25))

# some information: \pi(t) = S(t)

n <- 100
time <- 1:100
mod <- survfit(Surv(time, cens) ~ 1)
a <- sbrier(Surv(time, cens), rep(list(mod), n))
mymin <- mod$surv * (1 - mod$surv)
cleans(a)
sum(mymin)/diff(range(time))

# independent of ordering
rand <- sample(1:100)
b <- sbrier(Surv(time, cens)[rand], rep(list(mod), n)[rand])
stopifnot(all.equal(cleans(a), cleans(b)))

\testonly{
  # total information: \pi(t | X) known for every obs
  time <- 1:10
  cens <- rep(1,10)
  pred <- diag(10)
  pred[upper.tri(pred)] <- 1
  diag(pred) <- 0
  # a <- sbrier(Surv(time, cens), pred)
  # stopifnot(all.equal(a, 0))
}

# 2 groups at different risk

time <- c(1:10, 21:30)
strata <- c(rep(1, 10), rep(2, 10))
cens <- rep(1, length(time))

# no information about the groups

a <- sbrier(Surv(time, cens), survfit(Surv(time, cens) ~ 1))
b <- sbrier(Surv(time, cens), rep(list(survfit(Surv(time, cens) ~1)), 20))
stopifnot(all.equal(a, b))

# risk groups known

mod <- survfit(Surv(time, cens) ~ strata)
b <- sbrier(Surv(time, cens), c(rep(list(mod[1]), 10),
                                rep(list(mod[2]), 10)))
stopifnot(a > b)

### GBSG2 data
data("GBSG2", package = "TH.data")

thsum <- function(x) {
  ret <- c(median(x), quantile(x, 0.25), quantile(x, 0.75))
  names(ret)[1] <- "Median"
  ret
}

t(apply(GBSG2[,c("age", "tsize", "pnodes", "progrec", "estrec")], 2, thsum))

table(GBSG2$menostat)
table(GBSG2$tgrade)
table(GBSG2$horTh)

# pooled Kaplan-Meier

mod <- survfit(Surv(time, cens) ~ 1, data=GBSG2)
# integrated Brier score
sbrier(Surv(GBSG2$time, GBSG2$cens), mod)
# Brier score at 5 years
sbrier(Surv(GBSG2$time, GBSG2$cens), mod, btime=1825)

# Nottingham prognostic index

GBSG2 <- GBSG2[order(GBSG2$time),]

NPI <- 0.2*GBSG2$tsize/10 + 1 + as.integer(GBSG2$tgrade)
NPI[NPI < 3.4] <- 1
NPI[NPI >= 3.4 & NPI <= 5.4] <- 2
NPI[NPI > 5.4] <- 3

mod <- survfit(Surv(time, cens) ~ NPI, data=GBSG2)
plot(mod)

pred <- c()
survs <- c()
for (i in sort(unique(NPI)))
  survs <- c(survs, getsurv(mod[i], 1825))

for (i in 1:nrow(GBSG2))
  pred <- c(pred, survs[NPI[i]])

# Brier score of NPI at t=5 years
sbrier(Surv(GBSG2$time, GBSG2$cens), pred, btime=1825)
}
\keyword{survival}

ipred/man/summary.inbagg.Rd

\name{summary.inbagg}
\alias{summary.inbagg}
\alias{print.summary.inbagg}
\title{Summarising Inbagg}
\description{
  Summary of inbagg is returned.
}
\usage{
\method{summary}{inbagg}(object, ...)
}
\arguments{
  \item{object}{an object of class \code{inbagg}.}
  \item{\dots}{additional arguments.}
}
\details{
  A representation of an indirect bagging model (the intermediate
  variables, the number of bootstrap samples, the trees) is printed.
}
\value{
  none
}
\seealso{\code{\link{print.summary.inbagg}}}
\keyword{misc}

ipred/man/Smoking.Rd

\name{Smoking}
\alias{Smoking}
\non_function{}
\title{Smoking Styles}
\usage{data("Smoking")}
\description{
  The \code{Smoking} data frame has 55 rows and 9 columns.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{NR}{numeric, patient number.}
    \item{Sex}{factor, sex of patient.}
    \item{Age}{factor, age group of patient, grouping consisting of those
      in their twenties, those in their thirties and so on.}
    \item{TarY}{numeric, tar yields of the cigarettes.}
    \item{NicY}{numeric, nicotine yields of the cigarettes.}
    \item{COY}{numeric, carbon monoxide (CO) yield of the cigarettes.}
    \item{TVPS}{numeric, total volume puffed smoke.}
    \item{BPNL}{numeric, blood plasma nicotine level.}
    \item{COHB}{numeric, carboxyhaemoglobin level, i.e. amount of CO
      absorbed by the blood stream.}
  }
}
\details{
  The data describes different smoking habits of probands.
}
\source{
  Hand and Taylor (1987), Study F \emph{Smoking Styles}.
}
\references{
  D.J. Hand and C.C. Taylor (1987), \emph{Multivariate analysis of
  variance and repeated measures.} London: Chapman & Hall, pp. 167--181.
}
\keyword{datasets}

ipred/man/predict.bagging.Rd

\name{predict.classbagg}
\alias{predict.classbagg}
\alias{predict.regbagg}
\alias{predict.survbagg}
\title{ Predictions from Bagging Trees }
\description{
  Predict the outcome of a new observation based on multiple trees.
}
\usage{
\method{predict}{classbagg}(object, newdata=NULL, type=c("class", "prob"),
        aggregation=c("majority", "average", "weighted"), \dots)
\method{predict}{regbagg}(object, newdata=NULL,
        aggregation=c("average", "weighted"), \dots)
\method{predict}{survbagg}(object, newdata=NULL, \dots)
}
\arguments{
  \item{object}{object of classes \code{classbagg}, \code{regbagg} or
    \code{survbagg}.}
  \item{newdata}{a data frame of new observations.}
  \item{type}{character string denoting the type of predicted value
    returned for classification trees. Either \code{class} (predicted
    classes are returned) or \code{prob} (estimated class probabilities
    are returned).}
  \item{aggregation}{character string specifying how to aggregate, see
    below.}
  \item{...}{additional arguments, currently not passed to any function.}
}
\details{
  There are (at least) three different ways to aggregate the predictions
  of bagging classification trees. Most famous is class majority voting
  (\code{aggregation="majority"}) where the most frequent class is
  returned. The second way is choosing the class with maximal averaged
  class probability (\code{aggregation="average"}). The third method is
  based on the "aggregated learning sample", introduced by Hothorn et al.
  (2004) for survival trees. The prediction of a new observation is the
  majority class, mean or Kaplan-Meier curve of all observations from the
  learning sample identified by the \code{nbagg} leaves containing the
  new observation. For regression trees, only averaged or weighted
  predictions are possible.

  By default, the out-of-bag estimate is computed if \code{newdata} is
  NOT specified.
  Therefore, the predictions of \code{predict(object)} are "honest" in
  some way (this is not possible for combined models via \code{comb} in
  \code{\link{bagging}}). If you want to compute the predictions for the
  learning sample itself, use \code{newdata} to specify your data.
}
\value{
  The predicted class or estimated class probabilities are returned for
  classification trees. The predicted endpoint is returned in regression
  problems and the predicted Kaplan-Meier curve is returned for survival
  trees.
}
\references{
  Leo Breiman (1996), Bagging Predictors. \emph{Machine Learning}
  \bold{24}(2), 123--140.

  Torsten Hothorn, Berthold Lausen, Axel Benner and Martin
  Radespiel-Troeger (2004), Bagging Survival Trees. \emph{Statistics in
  Medicine}, \bold{23}(1), 77--91.
}
\examples{
data("Ionosphere", package = "mlbench")
Ionosphere$V2 <- NULL # constant within groups

mod <- bagging(Class ~ ., data=Ionosphere)

# out-of-bag estimate
mean(predict(mod) != Ionosphere$Class)

# predictions for the first 10 observations
predict(mod, newdata=Ionosphere[1:10,])
predict(mod, newdata=Ionosphere[1:10,], type="prob")
}
\keyword{tree}

ipred/man/mypredict.lm.Rd

\name{mypredict.lm}
\alias{mypredict.lm}
\title{Predictions Based on Linear Models}
\description{
  Function to predict a vector of full length (number of observations),
  where predictions for observations with missing explanatory values are
  replaced by \code{NA}.
}
\usage{
mypredict.lm(object, newdata)
}
\arguments{
  \item{object}{an object of class \code{lm}.}
  \item{newdata}{matrix or data frame to be predicted according to
    \code{object}.}
}
\value{
  Vector of predicted values.
}
\note{\code{predict.lm} delivers a vector of reduced length, i.e. rows
  where explanatory variables are missing are omitted. The full length of
  the predicted observation vector is necessary in the indirect
  classification approach (\code{\link{predict.inclass}}).}
\keyword{misc}

ipred/man/ipred-internal.Rd

\name{ipred-internal}
\alias{getsurv}
\title{Internal ipred functions}
\description{
  Internal ipred functions.
}
\usage{
getsurv(obj, times)
}
\details{
  These functions are not to be called by the user.
}
\keyword{internal}

ipred/man/rsurv.Rd

\name{rsurv}
\alias{rsurv}
\title{ Simulate Survival Data }
\description{
  Simulation setup for survival data.
}
\usage{
rsurv(N, model=c("A", "B", "C", "D", "tree"), gamma=NULL, fact=1, pnon=10,
      gethaz=FALSE)
}
\arguments{
  \item{N}{number of observations.}
  \item{model}{type of model.}
  \item{gamma}{simulate censoring time as runif(N, 0, gamma). Defaults to
    \code{NULL} (no censoring).}
  \item{fact}{scale parameter for \code{model=tree}.}
  \item{pnon}{number of additional non-informative variables for the tree
    model.}
  \item{gethaz}{logical, indicating whether the hazard rate for each
    observation should be returned.}
}
\details{
  Simulation setup similar to configurations used in LeBlanc and Crowley
  (1992) or Keles and Segal (2002) as well as a tree model used in
  Hothorn et al. (2004). See Hothorn et al. (2004) for the details.
}
\value{
  A data frame with elements \code{time}, \code{cens}, \code{X1} ...
  \code{X5}. If \code{pnon} > 0, additional noninformative covariables
  are added. If \code{gethaz=TRUE}, the \code{hazard} attribute returns
  the hazard rates.
}
\references{
  M. LeBlanc and J.
  Crowley (1992), Relative Risk Trees for Censored Survival Data.
  \emph{Biometrics} \bold{48}, 411--425.

  S. Keles and M. R. Segal (2002), Residual-based tree-structured
  survival analysis. \emph{Statistics in Medicine}, \bold{21}, 313--326.

  Torsten Hothorn, Berthold Lausen, Axel Benner and Martin
  Radespiel-Troeger (2004), Bagging Survival Trees. \emph{Statistics in
  Medicine}, \bold{23}(1), 77--91.
}
\examples{
library("survival")
# 3*X1 + X2
simdat <- rsurv(500, model="C")
coxph(Surv(time, cens) ~ ., data=simdat)
}
\keyword{survival}

ipred/man/kfoldcv.Rd

\name{kfoldcv}
\alias{kfoldcv}
\title{ Subsamples for k-fold Cross-Validation }
\description{
  Computes feasible sample sizes for the k groups in k-fold cv if N/k is
  not an integer.
}
\usage{
kfoldcv(k, N, nlevel=NULL)
}
\arguments{
  \item{k}{number of groups.}
  \item{N}{total sample size.}
  \item{nlevel}{a vector of sample sizes for stratified sampling.}
}
\details{
  If N/k is not an integer, k-fold cv is not unique. This function
  determines meaningful sample sizes for the groups.
}
\value{
  A vector of length \code{k}.
}
\examples{
# 10-fold CV with N = 91
kfoldcv(10, 91)

\testonly{
  k <- sample(5:15, 1)
  k
  N <- sample(50:150, 1)
  N
  stopifnot(sum(kfoldcv(k, N)) == N)
}
}
\keyword{misc}

ipred/man/prune.bagging.Rd

\name{prune.classbagg}
\alias{prune.classbagg}
\alias{prune.regbagg}
\alias{prune.survbagg}
\title{ Pruning for Bagging }
\description{
  Prune each of the trees returned by \code{\link{bagging}}.
}
\usage{
\method{prune}{classbagg}(tree, cp=0.01, ...)
}
\arguments{
  \item{tree}{an object returned by \code{\link{bagging}} (calling this
    \code{tree} is needed by the generic function \code{prune} in package
    \code{rpart}).}
  \item{cp}{complexity parameter, see \code{\link[rpart]{prune.rpart}}.}
  \item{...}{additional arguments to \code{\link[rpart]{prune.rpart}}.}
}
\details{
  By default, \code{\link{bagging}} grows classification trees of maximal
  size. One may want to prune each tree; however, it is not clear whether
  or not this decreases the prediction error.
}
\value{
  An object of the same class as \code{tree} with the trees pruned.
}
\examples{
data("Glass", package = "mlbench")
library("rpart")

mod <- bagging(Type ~ ., data=Glass, nbagg=10, coob=TRUE)
pmod <- prune(mod)
print(pmod)
}
\keyword{tree}

ipred/man/print.cvclass.Rd

\name{print.cvclass}
\alias{print.cvclass}
\alias{print.cvreg}
\alias{print.cvsurv}
\alias{print.bootestclass}
\alias{print.bootestreg}
\alias{print.bootestsurv}
\title{Print Method for Error Rate Estimators}
\description{
  Print objects returned by \code{\link{errorest}} in nice layout.
}
\usage{
\method{print}{cvclass}(x, digits=4, ...)
}
\arguments{
  \item{x}{an object returned by \code{\link{errorest}}.}
  \item{digits}{how many digits should be printed.}
  \item{\dots}{further arguments to be passed to or from methods.}
}
\value{
  none
}
\keyword{misc}

ipred/man/predict.slda.Rd

\name{predict.slda}
\alias{predict.slda}
\title{ Predictions from Stabilised Linear Discriminant Analysis }
\description{
  Predict the class of a new observation based on stabilised LDA.
}
\usage{
\method{predict}{slda}(object, newdata, ...)
}
\arguments{
  \item{object}{object of class \code{slda}.}
  \item{newdata}{a data frame of new observations.}
  \item{...}{additional arguments passed to
    \code{\link[MASS]{predict.lda}}.}
}
\details{
  This function is a method for the generic function
  \code{\link{predict}} for class \code{slda}. For the details see
  \code{\link[MASS]{predict.lda}}.
}
\value{
  A list with components
  \item{class}{the predicted class (a factor).}
  \item{posterior}{posterior probabilities for the classes.}
  \item{x}{the scores of test cases.}
}
\keyword{multivariate}

ipred/man/ipredknn.Rd

\name{ipredknn}
\alias{ipredknn}
\title{ k-Nearest Neighbour Classification }
\description{
  $k$-nearest neighbour classification with an interface compatible with
  \code{\link{bagging}} and \code{\link{errorest}}.
}
\usage{
ipredknn(formula, data, subset, na.action, k=5, \dots)
}
\arguments{
  \item{formula}{a formula of the form \code{lhs ~ rhs} where \code{lhs}
    is the response variable and \code{rhs} a set of predictors.}
  \item{data}{optional data frame containing the variables in the model
    formula.}
  \item{subset}{optional vector specifying a subset of observations to be
    used.}
  \item{na.action}{function which indicates what should happen when the
    data contain \code{NA}s.}
  \item{k}{number of neighbours considered, defaults to 5.}
  \item{...}{additional parameters.}
}
\details{
  This is a wrapper to \code{\link[class]{knn}} in order to be able to
  use k-NN in \code{\link{bagging}} and \code{\link{errorest}}.
}
\value{
  An object of class \code{ipredknn}. See \code{\link{predict.ipredknn}}.
}
\examples{
library("mlbench")
learn <- as.data.frame(mlbench.twonorm(300))

mypredict.knn <- function(object, newdata)
  predict.ipredknn(object, newdata, type="class")

errorest(classes ~., data=learn, model=ipredknn,
         predict=mypredict.knn)
}
\keyword{multivariate}

ipred/man/bagging.Rd

\name{bagging}
\alias{bagging}
\alias{ipredbagg}
\alias{ipredbagg.factor}
\alias{ipredbagg.integer}
\alias{ipredbagg.numeric}
\alias{ipredbagg.Surv}
\alias{ipredbagg.default}
\alias{bagging.data.frame}
\alias{bagging.default}
\title{Bagging Classification, Regression and Survival Trees }
\description{
  Bagging for classification, regression and survival trees.
}
\usage{
\method{ipredbagg}{factor}(y, X=NULL, nbagg=25,
        control=rpart.control(minsplit=2, cp=0, xval=0),
        comb=NULL, coob=FALSE, ns=length(y), keepX = TRUE, \dots)
\method{ipredbagg}{numeric}(y, X=NULL, nbagg=25,
        control=rpart.control(xval=0),
        comb=NULL, coob=FALSE, ns=length(y), keepX = TRUE, \dots)
\method{ipredbagg}{Surv}(y, X=NULL, nbagg=25,
        control=rpart.control(xval=0),
        comb=NULL, coob=FALSE, ns=dim(y)[1], keepX = TRUE, \dots)
\method{bagging}{data.frame}(formula, data, subset, na.action=na.rpart,
        \dots)
}
\arguments{
  \item{y}{the response variable: either a factor vector of class labels
    (bagging classification trees), a vector of numerical values (bagging
    regression trees) or an object of class
    \code{\link[survival]{Surv}} (bagging survival trees).}
  \item{X}{a data frame of predictor variables.}
  \item{nbagg}{an integer giving the number of bootstrap replications.}
  \item{coob}{a logical indicating whether an out-of-bag estimate of the
    error rate (misclassification error, root mean squared error or Brier
    score) should be computed. See \code{\link{predict.classbagg}} for
    details.}
  \item{control}{options that control details of the \code{rpart}
    algorithm, see \code{\link[rpart]{rpart.control}}. It is wise to set
    \code{xval = 0} in order to save computing time.
    Note that the default values depend on the class of \code{y}.}
  \item{comb}{a list of additional models for model combination, see
    below for some examples. Note that the argument \code{method} for
    double-bagging is no longer there; \code{comb} is much more
    flexible.}
  \item{ns}{number of samples to draw from the learning sample. By
    default, the usual bootstrap n out of n with replacement is
    performed. If \code{ns} is smaller than \code{length(y)}, subagging
    (Buehlmann and Yu, 2002), i.e. sampling \code{ns} out of
    \code{length(y)} without replacement, is performed.}
  \item{keepX}{a logical indicating whether the data frame of predictors
    should be returned. Note that the computation of the out-of-bag
    estimator requires \code{keepX=TRUE}.}
  \item{formula}{a formula of the form \code{lhs ~ rhs} where \code{lhs}
    is the response variable and \code{rhs} a set of predictors.}
  \item{data}{optional data frame containing the variables in the model
    formula.}
  \item{subset}{optional vector specifying a subset of observations to be
    used.}
  \item{na.action}{function which indicates what should happen when the
    data contain \code{NA}s. Defaults to \code{\link[rpart]{na.rpart}}.}
  \item{...}{additional parameters passed to \code{ipredbagg} or
    \code{\link[rpart]{rpart}}, respectively.}
}
\details{
  The random forest implementations
  \code{\link[randomForest]{randomForest}} and
  \code{\link[party]{cforest}} are more flexible and reliable for
  computing bootstrap-aggregated trees than this function and should be
  used instead.

  Bagging for classification and regression trees was suggested by
  Breiman (1996a, 1998) in order to stabilise trees.

  The trees in this function are computed using the implementation in the
  \code{\link[rpart]{rpart}} package. The generic function
  \code{ipredbagg} implements methods for different responses. If
  \code{y} is a factor, classification trees are constructed. For
  numerical vectors \code{y}, regression trees are aggregated and if
  \code{y} is a survival object, bagging survival trees (Hothorn et al.,
  2004) is performed. The function \code{bagging} offers a formula based
  interface to \code{ipredbagg}.

  \code{nbagg} bootstrap samples are drawn and a tree is constructed for
  each of them. There is no general rule for when to stop growing the
  trees. The size of the trees can be controlled by the \code{control}
  argument or \code{\link{prune.classbagg}}. By default, classification
  trees are as large as possible whereas regression trees and survival
  trees are built with the standard options of
  \code{\link[rpart]{rpart.control}}. If \code{nbagg=1}, one single tree
  is computed for the whole learning sample without bootstrapping.

  If \code{coob} is TRUE, the out-of-bag sample (Breiman, 1996b) is used
  to estimate the prediction error corresponding to \code{class(y)}.
  Alternatively, the out-of-bag sample can be used for model combination;
  an out-of-bag error rate estimator is not available in this case.
  Double-bagging (Hothorn and Lausen, 2003) computes a LDA on the
  out-of-bag sample and uses the discriminant variables as additional
  predictors for the classification trees. \code{comb} is an optional
  list of lists with two elements \code{model} and \code{predict}.
  \code{model} is a function with arguments \code{formula} and
  \code{data}. \code{predict} is a function with arguments
  \code{object, newdata} only. If the estimation of the covariance matrix
  in \code{\link{lda}} fails due to a limited out-of-bag sample size, one
  can use \code{\link{slda}} instead. See the example section for an
  example of double-bagging.
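
  A minimal sketch of such a combination (the data frame \code{dat} and
  factor response \code{y} are placeholders, not part of the package):
  double-bagging with the stabilised LDA mentioned above instead of plain
  \code{lda}.

    ## double-bagging with slda(); its predict method returns the
    ## discriminant scores in component $x, which are used as
    ## additional predictors in each of the 25 trees
    comb.slda <- list(list(model = slda,
                           predict = function(obj, newdata)
                               predict(obj, newdata)$x))
    mod <- bagging(y ~ ., data = dat, nbagg = 25, comb = comb.slda)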
  The methodology is not limited to a combination with LDA: bundling
  (Hothorn and Lausen, 2005) can be used with arbitrary classifiers.

  NOTE: Up to ipred version 0.9-0, bagging was performed using a modified
  version of the original rpart function. Due to interface changes in
  rpart 3.1-55, the bagging function had to be rewritten. Results of
  previous versions are not exactly reproducible.
}
\value{
  The class of the object returned depends on \code{class(y)}:
  \code{classbagg, regbagg} and \code{survbagg}. Each is a list with
  elements
  \item{y}{the vector of responses.}
  \item{X}{the data frame of predictors.}
  \item{mtrees}{multiple trees: a list of length \code{nbagg} containing
    the trees (and possibly additional objects) for each bootstrap
    sample.}
  \item{OOB}{logical whether the out-of-bag estimate should be computed.}
  \item{err}{if \code{OOB=TRUE}, the out-of-bag estimate of
    misclassification or root mean squared error or the Brier score for
    censored data.}
  \item{comb}{logical whether a combination of models was requested.}

  For each class methods for the generics
  \code{\link[rpart]{prune.rpart}}, \code{\link{print}},
  \code{\link{summary}} and \code{\link{predict}} are available for
  inspection of the results and prediction, for example:
  \code{\link{print.classbagg}}, \code{\link{summary.classbagg}},
  \code{\link{predict.classbagg}} and \code{\link{prune.classbagg}} for
  classification problems.
}
\references{
  Leo Breiman (1996a), Bagging Predictors. \emph{Machine Learning}
  \bold{24}(2), 123--140.

  Leo Breiman (1996b), Out-Of-Bag Estimation. \emph{Technical Report}
  \url{https://www.stat.berkeley.edu/~breiman/OOBestimation.pdf}.

  Leo Breiman (1998), Arcing Classifiers. \emph{The Annals of Statistics}
  \bold{26}(3), 801--824.

  Peter Buehlmann and Bin Yu (2002), Analyzing Bagging. \emph{The Annals
  of Statistics} \bold{30}(4), 927--961.

  Torsten Hothorn and Berthold Lausen (2003), Double-Bagging: Combining
  classifiers by bootstrap aggregation. \emph{Pattern Recognition},
  \bold{36}(6), 1303--1309.

  Torsten Hothorn and Berthold Lausen (2005), Bundling Classifiers by
  Bagging Trees. \emph{Computational Statistics & Data Analysis},
  \bold{49}, 1068--1078.

  Torsten Hothorn, Berthold Lausen, Axel Benner and Martin
  Radespiel-Troeger (2004), Bagging Survival Trees. \emph{Statistics in
  Medicine}, \bold{23}(1), 77--91.
}
\examples{
library("MASS")
library("survival")

# Classification: Breast Cancer data

data("BreastCancer", package = "mlbench")

# Test set error bagging (nbagg = 50): 3.7\% (Breiman, 1998, Table 5)

mod <- bagging(Class ~ Cl.thickness + Cell.size
                + Cell.shape + Marg.adhesion
                + Epith.c.size + Bare.nuclei
                + Bl.cromatin + Normal.nucleoli
                + Mitoses, data=BreastCancer, coob=TRUE)
print(mod)

# Test set error bagging (nbagg=50): 7.9\% (Breiman, 1996a, Table 2)

data("Ionosphere", package = "mlbench")
Ionosphere$V2 <- NULL # constant within groups

bagging(Class ~ ., data=Ionosphere, coob=TRUE)

# Double-Bagging: combine LDA and classification trees

# predict returns the linear discriminant values, i.e. linear combinations
# of the original predictors

comb.lda <- list(list(model=lda, predict=function(obj, newdata)
                                 predict(obj, newdata)$x))

# Note: out-of-bag estimator is not available in this situation, use
# errorest

mod <- bagging(Class ~ ., data=Ionosphere, comb=comb.lda)

predict(mod, Ionosphere[1:10,])

# Regression:

data("BostonHousing", package = "mlbench")

# Test set error (nbagg=25, trees pruned): 3.41 (Breiman, 1996a, Table 8)

mod <- bagging(medv ~ ., data=BostonHousing, coob=TRUE)
print(mod)

library("mlbench")
learn <- as.data.frame(mlbench.friedman1(200))

# Test set error (nbagg=25, trees pruned): 2.47 (Breiman, 1996a, Table 8)

mod <- bagging(y ~ ., data=learn, coob=TRUE)
print(mod)

# Survival data

# Brier score for censored data estimated by
# 10 times 10-fold cross-validation: 0.2 (Hothorn et al., 2004)

data("DLBCL", package = "ipred")
mod <- bagging(Surv(time,cens) ~ MGEc.1 + MGEc.2 + MGEc.3 + MGEc.4 +
                                 MGEc.5 + MGEc.6 + MGEc.7 + MGEc.8 +
                                 MGEc.9 + MGEc.10 + IPI, data=DLBCL,
                                 coob=TRUE)
print(mod)
}
\keyword{tree}

ipred/man/print.bagging.Rd

\name{print.classbagg}
\alias{print}
\alias{print.classbagg}
\alias{print.regbagg}
\alias{print.survbagg}
\title{Print Method for Bagging Trees}
\description{
  Print objects returned by \code{\link{bagging}} in nice layout.
}
\usage{
\method{print}{classbagg}(x, digits, \dots)
}
\arguments{
  \item{x}{object returned by \code{\link{bagging}}.}
  \item{digits}{how many digits should be printed.}
  \item{\dots}{further arguments to be passed to or from methods.}
}
\value{
  none
}
\keyword{tree}

ipred/man/predict.inclass.Rd

\name{predict.inclass}
\alias{predict.inclass}
\title{Predictions from an Inclass Object}
\description{
  Predicts the class membership of new observations through indirect
  classification.
}
\usage{
\method{predict}{inclass}(object, newdata, ...)
}
\arguments{
  \item{object}{object of class \code{inclass}, see
    \code{\link{inclass}}.}
  \item{newdata}{data frame to be classified.}
  \item{\dots}{additional arguments corresponding to the predictive
    models specified in \code{\link{inclass}}.}
}
\details{
  Predictions of class memberships are calculated, i.e. values of the
  intermediate variables are predicted and classified following
  \code{cFUN}, see \code{\link{inclass}}.
}
\value{
  The vector of predicted classes is returned.
}
\references{
  David J. Hand, Hua Gui Li, Niall M. Adams (2001), Supervised
  classification with structured class definitions. \emph{Computational
  Statistics & Data Analysis} \bold{36}, 209--225.

  Andrea Peters, Berthold Lausen, Georg Michelson and Olaf Gefeller
  (2003), Diagnosis of glaucoma by indirect classifiers. \emph{Methods of
  Information in Medicine} \bold{1}, 99--103.
}
\seealso{\code{\link{inclass}}}
\examples{
\dontrun{
# Simulation model, classification rule following Hand et al. (2001)
theta90 <- varset(N = 1000, sigma = 0.1, theta = 90, threshold = 0)
dataset <- as.data.frame(cbind(theta90$explanatory, theta90$intermediate))
names(dataset) <- c(colnames(theta90$explanatory),
                    colnames(theta90$intermediate))

classify <- function(Y, threshold = 0) {
  Y <- Y[,c("y1", "y2")]
  z <- (Y > threshold)
  resp <- as.factor(ifelse((z[,1] + z[,2]) > 1, 1, 0))
  return(resp)
}

formula <- response~y1+y2~x1+x2

fit <- inclass(formula, data = dataset, pFUN = list(list(model = lm)),
               cFUN = classify)

predict(object = fit, newdata = dataset)

data("Smoking", package = "ipred")

# explanatory variables are: TarY, NicY, COY, Sex, Age
# intermediate variables are: TVPS, BPNL, COHB
# response is defined by:

classify <- function(data){
  data <- data[,c("TVPS", "BPNL", "COHB")]
  res <- t(t(data) > c(4438, 232.5, 58))
  res <- as.factor(ifelse(apply(res, 1, sum) > 2, 1, 0))
  res
}

response <- classify(Smoking[ ,c("TVPS", "BPNL", "COHB")])
smoking <- cbind(Smoking, response)

formula <- response~TVPS+BPNL+COHB~TarY+NicY+COY+Sex+Age

fit <- inclass(formula, data = smoking, pFUN = list(list(model = lm)),
               cFUN = classify)

predict(object = fit, newdata = smoking)
}

data("GlaucomaMVF", package = "ipred")
library("rpart")
glaucoma <- GlaucomaMVF[,(names(GlaucomaMVF) != "tension")]
# explanatory variables are derived from a laser scanning image and the
# intraocular pressure
# intermediate variables are: clv, cs, lora
# response is defined by

classify <- function (data) {
  attach(data)
  res <- ifelse((!is.na(clv) & !is.na(lora) & clv >= 5.1 & lora >=
                49.23372) | (!is.na(clv) & !is.na(lora) & !is.na(cs) &
                clv < 5.1 & lora >= 58.55409 & cs < 1.405) |
                (is.na(clv) & !is.na(lora) & !is.na(cs) & lora >=
                58.55409 & cs < 1.405) | (!is.na(clv) & is.na(lora) &
                cs < 1.405), 0, 1)
  detach(data)
  factor (res, labels = c("glaucoma", "normal"))
}

fit <- inclass(Class~clv+lora+cs~., data = glaucoma,
               pFUN = list(list(model = rpart)), cFUN = classify)

data("GlaucomaM", package = "TH.data")
predict(object = fit, newdata = GlaucomaM)
}
\keyword{misc}

ipred/man/DLBCL.Rd

\name{DLBCL}
\alias{DLBCL}
\non_function{}
\title{ Diffuse Large B-Cell Lymphoma }
\usage{data("DLBCL")}
\description{
  A data frame with gene expression data from diffuse large B-cell
  lymphoma (DLBCL) patients.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{DLCL.Sample}{DLBCL identifier.}
    \item{Gene.Expression}{Gene expression group.}
    \item{time}{survival time in months.}
    \item{cens}{censoring: 0 censored, 1 dead.}
    \item{IPI}{International prognostic index.}
    \item{MGEc.1}{mean gene expression in cluster 1.}
    \item{MGEc.2}{mean gene expression in cluster 2.}
    \item{MGEc.3}{mean gene expression in cluster 3.}
    \item{MGEc.4}{mean gene expression in cluster 4.}
    \item{MGEc.5}{mean gene expression in cluster 5.}
    \item{MGEc.6}{mean gene expression in cluster 6.}
    \item{MGEc.7}{mean gene expression in cluster 7.}
    \item{MGEc.8}{mean gene expression in cluster 8.}
    \item{MGEc.9}{mean gene expression in cluster 9.}
    \item{MGEc.10}{mean gene expression in cluster 10.}
  }
}
\source{
  Except for \code{MGEc.*}, the data is published at
  \url{http://llmpp.nih.gov/lymphoma/data.shtml}. \code{MGEc.*} is the
  mean of the gene expression in each of ten clusters derived by
  agglomerative average linkage hierarchical cluster analysis (Hothorn et
  al., 2004).
}
\references{
  Ash A. Alizadeh et al. (2000), Distinct types of diffuse large B-cell
  lymphoma identified by gene expression profiling.
  \emph{Nature}, \bold{403}, 504--509.

  Torsten Hothorn, Berthold Lausen, Axel Benner and Martin
  Radespiel-Troeger (2004), Bagging Survival Trees. \emph{Statistics in
  Medicine}, \bold{23}, 77--91.
}
\examples{
suppressWarnings(RNGversion("3.5.3"))
set.seed(290875)

data("DLBCL", package="ipred")
library("survival")
survfit(Surv(time, cens) ~ 1, data=DLBCL)
}
\keyword{datasets}

ipred/man/summary.bagging.Rd

\name{summary.classbagg}
\alias{summary.classbagg}
\alias{summary.regbagg}
\alias{summary.survbagg}
\alias{print.summary.bagging}
\title{Summarising Bagging}
\description{
  \code{summary} method for objects returned by \code{\link{bagging}}.
}
\usage{
\method{summary}{classbagg}(object, \dots)
}
\arguments{
  \item{object}{object returned by \code{\link{bagging}}.}
  \item{\dots}{further arguments to be passed to or from methods.}
}
\details{
  A representation of all trees in the object is printed.
}
\value{
  none
}
\keyword{tree}

ipred/man/varset.Rd

\name{varset}
\alias{varset}
\title{Simulation Model}
\description{
  Three sets of variables are calculated: explanatory, intermediate and
  response variables.
}
\usage{
varset(N, sigma=0.1, theta=90, threshold=0, u=1:3)
}
\arguments{
  \item{N}{number of simulated observations.}
  \item{sigma}{standard deviation of the error term.}
  \item{theta}{angle between two u vectors.}
  \item{threshold}{cutpoint for classifying to 0 or 1.}
  \item{u}{starting values.}
}
\details{
  For each observation, values of two explanatory variables
  \eqn{x = (x_1, x_2)^{\top}} and of two responses
  \eqn{y = (y_1, y_2)^{\top}} are simulated, following the formula:
  \deqn{
  y = U*x+e = ({u_1^{\top} \atop u_2^{\top}})*x+e
  }
  where x is the evaluation of a standard normal random variable and e is
  generated by a normal variable with standard deviation \code{sigma}. U
  is a 2*2 matrix, where
  \deqn{
  u_1 = ({u_{1, 1} \atop u_{1, 2}}),
  u_2 = ({u_{2, 1} \atop u_{2, 2}}),
  ||u_1|| = ||u_2|| = 1,
  }
  i.e. a matrix of two normalised vectors.
}
\value{
  A list containing the following arguments
  \item{explanatory}{N*2 matrix of 2 explanatory variables.}
  \item{intermediate}{N*2 matrix of 2 intermediate variables.}
  \item{response}{response vectors with values 0 or 1.}
}
\references{
  David J. Hand, Hua Gui Li, Niall M. Adams (2001), Supervised
  classification with structured class definitions. \emph{Computational
  Statistics & Data Analysis} \bold{36}, 209--225.
}
\examples{
theta90 <- varset(N = 1000, sigma = 0.1, theta = 90, threshold = 0)
theta0 <- varset(N = 1000, sigma = 0.1, theta = 0, threshold = 0)
par(mfrow = c(1, 2))
plot(theta0$intermediate)
plot(theta90$intermediate)
}
\keyword{misc}

ipred/man/summary.inclass.Rd

\name{summary.inclass}
\alias{summary.inclass}
\alias{print.summary.inclass}
\title{Summarising Inclass}
\description{
  Summary of inclass is returned.
}
\usage{
\method{summary}{inclass}(object, ...)
}
\arguments{
  \item{object}{an object of class \code{inclass}.}
  \item{\dots}{additional arguments.}
}
\details{
  A representation of an indirect classification model (the intermediate
  variables, the modelling technique used and the prediction model) is
  printed.
}
\value{
  none
}
\seealso{\code{\link{print.summary.inclass}}}
\keyword{misc}

ipred/man/inbagg.Rd

\name{inbagg}
\alias{inbagg}
\alias{inbagg.default}
\alias{inbagg.data.frame}
\title{Indirect Bagging}
\description{
  Function to perform the indirect bagging and subagging.
}
\usage{
\method{inbagg}{data.frame}(formula, data, pFUN=NULL,
  cFUN=list(model = NULL, predict = NULL, training.set = NULL),
  nbagg = 25, ns = 0.5, replace = FALSE, ...)
}
\arguments{
  \item{formula}{formula. A \code{formula} specified as
    \code{y~w1+w2+w3~x1+x2+x3} describes how to model the intermediate
    variables \code{w1, w2, w3} and the response variable \code{y}, if no
    other formula is specified by the elements of \code{pFUN} or in
    \code{cFUN}.}
  \item{data}{data frame of explanatory, intermediate and response
    variables.}
  \item{pFUN}{list of lists, which describe models for the intermediate
    variables, details are given below.}
  \item{cFUN}{either a fixed function with argument \code{newdata} and
    returning the class membership by default, or a list specifying a
    classifying model, similar to one element of \code{pFUN}. Details are
    given below.}
  \item{nbagg}{number of bootstrap samples.}
  \item{ns}{proportion of sample to be drawn from the learning sample. By
    default, subagging with 50\% is performed, i.e. draw 0.5*n out of n
    without replacement.}
  \item{replace}{logical. Draw with or without replacement.}
  \item{\dots}{additional arguments (e.g. \code{subset}).}
}
\details{
  A given data set is subdivided into three types of variables:
  explanatory, intermediate and response variables.\cr

  Here, each specified intermediate variable is modelled separately
  following \code{pFUN}, a list of lists with elements specifying an
  arbitrary number of models for the intermediate variables and an
  optional element \code{training.set = c("oob", "bag", "all")}. The
  element \code{training.set} determines whether predictive models for
  the intermediates are calculated based on the out-of-bag sample
  (\code{"oob"}), the default, on the bag sample (\code{"bag"}) or on all
  available observations (\code{"all"}). The elements of \code{pFUN},
  specifying the models for the intermediate variables, are lists as
  described in \code{\link{inclass}}. Note that, if no formula is given
  in these elements, the functional relationship of \code{formula} is
  used.\cr

  The response variable is modelled following \code{cFUN}. This can
  either be a fixed classifying function as described in Peters et al.
  (2003) or a list, which specifies the modelling technique to be
  applied. The list contains the arguments \code{model} (which model to
  be fitted), \code{predict} (optional, how to predict), \code{formula}
  (optional, of type \code{y~w1+w2+w3+x1+x2}, determines the variables
  the classifying function is based on) and the optional argument
  \code{training.set = c("fitted.bag", "original", "fitted.subset")},
  specifying whether the classifying function is trained on the predicted
  observations of the bag sample (\code{"fitted.bag"}), on the original
  observations (\code{"original"}) or on the predicted observations not
  included in a defined subset (\code{"fitted.subset"}). By default, the
  formula specified in \code{formula} determines the variables the
  classifying function is based on.\cr

  Note that the default of \code{cFUN = list(model = NULL, training.set =
  "fitted.bag")} uses the function \code{\link[rpart]{rpart}} and the
  predict function \code{predict(object, newdata, type = "class")}.
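
  A list-valued \code{cFUN} as described above might, for instance, look
  like the following sketch (\code{DATA} and \code{pFUN} refer to the
  objects constructed in the example section below; the use of
  \code{slda} here is only an illustration, any classifier with a
  matching \code{predict} works):

    ## classify on the original observations with slda() instead of
    ## the default rpart() on the fitted bag sample
    cFUN <- list(model = slda,
                 predict = function(object, newdata)
                     predict(object, newdata)$class,
                 training.set = "original")
    inbagg(y ~ w1 + w2 + w3 ~ x1 + x2 + x3, data = DATA,
           pFUN = pFUN, cFUN = cFUN)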
}
\value{
  An object of class \code{"inbagg"}, that is a list with elements
  \item{mtrees}{a list of length \code{nbagg}, describing the prediction
    models corresponding to each bootstrap sample. Each element of
    \code{mtrees} is a list with elements \code{bindx} (observations of
    bag sample), \code{btree} (classifying function of bag sample) and
    \code{bfct} (predictive models for intermediates of bag sample).}
  \item{y}{vector of response values.}
  \item{W}{data frame of intermediate variables.}
  \item{X}{data frame of explanatory variables.}
}
\references{
  David J. Hand, Hua Gui Li, Niall M. Adams (2001), Supervised
  classification with structured class definitions. \emph{Computational
  Statistics & Data Analysis} \bold{36}, 209--225.

  Andrea Peters, Berthold Lausen, Georg Michelson and Olaf Gefeller
  (2003), Diagnosis of glaucoma by indirect classifiers. \emph{Methods of
  Information in Medicine} \bold{1}, 99--103.
}
\seealso{\code{\link[rpart]{rpart}}, \code{\link{bagging}},
  \code{\link{lm}}}
\examples{
library("MASS")
library("rpart")
y <- as.factor(sample(1:2, 100, replace = TRUE))
W <- mvrnorm(n = 200, mu = rep(0, 3), Sigma = diag(3))
X <- mvrnorm(n = 200, mu = rep(2, 3), Sigma = diag(3))
colnames(W) <- c("w1", "w2", "w3")
colnames(X) <- c("x1", "x2", "x3")

DATA <- data.frame(y, W, X)

pFUN <- list(list(formula = w1~x1+x2, model = lm, predict = mypredict.lm),
             list(model = rpart))

inbagg(y~w1+w2+w3~x1+x2+x3, data = DATA, pFUN = pFUN)
}
\keyword{misc}

ipred/man/GlaucomaMVF.Rd

\name{GlaucomaMVF}
\alias{GlaucomaMVF}
\non_function{}
\title{ Glaucoma Database }
\usage{data("GlaucomaMVF")}
\description{
  The \code{GlaucomaMVF} data has 170 observations in two classes. 66
  predictors are derived from a confocal laser scanning image of the
  optic nerve head, from a visual field test, from fundus photography and
  from a measurement of the intraocular pressure.
}
\format{
  This data frame contains the following predictors describing the
  morphology of the optic nerve head, the visual field, the intraocular
  pressure and a membership variable:
  \describe{
    \item{ag}{area global.}
    \item{at}{area temporal.}
    \item{as}{area superior.}
    \item{an}{area nasal.}
    \item{ai}{area inferior.}
    \item{eag}{effective area global.}
    \item{eat}{effective area temporal.}
    \item{eas}{effective area superior.}
    \item{ean}{effective area nasal.}
    \item{eai}{effective area inferior.}
    \item{abrg}{area below reference global.}
    \item{abrt}{area below reference temporal.}
    \item{abrs}{area below reference superior.}
    \item{abrn}{area below reference nasal.}
    \item{abri}{area below reference inferior.}
    \item{hic}{height in contour.}
    \item{mhcg}{mean height contour global.}
    \item{mhct}{mean height contour temporal.}
    \item{mhcs}{mean height contour superior.}
    \item{mhcn}{mean height contour nasal.}
    \item{mhci}{mean height contour inferior.}
    \item{phcg}{peak height contour.}
    \item{phct}{peak height contour temporal.}
    \item{phcs}{peak height contour superior.}
    \item{phcn}{peak height contour nasal.}
    \item{phci}{peak height contour inferior.}
    \item{hvc}{height variation contour.}
    \item{vbsg}{volume below surface global.}
    \item{vbst}{volume below surface temporal.}
    \item{vbss}{volume below surface superior.}
    \item{vbsn}{volume below surface nasal.}
    \item{vbsi}{volume below surface inferior.}
    \item{vasg}{volume above surface global.}
    \item{vast}{volume above surface temporal.}
    \item{vass}{volume above surface superior.}
    \item{vasn}{volume above surface nasal.}
    \item{vasi}{volume above surface inferior.}
    \item{vbrg}{volume below reference global.}
    \item{vbrt}{volume below reference temporal.}
    \item{vbrs}{volume below reference superior.}
    \item{vbrn}{volume below reference nasal.}
    \item{vbri}{volume below reference inferior.}
    \item{varg}{volume above reference global.}
    \item{vart}{volume above reference temporal.}
    \item{vars}{volume above reference superior.}
    \item{varn}{volume above reference nasal.}
    \item{vari}{volume above reference inferior.}
    \item{mdg}{mean depth global.}
    \item{mdt}{mean depth temporal.}
    \item{mds}{mean depth superior.}
    \item{mdn}{mean depth nasal.}
    \item{mdi}{mean depth inferior.}
    \item{tmg}{third moment global.}
    \item{tmt}{third moment temporal.}
    \item{tms}{third moment superior.}
    \item{tmn}{third moment nasal.}
    \item{tmi}{third moment inferior.}
    \item{mr}{mean radius.}
    \item{rnf}{retinal nerve fiber thickness.}
    \item{mdic}{mean depth in contour.}
    \item{emd}{effective mean depth.}
    \item{mv}{mean variability.}
    \item{tension}{intraocular pressure.}
    \item{clv}{corrected loss variance, variability of the visual field.}
    \item{cs}{contrast sensitivity of the visual field.}
    \item{lora}{loss of rim area, measured by fundus photography.}
    \item{Class}{a factor with levels \code{glaucoma} and \code{normal}.}
  }
}
\details{
  Confocal laser images of the eye background are taken with the
  Heidelberg Retina Tomograph and variables 1-62 are derived. Most of
  these variables describe either the area or volume in certain parts of
  the papilla and are measured in four sectors (temporal, superior, nasal
  and inferior) as well as for the whole papilla (global). The global
  measurement is, roughly, the sum of the measurements taken in the four
  sectors.

  The perimeter `Octopus' measures the visual field variables \code{clv}
  and \code{cs}; stereo optic disc photographs were taken with a
  telecentric fundus camera and \code{lora} is derived.
  Observations of both groups are matched by age and sex, to prevent
  possible confounding.
}
\source{
  Andrea Peters, Berthold Lausen, Georg Michelson and Olaf Gefeller
  (2003), Diagnosis of glaucoma by indirect classifiers. \emph{Methods of
  Information in Medicine} \bold{1}, 99--103.
}
\note{
  \code{GlaucomaMVF} overlaps in some parts with
  \code{\link[TH.data]{GlaucomaM}}.
}
\examples{
\dontrun{

data("GlaucomaMVF", package = "ipred")
library("rpart")

response <- function (data) {
  attach(data)
  res <- ifelse((!is.na(clv) & !is.na(lora) & clv >= 5.1 & lora >=
                49.23372) | (!is.na(clv) & !is.na(lora) & !is.na(cs) &
                clv < 5.1 & lora >= 58.55409 & cs < 1.405) |
                (is.na(clv) & !is.na(lora) & !is.na(cs) & lora >=
                58.55409 & cs < 1.405) | (!is.na(clv) & is.na(lora) &
                cs < 1.405), 0, 1)
  detach(data)
  factor (res, labels = c("glaucoma", "normal"))
}

errorest(Class~clv+lora+cs~., data = GlaucomaMVF, model=inclass,
         estimator="cv", pFUN = list(list(model = rpart)),
         cFUN = response)
}
}
\keyword{datasets}

ipred/man/dystrophy.Rd

\name{dystrophy}
\alias{dystrophy}
\non_function{}
\title{Detection of muscular dystrophy carriers.}
\usage{data(dystrophy)}
\description{
  The \code{dystrophy} data frame has 209 rows and 10 columns.
}
\format{
  This data frame contains the following columns:
  \describe{
    \item{OBS}{numeric. Observation number.}
    \item{HospID}{numeric. Hospital ID number.}
    \item{AGE}{numeric, age in years.}
    \item{M}{numeric. Month of examination.}
    \item{Y}{numeric. Year of examination.}
    \item{CK}{numeric. Serum marker creatine kinase.}
    \item{H}{numeric. Serum marker hemopexin.}
    \item{PK}{numeric. Serum marker pyruvate kinase.}
    \item{LD}{numeric. Serum marker lactate dehydrogenase.}
    \item{Class}{factor with levels, \code{carrier} and \code{normal}.}
  }
}
\details{
  Duchenne Muscular Dystrophy (DMD) is a genetically transmitted disease,
  passed from a mother to her children. Affected female offspring usually
  suffer no apparent symptoms; male offspring with the disease die at a
  young age. Although female carriers have no physical symptoms, they
  tend to exhibit elevated levels of certain serum enzymes or
  proteins.\cr
  The dystrophy dataset contains 209 observations of 75 female DMD
  carriers and 134 female DMD non-carriers. It includes 6 variables
  describing the age of the female and the serum parameters serum marker
  creatine kinase (CK), serum marker hemopexin (H), serum marker pyruvate
  kinase (PK) and serum marker lactate dehydrogenase (LD). The serum
  markers CK and H may be measured rather inexpensively from frozen
  serum; PK and LD require fresh serum.
}
\source{
  D. Andrews and A. Herzberg (1985), \emph{Data}. Berlin:
  Springer-Verlag.
}
\references{
  Robert Tibshirani and Geoffrey Hinton (1998), Coaching variables for
  regression and classification. \emph{Statistics and Computing}
  \bold{8}, 25--33.
}
\examples{
\dontrun{
data("dystrophy")
library("rpart")
errorest(Class~CK+H~AGE+PK+LD, data = dystrophy, model = inbagg,
         pFUN = list(list(model = lm, predict = mypredict.lm),
                     list(model = rpart)),
         ns = 0.75, estimator = "cv")
}
}
\keyword{datasets}

ipred/man/print.inclass.Rd

\name{print.inclass}
\alias{print.inclass}
\title{Print Method for Inclass Object}
\description{
  Print object of class \code{inclass} in nice layout.
}
\usage{
\method{print}{inclass}(x, ...)
}
\arguments{
  \item{x}{object of class \code{inclass}.}
  \item{\dots}{additional arguments.}
}
\details{
  An object of class \code{inclass} is printed.
Information about the number and names of the intermediate variables, the
modelling technique used and the number of bootstrap samples drawn is given.
}
\keyword{misc}
ipred/man/control.errorest.Rd0000644000176200001440000000275214172231220015743 0ustar liggesusers\name{control.errorest}
\alias{control.errorest}
\title{ Control Error Rate Estimators }
\description{
  Some parameters that control the behaviour of \code{\link{errorest}}.
}
\usage{
control.errorest(k = 10, nboot = 25, strat = FALSE, random = TRUE,
                 predictions = FALSE, getmodels=FALSE, list.tindx = NULL)
}
\arguments{
  \item{k}{integer, specify $k$ for $k$-fold cross-validation.}
  \item{nboot}{integer, number of bootstrap replications.}
  \item{strat}{logical, if \code{TRUE}, cross-validation is performed
    using stratified sampling (for classification problems).}
  \item{random}{logical, if \code{TRUE}, cross-validation is performed using
    a random ordering of the data.}
  \item{predictions}{logical, indicates whether the prediction
    for each observation should be returned or not (classification and
    regression only). For a bootstrap based estimator a matrix of size
    'number of observations' times \code{nboot} is returned with predicted
    values of the ith out-of-bootstrap sample in column i and 'NA's for those
    observations not included in the ith out-of-bootstrap sample.}
  \item{getmodels}{logical, indicates whether a list of all models should be
    returned. For cross-validation only.}
  \item{list.tindx}{list of numeric vectors, indicating which
    observations are included in each bootstrap or cross-validation sample,
    respectively.}
}
\value{
  A list with the same components as arguments.
}
\keyword{misc}
ipred/man/cv.Rd0000644000176200001440000000363114172231220013024 0ustar liggesusers\name{cv}
\alias{cv}
\alias{cv.default}
\alias{cv.factor}
\alias{cv.numeric}
\alias{cv.integer}
\alias{cv.Surv}
\title{Cross-validated Error Rate Estimators.}
\description{
  These functions are low-level functions used by \code{\link{errorest}}
  and are normally not called by users.
}
\usage{
\method{cv}{factor}(y, formula, data, model, predict, k=10, random=TRUE,
   strat=FALSE, predictions=NULL, getmodels=NULL, list.tindx = NULL, \dots)
}
\arguments{
  \item{y}{response variable, either of class \code{factor}
    (classification), \code{numeric} (regression) or \code{Surv} (survival).}
  \item{formula}{a formula object.}
  \item{data}{data frame of predictors and response described in \code{formula}.}
  \item{model}{a function implementing the predictive model to be
    evaluated. The function \code{model} can either return an object
    representing a fitted model or a function with argument \code{newdata}
    which returns predicted values. In this case, the \code{predict}
    argument to \code{errorest} is ignored.}
  \item{predict}{a function with arguments \code{object} and \code{newdata}
    only which predicts the status of the observations in \code{newdata}
    based on the fitted model in \code{object}.}
  \item{k}{k-fold cross-validation.}
  \item{random}{logical, indicates whether a random order or the given
    order of the data should be used for sample splitting or not, defaults
    to \code{TRUE}.}
  \item{strat}{logical, stratified sampling or not, defaults to \code{FALSE}.}
  \item{predictions}{logical, return the prediction of each observation.}
  \item{getmodels}{logical, return a list of models for each fold.}
  \item{list.tindx}{list of numeric vectors, indicating which
    observations are included in each cross-validation sample.}
  \item{\dots}{additional arguments to \code{model}.}
}
\details{
  See \code{\link{errorest}}.
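  For completeness, a minimal sketch of a direct call, mirroring the
  package's own tests (\code{errorest} remains the recommended interface):
\preformatted{
y <- rnorm(100)
x <- matrix(rnorm(100 * 5), ncol = 5)
mydata <- as.data.frame(cbind(y, x))
## 10-fold cross-validated root mean squared error of a linear model;
## dispatch is on the class of y (numeric here)
cv(y, y ~ ., data = mydata, model = lm, predict = predict)
}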
}
\keyword{misc}
ipred/man/errorest.Rd0000644000176200001440000002241214172231220014257 0ustar liggesusers\name{errorest}
\alias{errorest}
\alias{errorest.data.frame}
\alias{errorest.default}
\title{ Estimators of Prediction Error }
\description{
  Resampling based estimates of prediction error: misclassification error,
  root mean squared error or Brier score for survival data.
}
\usage{
\method{errorest}{data.frame}(formula, data, subset, na.action=na.omit,
         model=NULL, predict=NULL,
         estimator=c("cv", "boot", "632plus"),
         est.para=control.errorest(), ...)
}
\arguments{
  \item{formula}{a formula of the form \code{lhs ~ rhs}. Either describing
    the model of explanatory and response variables in the usual way (see
    \code{\link{lm}}) or the model between explanatory and intermediate
    variables in the framework of indirect classification, see
    \code{\link{inclass}}.}
  \item{data}{a data frame containing the variables in the model formula
    and additionally the class membership variable if
    \code{model = inclass}. \code{data} is required for indirect
    classification, otherwise \code{formula} is evaluated in the calling
    environment.}
  \item{subset}{optional vector, specifying a subset of observations to
    be used.}
  \item{na.action}{function which indicates what should happen when the data
    contains \code{NA}'s, defaults to \code{\link{na.omit}}.}
  \item{model}{function. Modelling technique whose error rate is to be
    estimated. The function \code{model} can either return an object
    representing a fitted model or a function with argument \code{newdata}
    which returns predicted values. In this case, the \code{predict}
    argument to \code{errorest} is ignored.}
  \item{predict}{function. Prediction method to be used. The vector of
    predicted values must have the same length as the number of
    to-be-predicted observations. Predictions corresponding to missing data
    must be replaced by \code{NA}. Additionally, \code{predict} has to
    return predicted values comparable to the responses (that is: factors
    for classification problems). See the example on how to ensure this
    for any predictor.}
  \item{estimator}{estimator of the misclassification error:
    \code{cv} cross-validation, \code{boot} bootstrap or \code{632plus}
    bias corrected bootstrap (classification only).}
  \item{est.para}{a list of additional parameters that control the
    calculation of the estimator, see \code{\link{control.errorest}} for
    details.}
  \item{\dots}{additional parameters to \code{model}.}
}
\details{
  The prediction error of classification and regression models as well as
  predictive models for censored data can be computed by \code{errorest}
  using cross-validation or the bootstrap. For classification problems, the
  estimated misclassification error is returned. The root mean squared error
  is computed for regression problems and the Brier score for censored data
  (Graf et al., 1999) is reported if the response is censored.

  Any model can be specified as long as it is a function with arguments
  \code{model(formula, data, subset, na.action, ...)}. If a method
  \code{predict.model(object, newdata, ...)} is available,
  \code{predict} does not need to be specified. However, \code{predict}
  has to return predicted values in the same order and of the same length
  as the response. See the examples below.

  $k$-fold cross-validation and the usual bootstrap estimator with
  \code{est.para$nboot} bootstrap replications can be computed for
  all kinds of problems.
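  To make the required interfaces explicit, the following schematic (not
  runnable code, just the argument conventions \code{errorest} relies on)
  may help:
\preformatted{
## schematic sketch only
mymodel <- function(formula, data, subset, na.action, ...) {
    ## fit a model; return a fitted-model object or a function(newdata)
}
mypredict <- function(object, newdata) {
    ## return predictions matching the response, in the order of newdata
}
}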
  The bias corrected .632+ bootstrap by Efron and Tibshirani (1997) is
  available for classification problems only. Use
  \code{\link{control.errorest}} to specify additional arguments.

  \code{errorest} is a formula-based interface to the generic functions
  \code{\link{cv}} or \code{\link{bootest}} which implement methods for
  classification, regression and survival problems.
}
\value{
  The class of the object returned depends on the class of the response
  variable and the estimator used. In each case, it is a list with an element
  \code{error} and additional information. \code{print} methods are available
  for the inspection of the results.
}
\references{
  Brian D. Ripley (1996), \emph{Pattern Recognition and Neural Networks}.
  Cambridge: Cambridge University Press.

  Bradley Efron and Robert Tibshirani (1997), Improvements on
  Cross-Validation: The .632+ Bootstrap Estimator. \emph{Journal of the
  American Statistical Association} \bold{92}(438), 548--560.

  Erika Graf, Claudia Schmoor, Willi Sauerbrei and Martin Schumacher (1999),
  Assessment and comparison of prognostic classification schemes for
  survival data. \emph{Statistics in Medicine} \bold{18}(17-18), 2529--2545.

  Rosa A. Schiavo and David J. Hand (2000), Ten More Years of Error Rate
  Research. \emph{International Statistical Review} \bold{68}(3), 296-310.

  David J. Hand, Hua Gui Li, Niall M. Adams (2001), Supervised
  Classification with Structured Class Definitions. \emph{Computational
  Statistics & Data Analysis} \bold{36}, 209--225.
}
\examples{
# Classification
data("iris")
library("MASS")
# force predict to return class labels only
mypredict.lda <- function(object, newdata)
  predict(object, newdata = newdata)$class
# 10-fold cv of LDA for Iris data
errorest(Species ~ ., data=iris, model=lda,
         estimator = "cv", predict= mypredict.lda)
data("PimaIndiansDiabetes", package = "mlbench")
\dontrun{
# 632+ bootstrap of LDA for Diabetes data
errorest(diabetes ~ ., data=PimaIndiansDiabetes, model=lda,
         estimator = "632plus", predict= mypredict.lda)
}
# cv of a fixed partition of the data
list.tindx <- list(1:100, 101:200, 201:300, 301:400, 401:500, 501:600,
                   601:700, 701:768)
errorest(diabetes ~ ., data=PimaIndiansDiabetes, model=lda,
         estimator = "cv", predict = mypredict.lda,
         est.para = control.errorest(list.tindx = list.tindx))
\dontrun{
# both bootstrap estimates based on fixed partitions
list.tindx <- vector(mode = "list", length = 25)
for(i in 1:25) {
  list.tindx[[i]] <- sample(1:768, 768, TRUE)
}
errorest(diabetes ~ ., data=PimaIndiansDiabetes, model=lda,
         estimator = c("boot", "632plus"), predict= mypredict.lda,
         est.para = control.errorest(list.tindx = list.tindx))
}
data("Glass", package = "mlbench")
# LDA has cross-validated misclassification error of
# 38\% (Ripley, 1996, page 98)
# Pruned trees about 32\% (Ripley, 1996, page 230)
# use stratified sampling here, i.e. preserve the class proportions
errorest(Type ~ ., data=Glass, model=lda,
         predict=mypredict.lda, est.para=control.errorest(strat=TRUE))
# force predict to return class labels
mypredict.rpart <- function(object, newdata)
  predict(object, newdata = newdata, type = "class")
library("rpart")
pruneit <- function(formula, ...)
  prune(rpart(formula, ...), cp = 0.01)
errorest(Type ~ ., data=Glass, model=pruneit,
         predict=mypredict.rpart, est.para=control.errorest(strat=TRUE))
# compute sensitivity and specificity for stabilised LDA
data("GlaucomaM", package = "TH.data")
error <- errorest(Class ~ ., data=GlaucomaM, model=slda,
                  predict=mypredict.lda,
                  est.para=control.errorest(predictions=TRUE))
# sensitivity
mean(error$predictions[GlaucomaM$Class == "glaucoma"] == "glaucoma")
# specificity
mean(error$predictions[GlaucomaM$Class == "normal"] == "normal")
# Indirect Classification: Smoking data
data(Smoking)
# Set three groups of variables:
# 1) explanatory variables are: TarY, NicY, COY, Sex, Age
# 2) intermediate variables are: TVPS, BPNL, COHB
# 3) response (resp) is defined by:
resp <- function(data){
  data <- data[, c("TVPS", "BPNL", "COHB")]
  res <- t(t(data) > c(4438, 232.5, 58))
  res <- as.factor(ifelse(apply(res, 1, sum) > 2, 1, 0))
  res
}
response <- resp(Smoking[ ,c("TVPS", "BPNL", "COHB")])
smoking <- cbind(Smoking, response)
formula <- response~TVPS+BPNL+COHB~TarY+NicY+COY+Sex+Age
# The leave-one-out estimate of the misclassification error is
# 36.36\% (Hand et al., 2001), using indirect classification with
# linear models
\dontrun{
errorest(formula, data = smoking, model = inclass, estimator = "cv",
         pFUN = list(list(model=lm, predict = mypredict.lm)), cFUN = resp,
         est.para=control.errorest(k=nrow(smoking)))
}
# Regression
data("BostonHousing", package = "mlbench")
# 10-fold cv of lm for Boston Housing data
errorest(medv ~ ., data=BostonHousing, model=lm,
         est.para=control.errorest(random=FALSE))
# the same, with "model" returning a function for prediction
# instead of an object of class "lm"
mylm <- function(formula, data) {
  mod <- lm(formula, data)
  function(newdata) predict(mod, newdata)
}
errorest(medv ~ ., data=BostonHousing, model=mylm,
         est.para=control.errorest(random=FALSE))
# Survival data
data("GBSG2", package = "TH.data")
library("survival")
# prediction is fitted Kaplan-Meier
predict.survfit <- function(object, newdata) object
# 5-fold cv of Kaplan-Meier for GBSG2 study
errorest(Surv(time, cens) ~ 1, data=GBSG2, model=survfit,
         predict=predict.survfit, est.para=control.errorest(k=5))
}
\keyword{misc}
ipred/man/bootest.Rd0000644000176200001440000000371214172231220014073 0ustar liggesusers\name{bootest}
\alias{bootest}
\alias{bootest.default}
\alias{bootest.factor}
\alias{bootest.numeric}
\alias{bootest.integer}
\alias{bootest.Surv}
\title{Bootstrap Error Rate Estimators}
\description{
  These functions are low-level functions used by \code{\link{errorest}}
  and are normally not called by users.
}
\usage{
\method{bootest}{factor}(y, formula, data, model, predict, nboot=25,
bc632plus=FALSE, list.tindx = NULL, predictions = FALSE,
both.boot = FALSE, \dots)}
\arguments{
  \item{y}{the response variable, either of class \code{factor}
    (classification), \code{numeric} (regression) or \code{Surv} (survival).}
  \item{formula}{a formula object.}
  \item{data}{data frame of predictors and response described in \code{formula}.}
  \item{model}{a function implementing the predictive model to be
    evaluated. The function \code{model} can either return an object
    representing a fitted model or a function with argument \code{newdata}
    which returns predicted values.
    In this case, the \code{predict} argument to \code{errorest} is ignored.}
  \item{predict}{a function with arguments \code{object} and \code{newdata}
    only which predicts the status of the observations in \code{newdata}
    based on the fitted model in \code{object}.}
  \item{nboot}{number of bootstrap replications to be used.}
  \item{bc632plus}{logical. Should the bias corrected version of the
    misclassification error be computed?}
  \item{predictions}{logical, return a matrix of predictions. The ith column
    contains predictions of the ith out-of-bootstrap sample and 'NA's
    corresponding to the ith bootstrap sample.}
  \item{list.tindx}{list of numeric vectors, indicating which
    observations are included in each bootstrap sample.}
  \item{both.boot}{logical, return both (bootstrap and 632plus) estimates
    or only one of them.}
  \item{\dots}{additional arguments to \code{model}.}
}
\details{
  See \code{\link{errorest}}.
}
\keyword{misc}
ipred/DESCRIPTION0000644000176200001440000000211514402371032013056 0ustar liggesusersPackage: ipred
Title: Improved Predictors
Version: 0.9-14
Date: 2023-03-09
Authors@R: c(person("Andrea", "Peters", role = "aut"),
             person("Torsten", "Hothorn", role = c("aut", "cre"),
                    email = "Torsten.Hothorn@R-project.org"),
             person("Brian D.", "Ripley", role = "ctb"),
             person("Terry", "Therneau", role = "ctb"),
             person("Beth", "Atkinson", role = "ctb"))
Description: Improved predictive models by indirect classification and
  bagging for classification, regression and survival problems
  as well as resampling based estimators of prediction error.
Depends: R (>= 2.10)
Imports: rpart (>= 3.1-8), MASS, survival, nnet, class, prodlim
Suggests: mvtnorm, mlbench, TH.data, randomForest, party
License: GPL (>= 2)
NeedsCompilation: yes
Packaged: 2023-03-09 13:48:29 UTC; hothorn
Author: Andrea Peters [aut],
  Torsten Hothorn [aut, cre],
  Brian D.
Ripley [ctb],
  Terry Therneau [ctb],
  Beth Atkinson [ctb]
Maintainer: Torsten Hothorn <Torsten.Hothorn@R-project.org>
Repository: CRAN
Date/Publication: 2023-03-09 14:50:02 UTC
ipred/build/0000755000176200001440000000000014402361655012462 5ustar liggesusersipred/build/vignette.rds0000644000176200001440000000035414402361655015023 0ustar liggesusers[binary RDS payload omitted; the tar header and opening lines of the
following test script were lost with it]
#if (compareVersion(actversion, thisversion) >= 0) {
#  RNGversion("1.6.2")
#}
set.seed(29081975)

# Classification

learn <- as.data.frame(mlbench.twonorm(200))
test <- as.data.frame(mlbench.twonorm(100))

# bagging

mod <- bagging(classes ~ ., data=learn, coob=TRUE, nbagg=10)
mod
predict(mod)[1:10]

# Double-Bagging

comb.lda <- list(list(model=lda, predict=function(obj, newdata)
                                 predict(obj, newdata)$x))
mod <- bagging(classes ~ ., data=learn, comb=comb.lda, nbagg=10)
mod
predict(mod, newdata=test[1:10,])
predict(mod, newdata=test[1:10,], agg="aver")
predict(mod, newdata=test[1:10,], agg="wei")
predict(mod, newdata=test[1:10,], type="prob")
predict(mod, newdata=test[1:10,], type="prob", agg="aver")
predict(mod, newdata=test[1:10,], type="prob", agg="wei")

mypredict.lda <- function(object, newdata)
  predict(object, newdata = newdata)$class

errorest(classes ~ ., data=learn, model=lda, predict=mypredict.lda)
errorest(classes ~ ., data=learn, model=lda, predict=mypredict.lda,
         est.para=control.errorest(k=5, random=FALSE))
lapply(errorest(classes ~ ., data=learn, model=lda, predict=mypredict.lda,
       est.para=control.errorest(k=5, random=FALSE,
       getmodels=TRUE))$models, class)
errorest(classes ~ ., data=learn, model=bagging,
         est.para=control.errorest(k=2), nbagg=10)
errorest(classes ~ ., data=learn, model=bagging,
         est.para=control.errorest(k=2), nbagg=10, comb=comb.lda)
errorest(classes ~ ., data=learn, model=lda, predict=mypredict.lda,
         estimator="boot")
errorest(classes ~ ., data=learn, model=lda, predict=mypredict.lda,
         estimator="632plus")

# Regression

learn <- as.data.frame(mlbench.friedman1(100))
test <- as.data.frame(mlbench.friedman1(100))

# bagging

mod <- bagging(y ~ ., data=learn, coob=TRUE, nbagg=10)
mod
predict(mod)[1:10]
predict(mod, newdata=test[1:10,])
predict(mod, newdata=test[1:10,], agg="aver")
predict(mod, newdata=test[1:10,], agg="wei")

errorest(y ~ ., data=learn, model=lm)
errorest(y ~ ., data=learn, model=lm,
         est.para=control.errorest(k=5, random=FALSE))
lapply(errorest(y ~ ., data=learn, model=lm,
       est.para=control.errorest(k=5, random=FALSE,
       getmodels=TRUE))$models, class)
errorest(y ~ ., data=learn, model=lm, estimator="boot")

# survival

learn <- rsurv(100, model="C")
test <- rsurv(100, model="C")

mod <- bagging(Surv(time, cens) ~ ., data=learn, nbagg=10)
mod
predict(mod, newdata=test[1:10,])

#errorest(Surv(time, cens) ~ ., data=learn, model=bagging,
#         est.para=list(k=2, random=FALSE), nbagg=5)
#errorest(Surv(time, cens) ~ ., data=learn, model=bagging,
#         estimator="boot", nbagg=5, est.para=list(nboot=5))
#insert control.errorest
errorest(Surv(time, cens) ~ ., data=learn, model=bagging,
         est.para=control.errorest(k=2, random=FALSE), nbagg=5)
errorest(Surv(time, cens) ~ ., data=learn, model=bagging,
         estimator="boot", nbagg=5, est.para=control.errorest(nboot=5))

#lapply(errorest(Surv(time, cens) ~ ., data=learn, model=bagging,
#       estimator="cv", nbagg=1, est.para=list(k=2, random=FALSE,
#       getmodels=TRUE))$models, class)
#insert control.errorest
lapply(errorest(Surv(time, cens) ~ ., data=learn, model=bagging,
       estimator="cv", nbagg=1, est.para=control.errorest(k=2, random=FALSE,
       getmodels=TRUE))$models, class)

# bundling for regression

learn <-
as.data.frame(mlbench.friedman1(100)) test <- as.data.frame(mlbench.friedman1(100)) comb <- list(list(model=lm, predict=predict.lm)) modc <- bagging(y ~ ., data=learn, nbagg=10, comb=comb) modc predict(modc, newdata=learn)[1:10] # bundling for survival while(FALSE) { data("GBSG2", package = "ipred") rcomb <- list(list(model=coxph, predict=predict)) mods <- bagging(Surv(time,cens) ~ ., data=GBSG2, nbagg=10, comb=rcomb, control=rpart.control(xval=0)) predict(mods, newdata=GBSG2[1:3,]) # test for method dispatch on integer valued responses y <- sample(1:100, 100) class(y) x <- matrix(rnorm(100*5), ncol=5) mydata <- as.data.frame(cbind(y, x)) cv(y, y ~ ., data=mydata, model=lm, predict=predict) bootest(y, y ~ ., data=mydata, model=lm, predict=predict) bagging(y ~., data=mydata, nbagg=10) } ipred/tests/ipred-bugs.Rout.save0000644000176200001440000002454314172231220016372 0ustar liggesusers R Under development (unstable) (2019-04-24 r76421) -- "Unsuffered Consequences" Copyright (C) 2019 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > library(ipred) > > suppressWarnings(RNGversion("3.5.3")) > actversion <- paste(R.version$major, R.version$minor, sep=".") > thisversion <- "1.7.0" > > #if (compareVersion(actversion, thisversion) >= 0) { > # RNGversion("1.6.2") > #} > set.seed(29081975) > > data("BreastCancer", package = "mlbench") > mod <- bagging(Class ~ Cl.thickness + Cell.size + + Cell.shape + Marg.adhesion + + Epith.c.size + Bare.nuclei + + Bl.cromatin + Normal.nucleoli + + Mitoses, data=BreastCancer, coob=TRUE) > print(mod) Bagging classification trees with 25 bootstrap replications Call: bagging.data.frame(formula = Class ~ Cl.thickness + Cell.size + Cell.shape + Marg.adhesion + Epith.c.size + Bare.nuclei + Bl.cromatin + Normal.nucleoli + Mitoses, data = BreastCancer, coob = TRUE) Out-of-bag estimate of misclassification error: 0.0439 > > print(a <- predict(mod, newdata=BreastCancer)) [1] benign benign benign malignant benign malignant benign [8] benign benign benign benign benign malignant benign [15] malignant malignant benign benign malignant benign malignant [22] malignant benign malignant benign malignant benign benign [29] benign benign benign benign malignant benign benign [36] benign malignant benign malignant malignant malignant malignant [43] malignant malignant malignant benign malignant benign benign [50] malignant malignant malignant malignant malignant malignant malignant [57] malignant malignant malignant malignant malignant benign malignant [64] malignant benign malignant benign malignant malignant benign [71] benign malignant benign malignant malignant benign benign [78] benign benign benign benign benign benign benign [85] malignant malignant malignant malignant benign benign benign [92] benign benign benign benign benign benign benign [99] malignant malignant malignant malignant benign malignant malignant [106] malignant malignant malignant benign malignant benign malignant [113] malignant malignant benign benign benign malignant benign [120] benign benign benign malignant 
malignant malignant benign [127] malignant benign malignant benign benign benign malignant [134] benign benign benign benign benign benign benign [141] benign benign malignant benign benign benign malignant [148] benign benign malignant benign malignant malignant benign [155] benign malignant benign benign benign malignant malignant [162] benign benign benign benign benign malignant malignant [169] benign benign benign benign benign malignant malignant [176] malignant benign malignant benign malignant benign benign [183] benign malignant malignant benign malignant malignant malignant [190] benign malignant malignant benign benign benign benign [197] benign benign benign benign malignant malignant benign [204] benign benign malignant malignant benign benign benign [211] malignant malignant benign malignant malignant malignant benign [218] benign malignant benign benign malignant malignant malignant [225] malignant benign malignant malignant benign malignant malignant [232] malignant benign malignant benign benign malignant malignant [239] malignant malignant benign benign benign benign benign [246] benign malignant malignant benign benign benign malignant [253] benign malignant malignant malignant benign benign benign [260] benign malignant malignant malignant malignant malignant benign [267] malignant malignant malignant benign malignant benign malignant [274] malignant benign benign benign benign benign malignant [281] benign benign malignant malignant malignant malignant malignant [288] benign malignant malignant benign benign malignant malignant [295] benign malignant benign benign benign malignant malignant [302] benign malignant benign malignant malignant benign benign [309] malignant benign benign benign malignant benign benign [316] malignant malignant malignant benign benign malignant benign [323] benign malignant benign benign malignant benign malignant [330] malignant malignant benign benign malignant malignant benign [337] malignant benign benign malignant malignant benign benign [344] benign malignant benign benign benign malignant malignant [351] benign benign benign malignant benign benign malignant [358] malignant malignant malignant malignant malignant benign benign [365] benign benign malignant malignant benign benign benign [372] benign benign benign benign benign benign benign [379] benign benign benign malignant benign benign benign [386] benign malignant benign benign benign benign malignant [393] benign benign benign benign benign benign benign [400] benign malignant benign benign benign benign benign [407] benign benign benign benign benign benign malignant [414] benign malignant benign malignant benign benign benign [421] benign malignant benign benign benign malignant benign [428] malignant benign benign benign benign benign benign [435] benign malignant malignant benign benign benign malignant [442] benign benign benign benign benign benign benign [449] benign malignant benign benign benign malignant benign [456] malignant malignant malignant benign benign benign benign [463] benign benign benign malignant malignant malignant benign [470] benign benign benign benign benign benign benign [477] benign benign benign malignant benign benign malignant [484] malignant benign benign benign malignant malignant malignant [491] benign malignant benign malignant benign benign benign [498] benign benign benign benign benign benign benign [505] benign benign malignant benign benign benign benign [512] benign benign benign malignant malignant benign benign [519] benign 
malignant benign benign malignant malignant benign [526] benign benign benign benign benign malignant benign [533] benign benign benign benign benign benign benign [540] benign benign benign benign benign benign benign [547] malignant benign benign malignant benign benign benign [554] benign benign benign benign benign benign benign [561] benign benign benign benign benign malignant benign [568] benign malignant malignant malignant malignant benign benign [575] malignant benign benign benign benign benign benign [582] malignant malignant benign benign benign malignant benign [589] malignant benign malignant malignant malignant benign malignant [596] benign benign benign benign benign benign benign [603] benign malignant malignant malignant benign benign malignant [610] benign malignant malignant malignant benign benign benign [617] benign benign benign benign benign benign benign [624] benign benign benign malignant benign benign benign [631] benign benign benign malignant benign benign malignant [638] benign benign benign benign benign benign benign [645] benign benign benign benign malignant benign benign [652] benign benign benign benign benign benign benign [659] malignant benign benign benign benign benign benign [666] benign benign benign malignant malignant malignant benign [673] benign benign benign benign benign benign benign [680] benign malignant malignant benign benign benign benign [687] benign benign benign benign benign malignant benign [694] benign benign benign malignant malignant malignant Levels: benign malignant > stopifnot(length(a) == nrow(BreastCancer)) > > # bagging failed if only one predictor was specified > # by Christoph M. Friedrich , April 29th, 2002 > > X <- as.data.frame(matrix(rnorm(1000), ncol=10)) > y <- factor(ifelse(apply(X, 1, mean) > 0, 1, 0)) > learn <- cbind(y, X) > mt <- bagging(y ~ V1, data=learn, coob=TRUE) > # > # This won't work because of some difficulties with predict.lda > # mt <- bagging(y ~ V1, data=learn, method="double", coob=FALSE) > # > X <- as.data.frame(matrix(rnorm(1000), ncol=10)) > y <- apply(X, 1, mean) + rnorm(nrow(X)) > learn <- cbind(y, X) > mt <- bagging(y ~ V1, data=learn, coob=TRUE) > > # cv.numeric and bootest.numeric were broken, check for reasonaly values > X <- as.data.frame(matrix(rnorm(1000), ncol=10)) > y <- apply(X, 1, mean) + rnorm(nrow(X)) > learn <- cbind(y, X) > newy <- apply(X, 1, mean) + rnorm(nrow(X)) > mod <- lm(y ~ ., data=learn) > trueerr <- sqrt(mean((newy - fitted(mod))^2)) > cverr <- rep(0,5) > for (i in 1:5) cverr[i] <- errorest(y ~., data=learn, model=lm)$error > booterr <- errorest(y ~., data=learn, model=lm, + estimator="boot",est.para=control.errorest(nboot=50))$error > print(trueerr/mean(cverr)) [1] 0.9612632 > print(trueerr/booterr) [1] 0.9073771 > > proc.time() user system elapsed 2.980 0.156 3.131 ipred/tests/ipred-smalltest.Rout.save0000644000176200001440000000306714172231220017440 0ustar liggesusers R Under development (unstable) (2019-04-24 r76421) -- "Unsuffered Consequences" Copyright (C) 2019 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. 
Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > library(ipred) > suppressWarnings(RNGversion("3.5.3")) > > # check if SdiffKM computes > # > # int_start^stop (exp(-h*t) - c)^2 dt > # > # in the correct way > > # low-level interface needed > myfoo <- function(times, prob, h, window=0.0001) { + .Call("SdiffKM", as.double(c(0, times)), + as.double(c(prob[1], prob)), as.double(c(h, + window)), PACKAGE = "ipred") + } > > # to compare with > mexp <- function(start, stop, haz, c=0) { + foo <- function(t) + exp(-2*haz*t)/(-2*haz) - 2*c*exp(-haz*t)/(-haz) + c^2*t + foo(stop) - foo(start) + } > > > times <- seq(from=0.01, to=8, by=0.01) > > for (myc in c(0,0.5,0.9)) { + for (h in c(1,2,3)) { + prob <- rep(myc, length(times)) + a <- round(mexp(0, max(times), h, c=myc),7) + b <- round(myfoo(times, prob, h), 7) + stopifnot(all.equal(a,b)) + } + } > > proc.time() user system elapsed 1.696 0.168 1.862 ipred/tests/ipred-smalltest.R0000644000176200001440000000143214172231220015745 0ustar liggesusers library(ipred) suppressWarnings(RNGversion("3.5.3")) # check if SdiffKM computes # # int_start^stop (exp(-h*t) - c)^2 dt # # in the correct way # low-level interface needed myfoo <- function(times, prob, h, window=0.0001) { .Call("SdiffKM", as.double(c(0, times)), as.double(c(prob[1], prob)), as.double(c(h, window)), PACKAGE = "ipred") } # to compare with mexp <- function(start, stop, haz, c=0) { foo <- function(t) exp(-2*haz*t)/(-2*haz) - 2*c*exp(-haz*t)/(-haz) + c^2*t foo(stop) - foo(start) } times <- seq(from=0.01, to=8, by=0.01) for (myc in c(0,0.5,0.9)) { for (h in c(1,2,3)) { prob <- rep(myc, length(times)) a <- round(mexp(0, max(times), h, c=myc),7) b <- round(myfoo(times, prob, h), 7) stopifnot(all.equal(a,b)) } } ipred/tests/ipred-bugs.R0000644000176200001440000000336714172231220014706 0ustar liggesuserslibrary(ipred) suppressWarnings(RNGversion("3.5.3")) actversion <- paste(R.version$major, R.version$minor, sep=".") thisversion <- "1.7.0" #if (compareVersion(actversion, thisversion) >= 0) { # RNGversion("1.6.2") #} set.seed(29081975) data("BreastCancer", package = "mlbench") mod <- bagging(Class ~ Cl.thickness + Cell.size + Cell.shape + Marg.adhesion + Epith.c.size + Bare.nuclei + Bl.cromatin + Normal.nucleoli + Mitoses, data=BreastCancer, coob=TRUE) print(mod) print(a <- predict(mod, newdata=BreastCancer)) stopifnot(length(a) == nrow(BreastCancer)) # bagging failed if only one predictor was specified # by Christoph M. 
Friedrich, April 29th, 2002

X <- as.data.frame(matrix(rnorm(1000), ncol=10))
y <- factor(ifelse(apply(X, 1, mean) > 0, 1, 0))
learn <- cbind(y, X)
mt <- bagging(y ~ V1, data=learn, coob=TRUE)
#
# This won't work because of some difficulties with predict.lda
# mt <- bagging(y ~ V1, data=learn, method="double", coob=FALSE)
#
X <- as.data.frame(matrix(rnorm(1000), ncol=10))
y <- apply(X, 1, mean) + rnorm(nrow(X))
learn <- cbind(y, X)
mt <- bagging(y ~ V1, data=learn, coob=TRUE)

# cv.numeric and bootest.numeric were broken, check for reasonably values

X <- as.data.frame(matrix(rnorm(1000), ncol=10))
y <- apply(X, 1, mean) + rnorm(nrow(X))
learn <- cbind(y, X)
newy <- apply(X, 1, mean) + rnorm(nrow(X))
mod <- lm(y ~ ., data=learn)
trueerr <- sqrt(mean((newy - fitted(mod))^2))
cverr <- rep(0,5)
for (i in 1:5) cverr[i] <- errorest(y ~., data=learn, model=lm)$error
booterr <- errorest(y ~., data=learn, model=lm,
                    estimator="boot", est.para=control.errorest(nboot=50))$error
print(trueerr/mean(cverr))
print(trueerr/booterr)
ipred/src/0000755000176200001440000000000014402361655012152 5ustar liggesusersipred/src/init.c0000644000176200001440000000074114172231220013247 0ustar liggesusers
#include <R.h>
#include <Rinternals.h>
#include <stdlib.h> // for NULL
#include <R_ext/Rdynload.h>

/* FIXME: Check these declarations against the C/Fortran source code. */

/* .Call calls */
extern SEXP SdiffKM(SEXP, SEXP, SEXP);

static const R_CallMethodDef CallEntries[] = {
    {"SdiffKM", (DL_FUNC) &SdiffKM, 3},
    {NULL, NULL, 0}
};

void R_init_ipred(DllInfo *dll)
{
    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
    R_useDynamicSymbols(dll, FALSE);
}
ipred/src/SdiffKM.c0000644000176200001440000000434214172231220013570 0ustar liggesusers
/*

  $Id: SdiffKM.c,v 1.2 2003/03/27 13:36:02 hothorn Exp $

  SdiffKM: integrated squared difference between survival curve
  and KM estimator

*/

#include <R.h>
#include <Rinternals.h>
#include <Rmath.h>

SEXP SdiffKM(SEXP time, SEXP prob, SEXP args) {

  SEXP rint;
  double d, p, helpone, helptwo, k;
  double myint = 0.0;
  double hazard, window, tw;
  int i, j, n;

  /* check arguments */
  if (!isVector(time))
    error("Argument time is not a vector");
  n = LENGTH(time);
  if (REAL(time)[0] != 0.0)
    error("time[1] must be zero");
  if (!isVector(prob))
    error("Argument prob is not a vector");
  if (REAL(prob)[0] > 1.0)
    error("prob[1] must be less or equal 1");
  if (REAL(prob)[0] < 0.0)
    error("prob[1] must be greater or equal 0");
  if (LENGTH(prob) != n)
    error("prob and time differ in their length");
  if (!isVector(args) || LENGTH(args) != 2)
    error("Argument args must be vector with two elements");
  hazard = REAL(args)[0];
  window = REAL(args)[1];

  /* prepare for return values */
  PROTECT(rint = allocVector(REALSXP, 1));
  UNPROTECT(1);
  REAL(rint)[0] = 0.0;

  /* for all discrete times */
  for (i = 0; i < n-1; i++) {
    /* get difference between times */
    d = REAL(time)[i+1] - REAL(time)[i];
    /* estimated survival probability at this time */
    p = REAL(prob)[i];
    /* if the difference is small enough */
    if (d < window) {
      helpone = p - exp(-REAL(time)[i] * hazard);
      helptwo = p - exp(-REAL(time)[i+1] * hazard);
      /* mean of over and under sum */
      myint += 0.5 * d * (helpone*helpone + helptwo*helptwo);
    } else {
      /* split up in smaller pieces */
      k = ftrunc(d/window) + 1;
      tw = d/k;
      for (j = 0; j < k; j++) {
        helpone = p - exp(-(REAL(time)[i] + j*tw)*hazard);
        helptwo = p - exp(-(REAL(time)[i] + (j+1)*tw)*hazard);
        /* mean of over and under sum for all small windows */
        myint += 0.5*tw*(helpone*helpone + helptwo*helptwo);
      }
    }
  }
  /* ok, get outa here */
  REAL(rint)[0] = myint;
  return(rint);
}
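For reference, the quantity that SdiffKM accumulates by trapezoid sums has a
simple closed form when the predicted survival probability is a constant c;
the following R restatement mirrors the check in ipred/tests/ipred-smalltest.R
above (a sketch for illustration only):

# closed form of the integral int_start^stop (c - exp(-haz * t))^2 dt,
# matching the trapezoid sums accumulated in SdiffKM.c
mexp <- function(start, stop, haz, c = 0) {
    foo <- function(t)
        exp(-2 * haz * t) / (-2 * haz) - 2 * c * exp(-haz * t) / (-haz) + c^2 * t
    foo(stop) - foo(start)
}
# e.g., mexp(0, 8, 1, c = 0.5) can be compared against the low-level
# .Call("SdiffKM", ...) interface demonstrated in the tests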
ipred/vignettes/0000755000176200001440000000000014402361655013373 5ustar liggesusersipred/vignettes/ipred.bib0000644000176200001440000000400314172231220015145 0ustar liggesusers
@article{breiman:1996,
   key = {53},
   author = {L. Breiman},
   title = {Bagging Predictors},
   journal = {Machine Learning},
   pages = {123-140},
   year = {1996},
   volume = {24},
   number = {2}
}

@article{efron:1997,
   key = {52},
   author = {B. Efron and R. Tibshirani},
   title = {Improvements on Cross-Validation: The .632+ Bootstrap Method},
   journal = {Journal of the American Statistical Association},
   pages = {548-560},
   year = {1997},
   volume = {92},
   number = {438}
}

@article{hand:2001,
   key = {32},
   author = {D.J. Hand and H.G. Li and N.M. Adams},
   title = {Supervised classification with structured class definitions},
   journal = {Computational Statistics \& Data Analysis},
   pages = {209-225},
   year = {2001},
   volume = {36}
}

@inproceedings{ifcs:2001,
   author = {A. Peters and T. Hothorn and B. Lausen},
   title = {Glaucoma diagnosis by indirect classifiers},
   booktitle = {Studies in Classification, Data Analysis, and Knowledge
                Organization (to appear)},
   organization = {Proceedings of the 8th Conference of the International
                   Federation of Classification Societies},
   year = {2002}
}

@techreport{out-of-bag:1996,
   key = {T162},
   author = {Leo Breiman},
   title = {Out-Of-Bag Estimation},
   institution = {Statistics Department, University of California Berkeley},
   year = {1996},
   address = {Berkeley CA 94708}
}

@article{double-bag:2002,
   key = {247},
   author = {Torsten Hothorn and Berthold Lausen},
   title = {Double-Bagging: Combining classifiers by bootstrap aggregation},
   journal = {Pattern Recognition},
   year = {2003},
   pages = {1303-1309},
   volume = {36},
   number = {6}
}

@article{Rnews:Peters+Hothorn+Lausen:2002,
   key = {308},
   author = {Andrea Peters and Torsten Hothorn and Berthold Lausen},
   title = {ipred: Improved Predictors},
   journal = {R News},
   year = 2002,
   month = {June},
   volume = 2,
   number = 2,
   pages = {33--36},
   url = {http://CRAN.R-project.org/doc/Rnews/}
}
ipred/vignettes/ipred-examples.Rnw0000644000176200001440000004270414172231220016775 0ustar liggesusers\documentclass[11pt]{article}
\usepackage[round]{natbib}
\usepackage{bibentry}
\usepackage{amsfonts}
\usepackage{hyperref}
\renewcommand{\baselinestretch}{1.3}
\newcommand{\ipred}{\texttt{ipred }}
%\VignetteIndexEntry{Some more or less useful examples for illustration.}
%\VignetteDepends{ipred}
%\textwidth=6.2in
%\VignetteDepends{mvtnorm,TH.data,rpart,MASS}
\begin{document}
\title{\ipred: Improved Predictors}
\date{}
\SweaveOpts{engine=R,eps=TRUE,pdf=TRUE}
<<>>=
options(prompt=">", width=50)
set.seed(210477)
@
\maketitle

This short manual is heavily based on \cite{Rnews:Peters+Hothorn+Lausen:2002}
and needs some improvements.

\section{Introduction}

In classification problems, there are several attempts to create rules
which assign future observations to certain classes. Common methods are,
for example, linear discriminant analysis or classification trees. Recent
developments have led to substantial reductions of the misclassification
error in many applications. Bootstrap aggregation
\citep[``bagging'',][]{breiman:1996} combines classifiers trained on
bootstrap samples of the original data. Another approach is indirect
classification, which incorporates a priori knowledge into a classification
rule \citep{hand:2001}. Since the misclassification error is a criterion to
assess the classification techniques, its estimation is of main importance.
A nearly unbiased but highly variable estimator can be calculated by cross
validation. \cite{efron:1997} discuss bootstrap estimates of the
misclassification error. As a by-product of bagging, \cite{out-of-bag:1996}
proposes the out-of-bag estimator. \\
However, the calculation of the desired classification models and their
misclassification errors is often aggravated by different and specialized
interfaces of the various procedures. We propose the \ipred package as a
first attempt to create a unified interface for improved predictors and
various error rate estimators. In the following we demonstrate the
functionality of the package on the example of glaucoma classification. We
start with an overview of the disease and data and review the implemented
classification and estimation methods in the context of their application
to glaucoma diagnosis.

\section{Glaucoma}

Glaucoma is a slowly progressing and irreversible disease that affects the
optic nerve head. It is the second most common cause of blindness worldwide.
Glaucoma is usually diagnosed based on a reduced visual field, assessed by
perimetric examination, and a smaller number of intact nerve fibers at the
optic nerve head.
One way to examine the amount of intact nerve fibers is the Heidelberg
Retina Tomograph (HRT), a confocal laser scanning tomograph, which performs
a three-dimensional topographical analysis of the optic nerve head
morphology. It produces a series of $32$ images, each of $256 \times 256$
pixels, which are converted to a single topographic image. A less complex,
though less informative, examination tool is the $2$-dimensional fundus
photography. However, in cooperation with clinicians and a priori analysis
we derived a diagnosis of glaucoma based on three variables only:
$w_{lora}$ represents the loss of nerve fibers and is obtained from
$2$-dimensional fundus photography, $w_{cs}$ and $w_{clv}$ describe the
visual field defect \citep{ifcs:2001}.

\begin{center}
\begin{figure}[h]
\begin{center}
{\small
\setlength{\unitlength}{0.6cm}
\begin{picture}(14.5,5)
\put(5, 4.5){\makebox(2, 0.5){$w_{clv}\geq 5.1$}}
\put(2.5, 3){\makebox(2, 0.5){$w_{lora}\geq 49.23$}}
\put(7.5, 3){\makebox(2, 0.5){$w_{lora} \geq 58.55$}}
\put(0, 1.5){\makebox(2, 0.5){$glaucoma$}}
\put(3.5, 1.5){\makebox(2, 0.5){$normal$}}
\put(6.5, 1.5){\makebox(2, 0.5){$w_{cs} < 1.405$}}
\put(10, 1.5){\makebox(2, 0.5){$normal$}}
\put(3.5, 0){\makebox(2, 0.5){$glaucoma$}}
\put(6.5, 0){\makebox(2, 0.5){$normal$}}
\put(6, 4.5){\vector(-3, -2){1.5}}
\put(6, 4.5){\vector(3, -2){1.5}}
\put(3.5, 3){\vector(3, -2){1.5}}
\put(3.5, 3){\vector(-3, -2){1.5}}
\put(8.5, 3){\vector(3, -2){1.5}}
\put(8.5, 3){\vector(-3, -2){1.5}}
\put(6.5, 1.5){\vector(3, -2){1.5}}
\put(6.5, 1.5){\vector(-3, -2){1.5}}
\end{picture}
}
\end{center}
\caption{Glaucoma diagnosis. \label{diag}}
\end{figure}
\end{center}

Figure \ref{diag} represents the diagnosis of glaucoma in terms of a medical
decision tree. A complication of the disease is that damage to the optic
nerve head morphology precedes a measurable visual field defect.
Furthermore, early detection is of major importance, since an adequate
therapy can only slow down the progression of the disease. Hence, a
classification rule for detecting early damage should include morphological
information, rather than visual field data only.
Two example datasets are included in the package. The first one contains
measurements of the eye morphology only (\texttt{GlaucomaM}), including
$62$ variables for $196$ observations.
The second dataset (\texttt{GlaucomaMVF}) contains additional visual field
measurements for a different set of patients. In both example datasets, the
observations in the two groups are matched by age and sex to prevent any
bias.

\section{Bagging}

Referring to the example of glaucoma diagnosis we first demonstrate the
functionality of the \texttt{bagging} function. We fit \texttt{nbagg = 25}
(default) classification trees for bagging by
<<>>=
library("ipred")
library("rpart")
library("MASS")
data("GlaucomaM", package="TH.data")
gbag <- bagging(Class ~ ., data = GlaucomaM, coob=TRUE)
@
where \texttt{GlaucomaM} contains explanatory HRT variables and the response
of glaucoma diagnosis (\texttt{Class}), a factor with two levels
\texttt{normal} and \texttt{glaucoma}. \texttt{print} returns information
about the returned object, i.e. the number of bootstrap replications used
and, as requested by \texttt{coob=TRUE}, the out-of-bag estimate of the
misclassification error \citep{out-of-bag:1996}.
<<>>=
print(gbag)
@
The out-of-bag estimate uses the observations which are left out in a
bootstrap sample to estimate the misclassification error at almost no
additional computational cost. \cite{double-bag:2002} propose to use the
out-of-bag samples for a combination of linear discriminant analysis and
classification trees, called ``Double-Bagging''. For example, a combination
of a stabilised linear discriminant analysis with classification trees can
be computed along the following lines
<<>>=
scomb <- list(list(model=slda, predict=function(object, newdata)
                               predict(object, newdata)$x))
gbagc <- bagging(Class ~ ., data = GlaucomaM, comb=scomb)
@
\texttt{predict} predicts future observations according to the fitted model.
<<>>=
predict(gbagc, newdata=GlaucomaM[c(1:3, 99:102), ])
@
Both \texttt{bagging} and \texttt{predict} rely on the \texttt{rpart}
routines. The \texttt{rpart} routine for each bootstrap sample can be
controlled in the usual way. By default \texttt{rpart.control} is used with
\texttt{minsize=2} and \texttt{cp=0} and it is wise to turn cross-validation
off (\texttt{xval=0}). The function \texttt{prune} can be used to prune each
of the trees to an appropriate size.

\section{Indirect Classification}

Especially in a medical context, a priori knowledge about a classifying
structure is often available. For example, it might be known that a disease
is assessed based on a subgroup of the given variables only, or that class
memberships are assigned by a deterministically known classifying function.
\cite{hand:2001} proposes the framework of indirect classification which
incorporates this a priori knowledge into a classification rule. In this
framework we subdivide a given data set into three groups of variables:
those used to predict the class membership (explanatory), those used to
define the class membership (intermediate) and the class membership variable
itself (response). For future observations, an indirect classifier predicts
values for the appointed intermediate variables based on explanatory
variables only. The observation is classified based on its predicted
intermediate variables and a fixed classifying function. This indirect way
of classification using the predicted intermediate variables offers
possibilities to incorporate a priori knowledge by the subdivision of
variables and by the construction of a fixed classifying function.
We apply indirect classification by using the function \texttt{inclass}.
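Schematically, a call to \texttt{inclass} takes a two-part formula of the
form response $\sim$ intermediate $\sim$ explanatory (a sketch with
placeholder names, not evaluated; the concrete pieces are filled in below):
<<eval=FALSE>>=
## y: response, w1, w2: intermediate, x1, x2: explanatory variables
fit <- inclass(y ~ w1 + w2 ~ x1 + x2, pFUN = list(list(model = lm)),
               cFUN = aClassifyingFunction, data = somedata)
@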
Referring to the glaucoma example, explanatory variables are HRT and
anamnestic variables only, intermediate variables are $w_{lora}, \, w_{cs}$
and $w_{clv}$. The response is the diagnosis of glaucoma, which is
determined by a fixed classifying function and therefore not included in
the learning sample \texttt{GlaucomaMVF}. We assign the given variables to
explanatory and intermediate by specifying the input formula.
<<>>=
data("GlaucomaMVF", package="ipred")
GlaucomaMVF <- GlaucomaMVF[,-63]
formula.indirect <- Class~clv + lora + cs ~ .
@
The variables on the left-hand side represent the intermediate variables,
modeled by the explanatory variables on the right-hand side. Almost any
modeling technique can be used to predict the intermediate variables. We
chose a linear model by \texttt{pFUN = list(list(model = lm))}.
<<>>=
classify <- function (data) {
  attach(data)
  res <- ifelse((!is.na(clv) & !is.na(lora) & clv >= 5.1 & lora >= 49.23372) |
                (!is.na(clv) & !is.na(lora) & !is.na(cs) & clv < 5.1 &
                 lora >= 58.55409 & cs < 1.405) |
                (is.na(clv) & !is.na(lora) & !is.na(cs) & lora >= 58.55409 &
                 cs < 1.405) |
                (!is.na(clv) & is.na(lora) & cs < 1.405), 0, 1)
  detach(data)
  factor (res, labels = c("glaucoma", "normal"))
}
fit <- inclass(formula.indirect, pFUN = list(list(model = lm)),
               cFUN = classify, data = GlaucomaMVF)
@
\texttt{print} displays the subdivision of variables and the chosen modeling
technique
<<>>=
print(fit)
@
Furthermore, indirect classification predicts the intermediate variables
based on the explanatory variables and classifies them according to a fixed
classifying function in a second step; that is, a deterministically known
function for the class membership has to be specified. In our example this
function is given in Figure \ref{diag} and implemented in the function
\texttt{classify}.\\
Prediction of future observations is now performed by
<<>>=
predict(object = fit, newdata = GlaucomaMVF[c(1:3, 86:88),])
@
We perform a bootstrap aggregated indirect classification approach with the
function \texttt{inbagg}, specifying the number of bootstrap samples
\citep{ifcs:2001}. Regression or classification trees are fitted for each
bootstrap sample, with respect to the measurement scale of the specified
intermediate variables:
<<>>=
mypredict.rpart <- function(object, newdata) {
  RES <- predict(object, newdata)
  RET <- rep(NA, nrow(newdata))
  NAMES <- rownames(newdata)
  RET[NAMES %in% names(RES)] <- RES[NAMES[NAMES %in% names(RES)]]
  RET
}
fit <- inbagg(formula.indirect, pFUN = list(list(model = rpart, predict =
mypredict.rpart)), cFUN = classify, nbagg = 25, data = GlaucomaMVF)
@
The call for the prediction of values remains unchanged.

\section{Error Rate Estimation}

Classification rules are usually assessed by their misclassification rate.
Hence, error rate estimation is of main importance. The function
\texttt{errorest} implements a unified interface to several resampling based
estimators. Referring to the example, we apply a linear discriminant
analysis and specify the error rate estimator by \texttt{estimator = "cv",
"boot"} or \texttt{"632plus"}, respectively. A 10-fold cross validation is
performed by choosing \texttt{estimator = "cv"} and \texttt{est.para =
control.errorest(k = 10)}. The options \texttt{estimator = "boot"} or
\texttt{estimator = "632plus"} deliver a bootstrap estimator and its bias
corrected version {\sl .632+} \citep[see][]{efron:1997}; we specify the
number of bootstrap samples to be drawn by \texttt{est.para =
control.errorest(nboot = 50)}.
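For example, a bootstrap estimator based on $50$ bootstrap samples for the
linear discriminant analysis considered below could be requested as follows
(a sketch, not evaluated here; \texttt{mypredict.lda} is defined next):
<<eval=FALSE>>=
errorest(Class ~ ., data = GlaucomaM, model = lda, estimator = "boot",
         predict = mypredict.lda, est.para = control.errorest(nboot = 50))
@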
Further arguments are required to particularize the classification
technique. The argument \texttt{predict} represents the chosen predictive
function. For a unified interface \texttt{predict} has to be based on the
arguments \texttt{object} and \texttt{newdata} only; therefore, a wrapper
function \texttt{mypredict} is necessary for classifiers which require more
than those arguments or do not return the predicted classes by default. For
a linear discriminant analysis with \texttt{lda}, we need to specify
<<>>=
mypredict.lda <- function(object, newdata){
  predict(object, newdata = newdata)$class
}
@
and calculate a 10-fold-cross-validated error rate estimator for a linear
discriminant analysis by calling
<<>>=
errorest(Class ~ ., data= GlaucomaM,
  model=lda, estimator = "cv", predict= mypredict.lda)
@
For the indirect approach the specification of the call becomes slightly
more complicated.
%Again for a unified interface a wrapper
%function has to be used, which incorporates the fixed classification rule
The bias corrected estimator {\sl .632+} is computed by
<<>>=
errorest(formula.indirect,
  data = GlaucomaMVF, model = inclass,
  estimator = "632plus",
  pFUN = list(list(model = lm)), cFUN = classify)
@
Because of the subdivision of variables and a formula describing the
modeling between explanatory and intermediate variables only, the class
membership variable must be supplied in the data set. Hence, in contrast to
the function \texttt{inclass}, the data set \texttt{GlaucomaMVF} used in
\texttt{errorest} must contain explanatory, intermediate and response
variables.

Sometimes it may be necessary to reduce the number of predictors before
training a classifier. Estimating the error rate after the variable
selection leads to biased estimates of the misclassification error and
therefore one should estimate the error rate of the whole procedure. Within
the \texttt{errorest} framework, this can be done as follows. First, we
define a function which does both variable selection and training of the
classifier. For illustration purposes, we select the predictors by comparing
their univariate $P$-values of a two-sample $t$-test with a prespecified
level and train an LDA using the selected variables only.

<<>>=
mymod <- function(formula, data, level=0.05) {
    # select all predictors that are associated with a
    # univariate t.test p-value of less than level
    sel <- which(lapply(data, function(x) {
        if (!is.numeric(x))
            return(1)
        else
           return(t.test(x ~ data$Class)$p.value)
        }) < level)
    # make sure that the response is still there
    sel <- c(which(colnames(data) %in% "Class"), sel)
    # compute an LDA using the selected predictors only
    mod <- lda(formula, data=data[,sel])
    # and return a function for prediction
    function(newdata) {
        predict(mod, newdata=newdata[,sel])$class
    }
}
@

Note that \texttt{mymod} does not return an object of class \texttt{lda}
but a function with argument \texttt{newdata} only. Thanks to lexical
scoping, this function is used for computing predicted classes instead of a
function \texttt{predict} passed to \texttt{errorest} as an argument.
Computing a $5$-fold cross-validated error rate estimator now is
approximately a one-liner.

<<>>=
errorest(Class ~ . , data=GlaucomaM, model=mymod, estimator = "cv",
est.para=control.errorest(k=5))
@

%%To summarize the performance of the different classification techniques
%%in the considered example of glaucoma diagnosis, the 10-fold
%%cross-validated error estimator delivers the
%%results given in Table \ref{tenf}.
%%\begin{figure}
%%\begin{center}
%%\begin{tabular}{ rrr }
%%\hline
%%dataset & method & error estimate \\
%%\hline
%%\texttt{GlaucomaM} & {\sl slda} & 0.168 \\
%%\texttt{GlaucomaM} & {\sl bagging} & 0.158 \\
%%\texttt{GlaucomaM} & {\sl double-bagging} & 0.153 \\
%%\texttt{GlaucomaMVF} & {\sl inclass-bagging} & 0.206 \\
%%\texttt{GlaucomaMVF} & {\sl inclass-lm} & 0.229 \\
%%\hline
%%\end{tabular}
%%\caption{10-fold cross-validated error estimation of
%%the misclassification error for several classification
%%methods: {\sl slda} - stabilised linear discriminant analysis,
%%{\sl bagging} - bagging with 50 bootstrap samples,
%%{\sl double-bagging} - bagging with 50 bootstrap samples,
%%combined with sLDA, {\sl inclass-bagging} -
%%indirect classification using bagging,
%%{\sl inclass-lm} indirect classification using
%%linear modeling. \label{tenf}}
%%\end{center}
%%\end{figure}

%%Note that an estimator of the variance is available for the ordinary
%%bootstrap estimator (\texttt{estimator="boot"}) only, see \cite{efron:1997}.

\section{Summary}

\ipred tries to implement a unified interface to some recent developments
in classification and error rate estimation. It is by no means finished or
perfect and we very much appreciate comments, suggestions and criticism.
Currently, the major drawback is speed. Calling \texttt{rpart} $50$ times,
once for each bootstrap sample, is relatively inefficient, but the design of
interfaces was our main focus rather than optimization. Besides the examples
shown, \texttt{bagging} can be used to compute bagging for regression trees
and \texttt{errorest} computes estimators of the mean squared error for
regression models.

\bibliographystyle{plainnat}
\bibliography{ipred}

\end{document}
ipred/R/0000755000176200001440000000000014402361655011564 5ustar liggesusersipred/R/csurv.R0000644000176200001440000000376714172231220013052 0ustar liggesusers# $Id: csurv.R,v 1.6 2003/03/28 12:55:32 hothorn Exp $

csurv <- function(newdata, pred, minprob=0, window=0.0001) {

  N <- nrow(newdata)
  if (!"hazard" %in% names(attributes(newdata)))
    stop("hazards attribute to newdata missing")
  hazards <- attr(newdata, "hazard")
  error <- rep(0, N)

  # if there is only one prediction for all observations
  GETPROB <- TRUE
  if (inherits(pred, "survfit")) {
    times <- pred$time            # get times
    predprob <- getsurv(pred, times)  # get steps
    GETPROB <- FALSE
  }

  for (i in 1:N) {
    if (GETPROB) {
      times <- pred[[i]]$time           # get times
      predprob <- getsurv(pred[[i]], times) # get steps
    }
    # compute the integrated squared difference between
    # KM and S(t)
    # minprob: stop integration when S(t) < minprob
    lasttime <- -(log(minprob) / hazards[i])
    if (max(times) > lasttime) {
      thisprob <- predprob[times <= lasttime]
      thistimes <- times[times <= lasttime]
    } else {
      thisprob <- predprob
      thistimes <- times
    }
    error[i] <- .Call(SdiffKM, as.double(c(0,thistimes)),
                      as.double(c(1,thisprob)),
                      as.double(c(hazards[i], window)))
    # adjust for time scale by last event
    error[i] <- error[i]/max(thistimes)
    # if all observations share one hazard, one evaluation is enough
    if (length(unique(hazards)) == 1) {
      error <- error[i]
      break
    }
  }
  error <- mean(error)
  error
}

# R reference implementation of the integral computed in SdiffKM.c
foo <- function (time, prob, hazard, window) {
  myint <- 0
  time <- c(0, time)
  s <- exp(-time * hazard)
  prob <- c(1, prob)
  for (i in 1:(length(time)-1)) {
    d <- time[i+1] - time[i]
    if (d < window) {
      myint <- myint + 0.5 * d * ((prob[i] - s[i])^2 + (prob[i] - s[i + 1])^2)
    } else {
      k <- ceiling(d/window)
      wi <- d/k
      for (j in 1:k)
        myint <- myint + 0.5 * wi * ((prob[i] - exp(-(time[i] +
                 (j - 1) * wi) * hazard))^2 + (prob[i] - exp(-(time[i] +
                 j * wi) * hazard))^2)
    }
  }
  myint
} ipred/R/print.R0000644000176200001440000001204414172231220013030 0ustar liggesusers#$Id: print.R,v 1.4 2004/02/09 08:08:21 peters Exp $ print.classbagg <- function(x, digits=4, ...) { cat("\n") B <- length(x$mtrees) if (B > 1) method <- paste("Bagging classification trees with", B, "bootstrap replications") else method <- "Classification tree" cat(method, "\n") if (!is.null(x$call)) { cat("\nCall: ") print(x$call) cat("\n") } if (x$OOB) { cat("Out-of-bag estimate of misclassification error: ", round(x$err, digits), "\n") } cat("\n") } print.regbagg <- function(x, digits=4, ...) { cat("\n") B <- length(x$mtrees) if (B > 1) method <- paste("Bagging regression trees with", B, "bootstrap replications") else method <- "Regression tree" cat(method, "\n") if (!is.null(x$call)) { cat("\nCall: ") print(x$call) cat("\n") } if (x$OOB) cat("Out-of-bag estimate of root mean squared error: ", round(x$err, digits), "\n") cat("\n") } print.survbagg <- function(x, digits=4, ...) { cat("\n") B <- length(x$mtrees) if (B > 1) method <- paste("Bagging survival trees with", B, "bootstrap replications") else method <- "Survival tree" cat(method, "\n") if (!is.null(x$call)) { cat("\nCall: ") print(x$call) cat("\n") } if (x$OOB) cat("Out-of-bag estimate of Brier's score: ", round(x$err, digits), "\n") cat("\n") } summary.classbagg <- function(object, ...) { print(object, ...) class(object) <- "summary.bagging" object } summary.regbagg <- function(object, ...) { print(object, ...) class(object) <- "summary.bagging" object } summary.survbagg <- function(object, ...) { print(object, ...) class(object) <- "summary.bagging" object } print.summary.bagging <- function(x, digits = max(3, getOption("digits")-3), ...) { cat("Trees: \n") print(x$mtrees) invisible(x$mtrees) } print.cvclass <- function(x, digits=4, ...) { cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } cat("\t", paste(x$k, "-fold", sep=""), "cross-validation estimator of misclassification error \n") cat("\n") cat("Misclassification error: ", round(x$error, digits), "\n") cat("\n") } print.bootestclass <- function(x, digits=4, ...) { if(all(names(x)[names(x)!="call"] %in% c("boot", "632plus"))) { XX <- x for(i in c("boot", "632plus")) { x <- XX[[i]] x$call <- XX[["call"]] cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } if (x$bc632plus) { cat("\t", ".632+ Bootstrap estimator of misclassification error \n") } else { cat("\t", "Bootstrap estimator of misclassification error \n") } cat("\t with" , x$nboot, "bootstrap replications\n") cat("\n") cat("Misclassification error: ", round(x$error, digits), "\n") if (!x$bc632plus) cat("Standard deviation:", round(x$sd, digits), "\n") cat("\n") } } else { # if(!all(names(x) %in% c("boot", "632plus"))){ cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } if (x$bc632plus) cat("\t", ".632+ Bootstrap estimator of misclassification error \n") else cat("\t", "Bootstrap estimator of misclassification error \n") cat("\t with" , x$nboot, "bootstrap replications\n") cat("\n") cat("Misclassification error: ", round(x$error, digits), "\n") if (!x$bc632plus) cat("Standard deviation:", round(x$sd, digits), "\n") cat("\n") } } print.cvreg <- function(x, digits=4, ...) 
{ cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } cat("\t", paste(x$k, "-fold", sep=""), "cross-validation estimator of root mean squared error\n") cat("\n") cat("Root mean squared error: ", round(x$error, digits), "\n") cat("\n") } print.bootestreg <- function(x, digits=4, ...) { cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } cat("\t", "Bootstrap estimator of root mean squared error \n") cat("\t with" , x$nboot, "bootstrap replications\n") cat("\n") cat("Root mean squared error: ", round(x$error, digits), "\n") cat("\n") } print.cvsurv <- function(x, digits=4, ...) { cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } cat("\t", paste(x$k, "-fold", sep=""), "cross-validation estimator of Brier's score\n") cat("\n") cat("Brier's score: ", round(x$error, digits), "\n") cat("\n") } print.bootestsurv <- function(x, digits=4, ...) { cat("\n") if (!is.null(x$call)) { cat("Call:\n") print(x$call) cat("\n") } cat("\t", "Bootstrap estimator of Brier's score\n") cat("\t with" , x$nboot, "bootstrap replications\n") cat("\n") cat("Brier's score: ", round(x$error, digits), "\n") cat("\n") } ipred/R/inclass.R0000644000176200001440000001423314172231220013332 0ustar liggesusers# $Id: inclass.R,v 1.33 2008/08/04 08:18:41 hothorn Exp $ inclass <- function(formula, data, ...) UseMethod("inclass", data) inclass.default <- function(formula, data, ...) { stop(paste("Do not know how to handle objects of class", class(data))) } inclass.data.frame <- function(formula, data, pFUN = NULL, cFUN = NULL, ...) { ##check formula if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1)) stop("formula missing or incorrect") m <- match.call(expand.dots = FALSE) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) ##editing formula ###main formula if(length(formula[[2]])==3) { if(is.function(cFUN)) y.formula <- formula[[2]] else y.formula <- cFUN$formula w.formula <- XX~YY w.formula[[2]] <- formula[[2]][[3]] w.formula[[3]] <- formula[[3]] response <- paste(formula[[2]][[2]]) w.names <- attr(terms(as.formula(formula[[2]]), data = data), "term.labels") x.names <- attr(terms(as.formula(formula), data = data), "term.labels") if(x.names[1] == ".") x.names <- colnames(data)[!(colnames(data) %in% c(response, w.names))] } else { stop(paste("Specified formula has to be of type y~x~w")) } if(is.null(w.formula)) stop("no formula for prediction model specified") formula.list <- vector(mode = "list", length= length(w.names)) names(formula.list) <- w.names P <- length(pFUN) Qi <- length(w.names) for(j in 1:Qi) { res <- list() res$formula <- w.formula res$formula[[2]] <- as.name(attr(terms(res$formula[-3], data = data), "term.labels")[j]) if(res$formula[[3]] == ".") { res$formula <- as.formula(paste(res$formula[[2]], "~", paste(x.names, collapse= "+"))) } for(i in 1:P) { if(is.null(pFUN[[i]]$formula)) { if(is.null(formula.list[[w.names[j]]]$formula)) formula.list[[w.names[j]]]$formula <- res$formula if(is.null(formula.list[[w.names[j]]]$model)) formula.list[[w.names[j]]]$model <- pFUN[[i]]$model if(is.null(formula.list[[w.names[j]]]$predict)) formula.list[[w.names[j]]]$predict <- pFUN[[i]]$predict } else { QQ <- attr(terms(pFUN[[i]]$formula[-3], data = data), "term.labels") for(k in QQ) { if(w.names[j] == k) { res$formula[[3]] <- pFUN[[i]]$formula[[3]] if(paste(pFUN[[i]]$formula[[3]]) == ".") { res$formula[[3]] <- as.formula(paste(w.names[j], "~", paste(x.names, collapse= "+"))) } 
formula.list[[w.names[j]]]$formula <- pFUN[[i]]$formula formula.list[[w.names[j]]]$model <- pFUN[[i]]$model formula.list[[w.names[j]]]$predict <- pFUN[[i]]$predict } } } } } if(!is.function(cFUN)) { cFUN$formula <- y.formula if(is.null(cFUN$training.set)) cFUN$training.set <- "original" } result <- workhorse.inclass(object = formula.list, data = data, cFUN = cFUN, ...) return(result) } workhorse.inclass <- function(object, data, cFUN, subset, na.action, ...) { formula.list <- object q <- length(formula.list) result <- list() namen <- c() ##model fitting for(i in 1:q) { formula <- formula.list[[i]]$formula ##check necessary?? > if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1) || (length(attr(terms(formula[-3], data = data), "term.labels")) != 1)) stop("formula missing or incorrect") ## check necessary?? < m <- match.call(expand.dots= FALSE) res <- formula.list[[i]]$model(formula = formula, data = data) namen <- c(namen, as.character(formula[[2]])) result <- c(result, list(res)) } names(result) <- namen if(!is.function(cFUN)) { ###cFUN can be trained on original intermediate variables or on fitted values or on the subset if(!is.null(m$subset) && cFUN$training.set == "subset") dataresp <- data[!subset, ] if(cFUN$training.set == "original") dataresp <- data if(cFUN$training.set == "fitted") { dataresp <- data for(i in 1:q){ if(!is.null(formula.list[[i]]$predict)){ dataresp[,namen[i]] <- formula.list[[i]]$predict(result[[i]], newdata = data)} else { dataresp[,namen[i]] <- predict(result[[i]], newdata = data) } } } model.response <- cFUN$model(as.formula(cFUN$formula), data = dataresp, ...) } else { model.response <- cFUN } ###predict specificatiations are not delivered result <- list("model.intermediate" = result, "model.response" = model.response, "para.intermediate" = object, "para.response" = cFUN) class(result) <- "inclass" return(result) } print.inclass <- function(x, ...) { x <- x$model.intermediate q <- length(x) intermediates <- attr(x, "names") classes <- c() for(i in 1:q) { classes <- c(classes, class(x[[i]])) } text.intermediates <- paste("Indirect classification, with", q, "intermediate variables:") if(length(unique(classes)) == 1) { predictive <- paste("Predictive model per intermediate is", unique(classes)) } else { predictive <- paste("Predictive model per intermediate is \n", paste(intermediates, ": ", classes, "\n ", collapse = "")) } cat("\n", text.intermediates, "\n", intermediates, "\n", "\n", predictive, "\n") } summary.inclass <- function(object, ...) { class(object) <- "summary.inclass" object } print.summary.inclass <- function(x, ...) 
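# companion print method for summary.inclass objects: restates which
# intermediate variables were modelled and by which model class, then
# prints the fitted intermediate models themselves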
{ x <- x$model.intermediate q <- length(x) intermediates <- attr(x, "names") classes <- c() for(i in 1:q) { classes <- c(classes, class(x[[i]])) } text.intermediates <- paste("Indirect classification, with", q, "intermediate variables:") if(length(unique(classes)) == 1) { predictive <- paste("Predictive model per intermediate is", unique(classes)) } else { predictive <- paste("Predictive model per intermediate is", "\n ", paste(intermediates, ": ", classes, "\n ", collapse = "")) } cat("\n", text.intermediates, "\n", intermediates, "\n", "\n", predictive, "\n", "\n", "Models:", "\n") print(x) } ipred/R/errorest.R0000644000176200001440000001302314172231220013537 0ustar liggesusers# $Id: errorest.R,v 1.25 2005/06/29 08:50:28 hothorn Exp $ control.errorest <- function(k= 10, nboot = 25, strat=FALSE, random=TRUE, predictions=FALSE, getmodels=FALSE, list.tindx = NULL) { if (k < 1) { warning("k < 1, using k=10") k <- 10 } if (nboot < 1) { warning("nboot < 1, using nboot=25") nboot <- 25 } if (!is.logical(strat)) { warning("strat is not a logical, using strat=FALSE") strat <- FALSE } if (!is.logical(random)) { warning("random is not a logical, using random=TRUE") random <- TRUE } if (!is.logical(predictions)) { warning("predictions is not a logical, using predictions=FALSE") predictions <- FALSE } if (!is.logical(getmodels)) { warning("getmodel is not a logical, using getmodels=FALSE") getmodels <- FALSE } RET <- list(k=k, nboot=nboot, strat=strat, random=random, predictions=predictions, getmodels=getmodels, list.tindx = list.tindx) return(RET) } errorest <- function(formula, data, ...) UseMethod("errorest", data) errorest.default <- function(formula, data, ...) stop(paste("Do not know how to handle objects of class", class(data))) errorest.data.frame <- function(formula, data, subset, na.action=na.omit, model=NULL, predict=NULL, estimator = c("cv", "boot", "632plus"), est.para = control.errorest(), ...) { cl <- match.call() m <- match.call(expand.dots = FALSE) if (length(grep("inclass", paste(m$model))) > 0 || length(grep("inbagg", paste(m$model))) > 0) { RET <- errorestinclass(formula, data=data, subset, na.action, model, predict, estimator, est.para, ...) RET$call <- cl } else { if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-3], data = data), "term.labels")) != 1)) stop("formula missing or incorrect") NOPRED <- (length(attr(terms(formula[-2], data = data), "term.labels")) < 1) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) m[[1]] <- as.name("model.frame") m$... <- NULL m$model <- NULL m$predict <- NULL m$estimator <- NULL m$est.para <- NULL mf <- eval(m, parent.frame()) response <- attr(attr(mf, "terms"), "response") # just extract the data.frame, NA handling here # make sure to leave the time and censoring variable here # for "Surv(time, cens) ~ ." 
formulas # delete terms attribute attr(mf, "terms") <- NULL y <- mf[,response] if (!NOPRED & !is.Surv(y)) data <- mf else data <- data[complete.cases(data),] if(all(estimator %in% c("boot", "632plus")) & all(c("boot", "632plus") %in% estimator)) { estimator <- paste(sort(estimator), collapse = "_") } else { if(length(estimator) > 1) { estimator <- estimator[1] # warning(paste("Multiple choice of estimators, only", estimator, "is performed")) } else { estimator <- match.arg(estimator) } } if(is.null(model)) stop("no model specified") switch(estimator, "cv" = { RET <- cv(y, formula, data, model=model, predict=predict, k=est.para$k, random=est.para$random, predictions=est.para$predictions, strat=est.para$strat, getmodels=est.para$getmodels, list.tindx = est.para$list.tindx, ...) }, "boot" = { RET <- bootest(y, formula, data, model=model, predict=predict, nboot=est.para$nboot, list.tindx = est.para$list.tindx, predictions = est.para$predictions, ...) }, "632plus" = { RET <- bootest(y, formula, data, model=model, predict=predict, nboot=est.para$nboot, bc632plus=TRUE, list.tindx = est.para$list.tindx, predictions = est.para$predictions, ...) }, "632plus_boot" = { RET <- bootest(y, formula, data, model=model, predict=predict, nboot=est.para$nboot, bc632plus = TRUE, list.tindx = est.para$list.tindx, predictions = est.para$predictions, both.boot = TRUE, ...) } ) } RET$call <- cl return(RET) } errorestinclass <- function(formula, data, subset=NULL, na.action=NULL, model=NULL, predict=NULL, estimator = c("cv", "boot", "632plus"), est.para = control.errorest(), ...) { if (is.null(data)) stop("data argument required but not given") # if (is.null(iclass)) # stop("no class membership variable for indirect classification given") iclass <- paste(formula[[2]][[2]]) if (!(iclass %in% colnames(data))) stop("membership variable not in given data") # # data <- data[complete.cases(data),] # iclassindx <- which(colnames(data) == iclass) y <- data[,iclassindx] if (!is.factor(y)) stop("iclass is not a factor") # X <- data[,-iclassindx] X <- data if(is.null(model)) stop("no classifier specified") switch(estimator, "cv" = { RET <- cv(y, formula, data=X, model=model, predict=predict, k=est.para$k, random=est.para$random, list.tindx = est.para$list.tindx, ...) }, "boot" = { RET <- bootest(y, formula, data=X, model=model, predict=predict, nboot=est.para$nboot, list.tindx = est.para$list.tindx, ...) }, "632plus" = { RET <- bootest(y, formula, data=X, model=model, predict=predict, nboot=est.para$nboot, bc632plus=TRUE, list.tindx = est.para$list.tindx, ...) 
}) RET } ipred/R/sbrier.R0000644000176200001440000001162414172231220013165 0ustar liggesusers# $Id: sbrier.R,v 1.5 2009/03/27 16:18:38 hothorn Exp $ getsurv <- function(obj, times) { # get the survival probability for times from KM curve `obj' if (!inherits(obj, "survfit")) stop("obj is not of class survfit") # class(obj) <- NULL # lt <- length(times) nsurv <- times # if the times are the same, return the km-curve if(length(times) == length(obj$time)) { if (all(times == obj$time)) return(obj$surv) } # otherwise get the km-value for every element of times separatly inside <- times %in% obj$time for (i in (1:lt)) { if (inside[i]) nsurv[i] <- obj$surv[obj$time == times[i]] else { less <- obj$time[obj$time < times[i]] if (length(less) == 0) nsurv[i] <- 1 else nsurv[i] <- obj$surv[obj$time == max(less)] } } nsurv } sbrier <- function(obj, pred, btime = range(obj[,1])) { if(!inherits(obj, "Surv")) stop("obj is not of class Surv") # check for right censoring # class(obj) <- NULL # if (attr(obj, "type") != "right") stop("only right-censoring allowed") N <- nrow(obj) # get the times and censoring of the data, order them with resp. to time time <- obj[,1] ot <- order(time) cens <- obj[ot,2] time <- time[ot] # get the times to compute the (integrated) Brier score over if (is.null(btime)) stop("btime not given") if (length(btime) < 1) stop("btime not given") if (length(btime) == 2) { if (btime[1] < min(time)) warning("btime[1] is smaller than min(time)") if (btime[2] > max(time)) warning("btime[2] is larger than max(time)") btime <- time[time >= btime[1] & time <= btime[2]] } ptype <- class(pred) # S3 workaround if (is.null(ptype)) { if (is.vector(pred)) ptype <- "vector" if (is.list(pred)) ptype <- "list" } # if (ptype == "numeric" && is.vector(pred)) ptype <- "vector" survs <- NULL switch(ptype, survfit = { survs <- getsurv(pred, btime) survs <- matrix(rep(survs, N), nrow=length(btime)) }, list = { if (!inherits(pred[[1]], "survfit")) stop("pred is not a list of survfit objects") if (length(pred) != N) stop("pred must be of length(time)") pred <- pred[ot] survs <- matrix(unlist(lapply(pred, getsurv, times = btime)), nrow=length(btime), ncol=N) }, vector = { if (length(pred) != N) stop("pred must be of length(time)") if (length(btime) != 1) stop("cannot compute integrated Brier score with pred") survs <- pred[ot] }, matrix = { # if (all(dim(pred) == c(length(btime), N))) survs <- pred[,ot] else stop("wrong dimensions of pred") # }) if (is.null(survs)) stop("unknown type of pred") # reverse Kaplan-Meier: estimate censoring distribution ### deal with ties hatcdist <- prodlim(Surv(time, cens) ~ 1,reverse = TRUE) csurv <- predict(hatcdist, times = time, type = "surv") csurv[csurv == 0] <- Inf # hatcdist <- survfit(Surv(time, 1 - cens) ~ 1) # csurv <- getsurv(hatcdist, time) # csurv[csurv == 0] <- Inf # conditional survival for new timepoints csurv_btime <- predict(hatcdist, times = btime, type = "surv") csurv_btime[is.na(csurv_btime)] <- min(csurv_btime, na.rm = TRUE) csurv_btime[csurv_btime == 0] <- Inf bsc <- rep(0, length(btime)) # compute Lebesque-integrated Brier score if (length(btime) > 1) { for (j in 1:length(btime)) { help1 <- as.integer(time <= btime[j] & cens == 1) help2 <- as.integer(time > btime[j]) bsc[j] <- mean((0 - survs[j,])^2*help1*(1/csurv) + (1-survs[j,])^2*help2*(1/csurv_btime[j])) } ### apply trapezoid rule idx <- 2:length(btime) RET <- diff(btime) %*% ((bsc[idx - 1] + bsc[idx]) / 2) RET <- RET / diff(range(btime)) ### previously was #diffs <- c(btime[1], btime[2:length(btime)] - # 
btime[1:(length(btime)-1)]) #RET <- sum(diffs*bsc)/max(btime) names(RET) <- "integrated Brier score" attr(RET, "time") <- range(btime) # compute Brier score at one single time `btime' } else { help1 <- as.integer(time <= btime & cens == 1) help2 <- as.integer(time > btime) cs <- predict(hatcdist, times=btime, type = "surv") ### cs <- getsurv(hatcdist, btime) if (cs == 0) cs <- Inf RET <- mean((0 - survs)^2*help1*(1/csurv) + (1-survs)^2*help2*(1/cs)) names(RET) <- "Brier score" attr(RET, "time") <- btime } RET } ipred/R/bootest.R0000644000176200001440000002136214172231220013356 0ustar liggesusers# $Id: bootest.R,v 1.18 2004/02/09 08:08:21 peters Exp $ bootest <- function(y, ...) { if(is.null(class(y))) class(y) <- data.class(y) UseMethod("bootest", y) } bootest.default <- function(y, ...) { stop(paste("Do not know how to handle objects of class", class(y))) } bootest.integer <- function(y, ...) { bootest.numeric(y, ...) } bootest.factor <- function(y, formula, data, model, predict, nboot=25, bc632plus = FALSE, list.tindx = NULL, predictions = FALSE, both.boot = FALSE, ...) { # bootstrap estimator of misclassification error N <- length(y) nindx <- 1:N if(!is.null(list.tindx)) nboot <- length(list.tindx) bootindx <- matrix(NA, ncol=nboot, nrow=N) if(predictions) { BOOTINDX <- data.frame(matrix(NA, ncol=nboot, nrow=N)) } classes <- levels(y) USEPM <- FALSE if(!is.data.frame(data)) stop("data is not a data.frame") if(nboot <=2) stop("to small number of bootstrap replications") if(is.null(nboot)) stop("number of bootstrap replications is missing") if(!is.null(list.tindx) & length(list.tindx) != nboot) stop(paste("List of selected observations per bootstrap sample has to be", nboot)) for(i in 1:nboot) { if(!is.null(list.tindx)) { tindx <- list.tindx[[i]] if(length(tindx) > N) warning("number of selected observations is larger than the sample size") } else { tindx <- sample(nindx, N, replace = TRUE) } mymodel <- model(formula, data = data[tindx,], ...) # check if mymodel is a function which should be used instead of # predict if (is.function(mymodel)) { if(!is.null(predict) & i == 1) warning("model returns a function and predict is specified, using models output") predict <- mymodel USEPM <- TRUE } if (USEPM) pred <- predict(newdata=data) else pred <- predict(mymodel, newdata = data) if (!is.factor(pred)) stop("predict does not return factor values") pred <- factor(pred, levels=classes)[-tindx] if (length(pred) != length(y[-tindx])) stop("different length of data and prediction") if(predictions) { BOOTINDX[,i] <- factor(BOOTINDX[,i],levels = classes) BOOTINDX[-tindx, i] <- pred } bootindx[-tindx, i] <- (pred != y[-tindx]) } fun <- function(x) ifelse(all(is.na(x)), NA, mean(as.integer(x), na.rm = TRUE)) one <- mean(apply(bootindx, 1, fun), na.rm = TRUE) if (bc632plus) { full.model <- model(formula, data = data, ...) 
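    # .632+ estimator (Efron and Tibshirani, 1997): blend the resubstitution
    # error of the model fitted to the complete sample with the out-of-bag
    # bootstrap error `one'; gamma below is the no-information error rate
    # and r the relative overfitting rate used to adapt the weights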
# check if full.model is a function which should be used instead of
    # predict
    if (is.function(full.model)) {
      predict <- full.model
      USEPM <- TRUE
    }
    if (USEPM)
      full.pred <- predict(newdata=data)
    else
      full.pred <- predict(full.model, newdata = data)
    resubst <- mean(full.pred != y, na.rm = TRUE)
    err632 <- 0.368*resubst + 0.632*one
    # subset y and full.pred with the same index vector: subsetting y first
    # and reusing it in the condition for full.pred would mismatch the two
    ok <- !is.na(y) & !is.na(full.pred)
    y <- y[ok]
    full.pred <- full.pred[ok]
    gamma <- sum(outer(y, full.pred, function(x, y) ifelse(x==y, 0, 1) ))/
             (length(y)^2)
    r <- (one - resubst)/(gamma - resubst)
    r <- ifelse(one > resubst & gamma > resubst, r, 0)
    errprime <- min(one, gamma)
    # weight <- .632/(1-.368*r)
    # err <- (1-weight)*resubst + weight*one
    err <- err632 + (errprime - resubst)*(0.368*0.632*r)/(1-0.368*r)
    if(predictions)
      RET <- list(error = err, nboot = nboot, bc632plus = TRUE, predictions = BOOTINDX)
    else
      RET <- list(error = err, nboot=nboot, bc632plus = TRUE)
    if(both.boot){
      bc632plus <- FALSE
      RETbc <- RET
    }
  }
  if(!bc632plus) {
    err <- one
    expb <- rep(0, nboot)
    for(i in 1:nboot)
      expb[i] <- mean(apply(bootindx[,-i], 1, fun), na.rm = TRUE)
    sdint <- sqrt( ((nboot - 1)/nboot)*sum((expb - mean(expb))^2) )
    if(predictions)
      RET <- list(error = err, sd = sdint, bc632plus = FALSE, nboot = nboot, predictions = BOOTINDX)
    else
      RET <- list(error = err, sd=sdint, bc632plus=FALSE, nboot=nboot)
    if(both.boot){
      RET <- list("boot" = RET, "632plus" = RETbc)
    }
  }
  class(RET) <- "bootestclass"
  RET
}

bootest.numeric <- function(y, formula, data, model, predict, nboot=25,
                            bc632plus=FALSE, list.tindx = NULL, predictions = FALSE, ...) {
  # bootstrap estimator of the root mean squared error
  if (bc632plus) stop("cannot compute 632+ estimator of mean squared error")
  if(!is.null(list.tindx)) nboot <- length(list.tindx)
  if (nboot <=2) stop("too small number of bootstrap replications")
  ##FIX: nrow =
  N <- length(y)
  nindx <- 1:N
  bootindx <- matrix(NA, ncol=nboot, nrow=N)
  if(predictions) BOOTINDX <- matrix(NA, ncol=nboot, nrow=N)
  USEPM <- FALSE
  if (!is.data.frame(data)) stop("data is not a data.frame")
  if(is.null(nboot)) stop("number of bootstrap replications is missing")
  if(!is.null(list.tindx) & length(list.tindx) != nboot)
    stop(paste("List of selected observations per bootstrap sample has to be", nboot))
  for(i in 1:nboot) {
    if(!is.null(list.tindx)) {
      tindx <- list.tindx[[i]]
      if(length(tindx) > N)
        warning("number of selected observations is larger than the sample size")
    } else {
      tindx <- sample(nindx, N, replace = TRUE)
    }
    # tindx <- ifelse(!is.null(list.tindx), list.tindx[[i]], sample(nindx, N, replace = TRUE))
    mymodel <- model(formula, data = data[tindx,], ...)
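    # the model is fitted on the bootstrap sample; the observations not
    # drawn (out-of-bag) form the test set `outbootdata' below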
outbootdata <- subset(data, !(nindx %in% tindx))
    # check if mymodel is a function which should be used instead of
    # predict
    if (is.function(mymodel)) {
      if(!is.null(predict) & i == 1)
        warning("model returns a function and predict is specified, using models output")
      predict <- mymodel
      USEPM <- TRUE
    }
    if (USEPM)
      pred <- predict(newdata=outbootdata)
    else
      pred <- predict(mymodel, newdata = outbootdata)
    if (!is.numeric(pred)) stop("predict does not return numerical values")
    if (length(pred) != length(y[-tindx]))
      stop("different length of data and prediction")
    if(predictions) BOOTINDX[-tindx, i] <- pred
    bootindx[-tindx, i] <- (pred - y[-tindx])^2
  }
  fun <- function(x) ifelse(all(is.na(x)), NA, mean(x, na.rm = TRUE))
  err <- sqrt(mean(apply(bootindx, 1, fun), na.rm = TRUE))
  if(predictions)
    RET <- list(error = err, nboot = nboot, predictions = BOOTINDX)
  else
    RET <- list(error = err, nboot=nboot)
  class(RET) <- "bootestreg"
  RET
}

bootest.Surv <- function(y, formula, data=NULL, model, predict, nboot=25,
                         bc632plus=FALSE, list.tindx = NULL, predictions = FALSE, ...) {
  # bootstrap estimator of Brier's score
  if (bc632plus) stop("cannot compute 632+ estimator of Brier's score")
  N <- dim(y)[1]
  if(!is.null(list.tindx)) nboot <- length(list.tindx)
  nindx <- 1:N
  bootindx <- matrix(NA, ncol=nboot, nrow=N)
  if(predictions) BOOTINDX <- matrix(NA, ncol=nboot, nrow=N)
  USEPM <- FALSE
  if(is.null(nboot)) stop("number of bootstrap replications is missing")
  if (nboot <=2) stop("too small number of bootstrap replications")
  if (is.null(data)) data <- as.data.frame(rep(1, N))
  if (!is.data.frame(data)) stop("data is not a data.frame")
  if(!is.null(list.tindx)) nboot <- length(list.tindx)
  for(i in 1:nboot) {
    if(!is.null(list.tindx)) {
      tindx <- list.tindx[[i]]
      if(length(tindx) > N)
        warning("number of selected observations is larger than the sample size")
    } else {
      tindx <- sample(nindx, N, replace = TRUE)
    }
    #tindx <- ifelse(!is.null(list.tindx), list.tindx[[i]], sample(nindx, N, replace = TRUE))
    #tindx <- sample(nindx, N, replace = TRUE)
    mymodel <- model(formula, data=data[tindx,], ...)
    outbootdata <- subset(data, !(nindx %in% tindx))
    # check if mymodel is a function which should be used instead of
    # predict
    if (is.function(mymodel)) {
      if(!is.null(predict) & i == 1)
        warning("model returns a function and predict is specified, using models output")
      predict <- mymodel
      USEPM <- TRUE
    }
    if (USEPM)
      pred <- predict(newdata=outbootdata)
    else
      pred <- predict(mymodel, newdata = outbootdata)
    if (is.list(pred)) {
      if (!inherits(pred[[1]], "survfit") && !inherits(pred, "survfit"))
        stop("predict does not return a list of survfit objects")
    } else {
      stop("predict does not return a list of survfit objects")
    }
    if(predictions) BOOTINDX[-tindx, i] <- sbrier(y[-tindx], pred)   ###???
    bootindx[-tindx, i] <- sbrier(y[-tindx], pred)
  }
  fun <- function(x) ifelse(all(is.na(x)), NA, mean(x, na.rm = TRUE))
  err <- mean(apply(bootindx, 1, fun), na.rm = TRUE)
  if(predictions)
    RET <- list(error = err, nboot = nboot, predictions = BOOTINDX)
  else
    RET <- list(error = err, nboot=nboot)
  class(RET) <- "bootestsurv"
  RET
}
ipred/R/ipredbagg.R0000644000176200001440000002043714172231220013625 0ustar liggesusers#$Id: ipredbagg.R,v 1.13 2003/06/11 10:40:17 peters Exp $

workhorse <- function(y, X, control, comb, bcontrol, thisclass, ...)
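# Editor's usage sketch for the `comb' protocol handled below (not run;
# mirrors the package documentation): each element of `comb' supplies a
# fitting and a prediction function whose output is appended as additional
# predictors, e.g. double-bagging with linear discriminant scores:
#   comb.lda <- list(list(model = lda,
#                         predict = function(obj, newdata) predict(obj, newdata)$x))
#   ipredbagg(y, X, comb = comb.lda)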
{ # This is double-bagging (comb is lda) or bundling (any arbritrary # model in comb) if (!is.data.frame(X)) X <- as.data.frame(X) # check user supplied functions if (!is.list(comb)) stop("comb not a list") N <- nrow(X) mydata <- cbind(data.frame(y), X) mtrees <- vector(mode="list", length=bcontrol$nbagg) for (i in 1:bcontrol$nbagg) { # double-bagging or bundling # comb is a list of lists, each of them having two elements: # model and predict bindx <- sample(1:N, bcontrol$ns, replace=bcontrol$replace) objs <- vector(mode="list", length=length(comb)) addclass <- function() { myindx <- 1:length(comb) for (k in 1:length(comb)) { # put the user supplied models into a try statement # if this fails, simply ignore it. # options(show.error.messages = FALSE) oX <- mydata[-bindx,] foo <- try(comb[[k]]$model(y ~ ., data=oX)) if (inherits(foo, "try-error")) { warning("could not build model:") print(foo[1]) foo <- NA myindx <- myindx[-k] } objs[[k]] <- foo # options(show.error.messages = TRUE) } fct <- function(newdata) { # use lexical scoping: return this function for the computation of # the additional predictors if (!is.data.frame(newdata)) newdata <- as.data.frame(newdata) addpred <- c() # the user supplied model failed, ignore it here. if (length(myindx) < 1) { RET <- NULL } else { # compute additional predictors for user supplied models for (k in myindx) addpred <- cbind(addpred, comb[[k]]$predict(objs[[k]], newdata)) # : more informative names??? colnames(addpred) <- paste("addpred", 1:ncol(addpred), sep="") # RET <- addpred } RET } if (length(myindx) < 1) return(NULL) else return(fct) } bfct <- addclass() # may have failed if (!is.null(bfct)) { # grow a tree using the original predictors # from the bootstrap sample and the additional predictors computed on # the bootstrap sample. oX <- cbind(mydata, bfct(X))[bindx,] btree <- rpart(y ~., data=oX, control = control,...) # return this object this <- list(bindx = bindx, btree = btree, bfct=bfct) } else { # return a simple tree if the user supplied model failed. oX <- mydata[bindx,] btree <- rpart(y ~., data=oX, control = control,...) this <- list(bindx = bindx, btree = btree) } class(this) <- thisclass mtrees[[i]] <- this } mtrees } ipredbagg <- function(y, ...) { if(is.null(class(y))) class(y) <- data.class(y) # UseMethod("ipredbagg", y, ...) UseMethod("ipredbagg", y) } ipredbagg.default <- function(y, ...) { stop(paste("Do not know how to handle objects of class", class(y))) } ipredbagg.integer <- function(y, ...) { ipredbagg.numeric(y,...) } ipredbagg.factor <- function(y, X=NULL, nbagg=25, control= rpart.control(minsplit=2, cp=0, xval=0), comb=NULL, coob=FALSE, ns=length(y), keepX = TRUE, ...) { # bagging classification trees if (!is.null(comb) && coob) stop("cannot compute out-of-bag estimate for combined models") if (nbagg == 1 && coob) stop("cannot compute out-of-bag estimate for single tree") # check nbagg if (nbagg < 1) stop("nbagg is not a positive integer") # bagging only if nbagg greater 1, else use the whole sample, i.e. one # simple tree if (nbagg == 1) { REPLACE <- FALSE } else { if (ns < length(y)) { # this is "subagging", i.e. 
sampling ns out of length(y) WITHOUT # replacement REPLACE <- FALSE } else { # the usual bootstrap: n out of n with replacement REPLACE <- TRUE } } if (!is.null(comb)) { # this is rather slow but we need to be as general as possible # with respect to classifiers as well as outcome of prediction (classes, # linear discriminant functions, conditional class probabilities, random # noise, if you like) mtrees <- workhorse(y, X, control, comb, bcontrol=list(nbagg=nbagg, ns=ns, replace=REPLACE), thisclass="sclass") } else { # use an optimized version mydata <- cbind(data.frame(y), X) mtrees <- irpart(y ~ ., data=mydata, control=control, bcontrol=list(nbagg=nbagg, ns=ns, replace=REPLACE)) } # always keep response and predictors as well as a list of nbagg objects # of class "sclass" if (keepX) RET <- list(y=y, X=X, mtrees=mtrees, OOB=coob, comb=!is.null(comb)) else RET <- list(y=y, X=NULL, mtrees=mtrees, OOB=coob, comb=!is.null(comb)) class(RET) <- "classbagg" if (coob) { pred <- predict(RET) ae <- all.equal(levels(pred), levels(RET$y)) if (is.logical(ae) && ae) RET$err <- mean(pred != RET$y, na.rm=TRUE) else RET$err <- mean(as.character(pred) != as.character(RET$y), na.rm=TRUE) } RET } ipredbagg.numeric <- function(y, X=NULL, nbagg=25, control= rpart.control(xval=0), comb=NULL, coob=FALSE, ns=length(y), keepX = TRUE, ...) { # is control meaningful here??? # bagging regression trees if (!is.null(comb) && coob) stop("cannot compute out-of-bag estimate for combined models") if (nbagg == 1 && coob) stop("cannot compute out-of-bag estimate for single tree") # check nbagg if (nbagg < 1) stop("nbagg is not a positive integer") # only bagg if nbagg greater 1, else use the whole sample if (nbagg == 1) { REPLACE <- FALSE } else { if (ns < length(y)) { # this is "subagging", i.e. sampling ns out of length(y) WITHOUT # replacement REPLACE <- FALSE } else { # the usual bootstrap: n out of n with replacement REPLACE <- TRUE } } if (!is.null(comb)) { mtrees <- workhorse(y, X, control, comb, bcontrol=list(nbagg=nbagg, ns=ns, replace=REPLACE), thisclass="sreg") } else { mydata <- cbind(data.frame(y), X) mtrees <- irpart(y ~ ., data=mydata, control=control, bcontrol=list(nbagg=nbagg, ns=ns, replace=REPLACE)) } if (keepX) RET <- list(y=y, X=X, mtrees=mtrees, OOB=coob, comb=!is.null(comb)) else RET <- list(y=y, X=NULL, mtrees=mtrees, OOB=coob, comb=!is.null(comb)) class(RET) <- "regbagg" if (coob) RET$err <- sqrt(mean((predict(RET) - RET$y)^2, na.rm=TRUE)) RET } ipredbagg.Surv <- function(y, X=NULL, nbagg=25, control= rpart.control(xval=0), comb=NULL, coob=FALSE, ns=dim(y)[1], keepX = TRUE, ...) { # is control meaningful here??? # bagging survival trees if (!is.null(comb) && coob) stop("cannot compute out-of-bag estimate for combined models") if (nbagg == 1 && coob) stop("cannot compute out-of-bag estimate for single tree") # check nbagg if (nbagg < 1) stop("nbagg is not a positive integer") # only bagg if nbagg greater 1, else use the whole sample if (nbagg == 1) { REPLACE <- FALSE } else { if (ns < dim(y)[1]) { # this is "subagging", i.e. 
sampling ns out of length(y) WITHOUT # replacement REPLACE <- FALSE } else { # the usual bootstrap: n out of n with replacement REPLACE <- TRUE } } if (!is.null(comb)) { mtrees <- workhorse(y, X, control, comb, bcontrol=list(nbagg=nbagg, ns=ns, replace=REPLACE), thisclass="ssurv") } else { mydata <- cbind(data.frame(y), X) mtrees <- irpart(y ~ ., data=mydata, control=control, bcontrol=list(nbagg=nbagg, ns=ns, replace=REPLACE)) } if (keepX) RET <- list(y=y, X=X, mtrees=mtrees, OOB=coob, comb=!is.null(comb)) else RET <- list(y=y, X=NULL, mtrees=mtrees, OOB=coob, comb=!is.null(comb)) class(RET) <- "survbagg" if (coob) RET$err <- sbrier(RET$y, predict(RET)) RET } ipred/R/predict.irpart.R0000644000176200001440000000347514172231220014636 0ustar liggesusers# # a modified version of `predict.rpart.s' from the rpart package # see COPYRIGHTS for details. # predict.irpart <- function(object, newdata = list(), type = c("vector", "prob", "class", "matrix"), ...) { if(!inherits(object, "rpart")) stop("Not legitimate tree") mtype <- missing(type) type <- match.arg(type) if(missing(newdata)) where <- object$where else { if(is.null(attr(newdata, "terms")) & !inherits(newdata, "rpart.matrix")) { Terms <- delete.response(object$terms) act <- (object$call)$na.action if (is.null(act)) act<- na.rpart newdata <- model.frame(Terms, newdata, na.action = act, xlev=attr(object, "xlevels")) newdata <- getFromNamespace("rpart.matrix", ns = "rpart")(newdata) } where <- getFromNamespace("pred.rpart", ns = "rpart")(object, newdata) } frame <- object$frame method <- object$method ylevels <- attr(object, "ylevels") nclass <- length(ylevels) if(mtype && nclass > 0) type <- "prob" if(type == "vector" || (type=="matrix" && is.null(frame$yval2))) { pred <- frame$yval[where] names(pred) <- names(where) } else if (type == "matrix") { pred <- frame$yval2[where,] dimnames(pred) <- list(names(where), NULL) } else if(type == "class" && nclass > 0) { pred <- factor(ylevels[frame$yval[where]], levels=ylevels) names(pred) <- names(where) } else if (type == "prob" && nclass > 0) { pred <- frame$yval2[where, 1 + nclass + 1:nclass] dimnames(pred) <- list(names(where), ylevels) } else stop("Invalid prediction for rpart object") # Expand out the missing values in the result # But only if operating on the original dataset if (missing(newdata) && !is.null(object$na.action)) pred <- naresid(object$na.action, pred) pred } ipred/R/kfoldcv.R0000644000176200001440000000120314172231220013317 0ustar liggesusers# $Id: kfoldcv.R,v 1.3 2002/09/12 08:56:42 hothorn Exp $ kfoldcv <- function(k,N, nlevel=NULL) { if (is.null(nlevel)) { # no stratification if (k > N) return(c(rep(1, N), rep(0, k-N))) fl <- floor(N/k) ce <- ceiling(N/k) if (fl == ce) return(rep(fl, k)) else return(c(rep(ce, round((N/k - fl)*k)), rep(fl, round((1 - (N/k - fl))*k)))) } else { # stratification # if (!is.integer(nlevel)) stop("nlevel is not a vector if integers") kmat <- matrix(0, ncol=k, nrow=length(nlevel)) for (i in 1:length(nlevel)) kmat[i,] <- kfoldcv(k, nlevel[i]) return(kmat) } } ipred/R/predict.inclass.R0000644000176200001440000000241114172231220014756 0ustar liggesusers# $Id: predict.inclass.R,v 1.19 2003/03/31 08:44:16 peters Exp $ # Additional option type ="class", if intermediate is nominal predict.inclass <- function(object, newdata, ...) 
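# two-stage prediction: first every intermediate variable is predicted from
# its fitted pFUN model, then the response model (or a fixed classifying
# function cFUN) is applied to newdata completed with these predictions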
{ if(!is.data.frame(newdata)) newdata <- as.data.frame(newdata) q <- length(object$model.intermediate) # number of intermediates namen <- names(object$model.intermediate) intermediate <- is.vector(NULL, mode = "NULL") for(i in 1:q) { if(!is.null(object$para.intermediate[[i]][["predict"]])) { RET <- object$para.intermediate[[i]][["predict"]](object$model.intermediate[[i]], newdata = newdata, ...) } else { RET <- predict(object$model.intermediate[[i]], newdata = newdata, ...) } intermediate <- data.frame(intermediate, RET) } intermediate <- intermediate[,-1] names(intermediate) <- namen intermediate <- data.frame(newdata[,!(names(newdata) %in% names(intermediate))], intermediate) if(!is.function(object$para.response)) { if(!is.null(object$para.response[["predict"]])) { RET <- object$para.response[["predict"]](object$model.response, newdata = intermediate, ...) } else { RET <- predict(object$model.response, newdata = intermediate, ...) } } else { RET <- object$para.response(intermediate) } return(RET) } ipred/R/varset.R0000644000176200001440000000143714172231220013204 0ustar liggesusers# $Id: varset.R,v 1.2 2002/03/26 16:29:15 hothorn Exp $ varset <- function(N, sigma = 0.1, theta = 90, threshold = 0, u = 1:3) { # create U U <- matrix(rep(0, 4), ncol = 2) U[1, 1] <- u[1] U[1, 2] <- u[2] U[2, 1] <- u[3] U[2, 2] <- (theta-u[1]*u[3])/u[2] lambda <- sqrt(U[1, 1]^2 + U[1, 2]^2) U[1, ] <- U[1, ]/lambda lambda <- sqrt(U[2, 1]^2 + U[2, 2]^2) U[2, ] <- U[2, ]/lambda e <- matrix(rnorm(2*N, sd = sigma), ncol = 2, byrow = TRUE) expl <- matrix(rnorm(2*N), ncol = 2, byrow = TRUE) inter <- t(U %*%t(expl) + t(e)) z <- (inter > threshold) resp <- as.factor(ifelse((z[,1] + z[,2]) > 1, 1, 0)) colnames(expl) <- c("x1", "x2") colnames(inter) <- c("y1", "y2") result <- list(explanatory = expl, intermediate = inter, response = resp) return(result) } ipred/R/cv.R0000644000176200001440000001642414172231220012312 0ustar liggesusers#$Id: cv.R,v 1.21 2004/02/11 09:13:51 peters Exp $ cv <- function(y, ...) { if(is.null(class(y))) class(y) <- data.class(y) UseMethod("cv", y) } cv.default <- function(y, ...) { stop(paste("Do not know how to handle objects of class", class(y))) } cv.integer <- function(y, ...) { cv.numeric(y, ...) } cv.factor <- function(y, formula, data, model, predict, k=10, random=TRUE, strat=FALSE, predictions=NULL, getmodels=NULL, list.tindx = NULL, ...) { # k-fold cross-validation of misclassification error if (!is.data.frame(data)) stop("data is not of class data.frame") N <- length(y) classes <- levels(y) if (is.null(k)) k <- 10 if (is.null(random)) random <- TRUE if (is.null(strat)) strat <- FALSE if (is.null(predictions)) predictions <- FALSE if (is.null(getmodels)) getmodels <- FALSE USEPM <- FALSE if(!is.null(list.tindx)) k <- length(list.tindx) if(!is.null(list.tindx)) { random <- FALSE } # to reproduce results, either use `set.seed' or a fixed partition of # the samples if (random) myindx <- sample(1:N, N) else myindx <- 1:N y <- y[myindx] data <- data[myindx,] # determine an appropriate splitting for the sample size into # k roughly equally sized parts mysplit <- ssubset(y, k, strat=strat) allpred <- vector(mode="character", length=N) fu <- function(x) levels(x)[as.integer(x)] nindx <- 1:N if (getmodels) models <- vector(k, mode="list") for(i in 1:k) { if(!is.null(list.tindx)) { tindx <- list.tindx[[i]] } else { tindx <- mysplit[[i]] } folddata <- subset(data, !(nindx %in% tindx)) mymodel <- model(formula, data=folddata, ...) 
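    # the model is fitted on the training folds only; class predictions for
    # the held-out fold `tindx' are collected in `allpred' below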
if (getmodels) models[[i]] <- mymodel # check of mymodel is a function which should be used instead of # predict if (is.function(mymodel)) { if(!is.null(predict) & i == 1) warning("model returns a function and predict is specified, using models output") predict <- mymodel USEPM <- TRUE } # we assume predict to return factor levels if (USEPM) pred <- predict(newdata=data) else pred <- predict(mymodel, newdata = data) if (!is.factor(pred)) stop("predict does not return factor values") pred <- factor(pred, levels=classes) # # there is no c() for factors which preserves the levels, isn't it? # use characters allpred[tindx] <- fu(pred[tindx]) # } allpred <- factor(allpred, levels=classes) allpred <- allpred[order(myindx)] err <- mean(allpred != y[order(myindx)], na.rm = TRUE) if (predictions) RET <- list(error = err, k = k, predictions=allpred) else RET <- list(error = err, k = k) if (getmodels) RET <- c(RET, models=list(models)) class(RET) <- "cvclass" RET } cv.numeric <- function(y, formula, data, model, predict, k=10, random=TRUE, predictions=NULL, strat=NULL, getmodels=NULL, list.tindx = NULL, ...) { # k-fold cross-validation of mean squared error if (!is.data.frame(data)) stop("data is not of class data.frame") if(!is.null(list.tindx)) k <- length(list.tindx) N <- length(y) if (is.null(k)) k <- 10 if (is.null(random)) random <- TRUE if (is.null(predictions)) predictions <- FALSE if (is.null(getmodels)) getmodels <- FALSE USEPM <- FALSE # determine an appropriate splitting for the sample size into # k roughly equally sized parts # if(is.null(list.tindx)) { a <- kfoldcv(k, N) # to reproduce results, either use `set.seed' or a fixed partition of # the samples if (random) myindx <- sample(1:N, N) else myindx <- 1:N nindx <- 1:N # } if (getmodels) models <- vector(k, mode="list") allpred <- rep(0, N) for(i in 1:k) { if(!is.null(list.tindx)) { tindx <- list.tindx[[i]] } else { if (i > 1) tindx <- myindx[(sum(a[1:(i-1)])+1):sum(a[1:i])] else tindx <- myindx[1:a[1]] } folddata <- subset(data, !(nindx %in% tindx)) mymodel <- model(formula, data=folddata, ...) if (getmodels) models[[i]] <- mymodel # check of mymodel is a function which should be used instead of # predict if (is.function(mymodel)) { if(!is.null(predict) & i == 1) warning("model returns a function and predict is specified, using models output") predict <- mymodel USEPM <- TRUE } outfolddata <- subset(data, nindx %in% tindx) if (USEPM) pred <- predict(newdata=outfolddata) else pred <- predict(mymodel, newdata = outfolddata) if (!is.numeric(pred)) stop("predict does not return numerical values") allpred[sort(tindx)] <- pred } err <- sqrt(mean((allpred - y)^2, na.rm = TRUE)) if (predictions) RET <- list(error = err, k = k, predictions=allpred) else RET <- list(error = err, k = k) if (getmodels) RET <- c(RET, models=list(models)) class(RET) <- "cvreg" RET } cv.Surv <- function(y, formula, data=NULL, model, predict, k=10, random=TRUE, predictions=FALSE, strat=FALSE, getmodels=NULL, list.tindx = NULL, ...) 
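# k-fold cross-validation of the Brier score for censored data; the
# user-supplied predict must return a survfit object or a list of survfit
# objects (checked below)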
{ # k-fold cross-validation of Brier's score if (is.null(predictions)) predictions <- FALSE if(is.null(random)) random <- TRUE if (is.null(predictions)) predictions <- FALSE if (is.null(strat)) strat <- FALSE if (is.null(getmodels)) getmodels <- FALSE USEPM <- FALSE N <- length(y[,1]) nindx <- 1:N if(is.null(random)) random <- TRUE if(is.null(k)) k <- 10 if (is.null(data)) data <- rep(1, N) if(!is.null(list.tindx)) k <- length(list.tindx) if(is.null(k)) stop("k for k-fold cross-validation is missing") # determine an appropriate splitting for the sample size into # k roughly equally sized parts # if(is.null(list.tindx)) { a <- kfoldcv(k, N) # to reproduce results, either use `set.seed' or a fixed partition of # the samples if (random) myindx <- sample(1:N, N) else myindx <- 1:N # } if (getmodels) models <- vector(k, mode="list") cverr <- c() for(i in 1:k) { if(!is.null(list.tindx)) { tindx <- list.tindx[[i]] } else { if (i > 1) tindx <- myindx[(sum(a[1:(i-1)])+1):sum(a[1:i])] else tindx <- myindx[1:a[1]] } folddata <- subset(data, !(nindx %in% tindx)) mymodel <- model(formula, data=folddata, ...) if (getmodels) models[[i]] <- mymodel # check if mymodel is a function which should be used instead of # predict if (is.function(mymodel)) { if(!is.null(predict) & i == 1) warning("model returns a function and predict is specified, using models output") predict <- mymodel USEPM <- TRUE } outfolddata <- subset(data, (nindx %in% tindx)) if (USEPM) pred <- predict(newdata=outfolddata) else pred <- predict(mymodel, newdata = outfolddata) if (is.list(pred)) { if (!inherits(pred[[1]], "survfit") && !inherits(pred, "survfit")) stop("predict does not return a list of survfit objects") } else { stop("predict does not return a list of survfit objects") } err <- sbrier(y[sort(tindx)], pred) cverr <- c(cverr,rep(err, length(tindx))) } RET <- list(error = mean(cverr), k=k) if (getmodels) RET <- c(RET, models=list(models)) class(RET) <- "cvsurv" RET } ipred/R/inbagg.R0000644000176200001440000002153614172231220013131 0ustar liggesusersworkhorse.inbagg <- function(object, y, X, W, cFUN, w.training.set, y.training.set, bcontrol, control, ...) 
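# for each bootstrap sample: fit the pFUN models for the intermediate
# variables (on the bag, the out-of-bag observations or all observations,
# depending on w.training.set), then train the classifying model cFUN on
# data completed with the fitted intermediates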
{ formula.list <- object data <- data.frame(y, X, W) mtrees <- vector(mode="list", length=bcontrol$nbagg) if(w.training.set[1] == "all") fit.vals <- 1:length(y) for (i in 1:bcontrol$nbagg) { bindx <- sample(1:length(y), bcontrol$ns, replace=bcontrol$replace) if(w.training.set[1] == "oob") fit.vals <- (-bindx) if(w.training.set[1] == "bag") fit.vals <- bindx objs <- vector(mode="list", length=length(formula.list)) #prediction models for intermediate variables names(objs) <- names(formula.list) addclass <- function() { ##START addclass <- function() for (j in 1:length(formula.list)) { ##Fitting prediction models for intermediates oX <- data[fit.vals, c(paste(formula.list[[j]]$formula[[2]]), attr(terms(formula.list[[j]]$formula, dataa = data), "term.labels"))] foo <- try(formula.list[[j]]$model(formula.list[[j]]$formula, data = oX)) objs[[j]] <- foo } fct <- function(newdata) { ##START fct <- function(newdata) if (!is.data.frame(newdata)) newdata <- as.data.frame(newdata) add.predictors <- rep(0, nrow(newdata)) for (j in 1:length(formula.list)){ ## predict additional intermediates using fitted models oXnewdata <- newdata[,attr(terms(formula.list[[j]]$formula, data = data), "term.labels")] if(is.null(formula.list[[j]]$predict)) { res <- try(predict(objs[[j]], newdata = oXnewdata)) } else { res <- try(formula.list[[j]]$predict(objs[[j]], newdata = oXnewdata)) } ###FIX: action for class(res) == "try-error" add.predictors <- data.frame(add.predictors, res) } add.predictors <- add.predictors[,-1] if(is.null(dim(add.predictors))) add.predictors <- matrix(add.predictors, ncol = 1) colnames(add.predictors) <- names(formula.list) add.predictors } ##END fct <- function(newdata) return(fct) } ##END addclass <- function() bfct <- addclass() ###bfct is a function (addclass) if (!is.null(bfct)) { expl.cFUN <- attr(terms(as.formula(cFUN$formula), data = data), "term.labels") if(!is.null(cFUN$fixed.function)) { btree <- cFUN } else { W.new <- bfct(X) W.new.names <- sub(".[0-9]$", "", colnames(W.new)) if(y.training.set[1] == "fitted.bag") { ###contstruct on bag oX <- data.frame(y, X, W.new)[bindx,] right.side <- paste(c(expl.cFUN[!(expl.cFUN %in% W.new.names)], colnames(W.new)[W.new.names %in% expl.cFUN]), collapse = "+") cFUN$formula <- as.formula(paste(cFUN$formula[[2]], "~", right.side)) } if(y.training.set[1] == "original") { ###construct on original variables if(length(W.new.names)> length(colnames(W))) stop("If classifying function is trained on original intermediate, only one predictive model per intermediate can be constructed.") oX <- data.frame(y, X, W[,W.new.names]) names(oX)[(ncol(oX)-ncol(W)+1):ncol(oX)] <- colnames(W.new) } if(y.training.set[1] == "fitted.subset") { ###construct on subset oX <- data.frame(y, X, W.new)[!subset,] right.side <- paste(c(expl.cFUN[!(expl.cFUN %in% W.new.names)], colnames(W.new)[W.new.names %in% expl.cFUN]), collapse = "+") cFUN$formula <- as.formula(paste(cFUN$formula[[2]], "~", right.side)) } names(oX)[names(oX) == "y"] <- paste(cFUN$formula[[2]]) btree <- cFUN$model(cFUN$formula, data = oX, ...) btree <- list(model = btree, predict = cFUN$predict) } this <- list(bindx = bindx, btree = btree, bfct=bfct) } else { stop("Predictive function for intermediates not executable: Classifying function can not be applied.") } class(this) <- "thisclass" mtrees[[i]] <- this } mtrees } inbagg <- function(formula, data, ...) UseMethod("inbagg", data) inbagg.default <- function(formula, data,...) 
{ stop(paste("Do not know how to handle objects of class", class(data))) } inbagg.data.frame <- function(formula, data, pFUN=NULL, cFUN=list(model = NULL, predict = NULL, training.set = NULL), nbagg = 25, ns = 0.5, replace = FALSE, ...) { if(!is.function(cFUN)) { if(is.null(cFUN$model)) { cFUN$model <- function(formula, data) rpart(formula, data, control = rpart.control(minsplit=2, cp=0, xval=0)) if(is.null(cFUN$predict)) cFUN$predict <- function(object, newdata) predict(object, newdata, type = "class") if(is.null(cFUN$training.set)) cFUN$trainig.set <- "fitted.bag" } } ##check formula if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1)) stop("formula missing or incorrect") m <- match.call(expand.dots = FALSE) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) ##editing formula if(length(formula[[2]])==3) { if(!is.function(cFUN)) { if (is.null(cFUN$formula)) y.formula <- as.formula(formula[[2]]) else y.formula <- cFUN$formula } w.formula <- XX~YY w.formula[[2]] <- formula[[2]][[3]] w.formula[[3]] <- formula[[3]] response <- paste(formula[[2]][[2]]) w.names <- attr(terms(as.formula(formula[[2]]), data = data), "term.labels") x.names <- attr(terms(as.formula(formula), data = data), "term.labels") if((length(x.names) == 1) && x.names == ".") x.names <- colnames(data)[!(colnames(data) %in% c(response, w.names))] y <- data[, response] X <- data[, x.names] W <- data[, w.names] if(is.null(dim(X))) X <- matrix(X, ncol = 1, dimnames = list(rownames(W), x.names)) if(is.null(dim(W))) W <- matrix(W, ncol = 1, dimnames = list(rownames(X), w.names)) if(is.function(cFUN)) { y.formula <- as.formula(paste(formula[[2]][[2]], "~", paste(c(x.names, w.names), collapse = "+"))) fixed.function <- cFUN cFUN <- list() cFUN$fixed.function <- fixed.function } cFUN$formula <- y.formula } else { stop(paste("Specified formula has to be of type y~x~w")) } ##remove settings of training.set if(is.null(pFUN$training.set)) w.training.set <- "oob" else w.training.set <- pFUN$training.set[1] pFUN$training.set <- NULL if(is.null(cFUN$training.set)) y.training.set <- "fitted.bag" else y.training.set <- cFUN$training.set[1] cFUN$training.set <- NULL bcontrol <- list(nbagg = nbagg, ns = length(y)*ns, replace = replace) if(is.null(w.formula)) stop("no formula for prediction model specified") ##formula.list : list of lists which specify an abitrary number of models for intermediate variables: ##w1.1, w2.1, w3.1, ...., w2.1, w2.2, w3.1, .... 
where 'w*' is the variable and '.*' describes the model P <- length(pFUN) number.models <- c() for(i in 1:P) { if(is.null(pFUN[[i]]$formula)) pFUN[[i]]$formula <- w.formula number.models <- c(number.models, paste(attr(terms(pFUN[[i]]$formula[-3], data = data), "term.labels"), ".", i, sep = "")) } formula.list <- vector(mode = "list", length= length(number.models)) names(formula.list) <- paste(number.models) for(i in 1:P) { res <- list() Qi <- length(attr(terms(pFUN[[i]]$formula[-3], data = data), "term.labels")) for(j in 1:Qi) { res$formula <- w.formula res$formula[[2]] <- as.name(attr(terms(res$formula[-3], data = data), "term.labels")[j]) res$formula[[3]] <- pFUN[[i]]$formula[[3]] if(res$formula[[3]] == ".") res$formula <- as.formula(paste(res$formula[[2]], "~", paste(x.names, collapse= "+"))) res$model <- pFUN[[i]]$model res$predict <- pFUN[[i]]$predict formula.list[[paste(res$formula[[2]], ".", i, sep = "")]] <- res } } ##apply res <- workhorse.inbagg(object = formula.list, y = y, X = X, W = W, cFUN = cFUN, w.training.set = w.training.set, y.training.set = y.training.set, bcontrol = bcontrol, ...) RET <- list(mtrees = res, y = y, W = W, X = X) class(RET) <- "inbagg" RET } print.inbagg <- function(x, ...) { q <- length(x$mtrees) intermediates <- attr(x$W, "names") text.intermediates <- paste("Indirect bagging, with", q, "bootstrap samples and intermediate variables: \n", paste(intermediates, collapse = " ")) cat("\n", text.intermediates, "\n") } summary.inbagg <- function(object, ...) { class(object) <- "summary.inbagg" object } print.summary.inbagg <- function(x, ...) { q <- length(x$mtrees) intermediates <- attr(x$W, "names") text.intermediates <- paste("Indirect bagging, with", q, "bootstrap samples and intermediate variables:", paste(intermediates, collapse = " ")) cat("\n", text.intermediates, "\n") for(i in 1:length(x)) { print(x$mtrees[[i]]) } } ipred/R/rsurv.R0000644000176200001440000000374414172231220013064 0ustar liggesusers# $Id: rsurv.R,v 1.5 2003/03/31 08:44:16 peters Exp $ rsurv <- function(N, model=c("A", "B", "C", "D", "tree"), gamma=NULL, fact=1, pnon=10, gethaz=FALSE) { model <- match.arg(model) X <- matrix(runif(N*5), ncol=5) colnames(X) <- paste("X", 1:ncol(X), sep="") switch(model, "A" = { time <- rexp(N) haz <- rep(1, N) }, "B" = { hazard <- as.numeric(X[,1] <= 0.5 & X[,2] > 0.5) time <- rexp(N) time[hazard == 1] <- rexp(sum(hazard==1), exp(3)) haz <- rep(1, N) haz[hazard == 1] <- exp(3) }, "C" = { hazard <- 3*X[,1] + X[,2] haz <- exp(hazard) time <- sapply(haz, rexp, n=1) }, "D" = { hazard <- 3*X[,1] - 3*X[,2] + 4*X[,3] - 2*X[,4] haz <- exp(hazard) time <- sapply(haz, rexp, n=1) }, "tree" = { hazard <- rep(0, nrow(X)) hazard[(X[,1] <= 0.5 & X[,2] <= 0.5)] <- 0 hazard[(X[,1] <= 0.5 & X[,2] > 0.5 & X[,4] <= 0.5)] <- 1 hazard[(X[,1] <= 0.5 & X[,2] > 0.5 & X[,4] > 0.5)] <- 0 hazard[(X[,1] > 0.5 & X[,3] <= 0.5 & X[,5] <= 0.3)] <- 1 hazard[(X[,1] > 0.5 & X[,3] <= 0.5 & X[,5] > 0.3)] <- 2 hazard[(X[,1] > 0.5 & X[,3] > 0.5 & X[,4] <= 0.7)] <- 2 hazard[(X[,1] > 0.5 & X[,3] > 0.5 & X[,4] > 0.7)] <- 3 hazard <- hazard * fact haz <- exp(hazard) time <- sapply(haz, rexp, n=1) if (pnon > 0) X <- cbind(X, matrix(runif(N*pnon), ncol=pnon)) colnames(X) <- paste("X", 1:ncol(X), sep="") }) if (!is.null(gamma)) censtime <- runif(N, min=0, max=gamma) else censtime <- Inf cens <- as.numeric(time <= censtime) time <- pmin(time, censtime) simdat <- as.data.frame(cbind(time, cens, X)) if (gethaz) attr(simdat, "hazard") <- haz return(simdat) } 
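# Editor's usage sketch (kept as a comment so that nothing is executed at
# package load; `gamma = 5' is an arbitrary illustrative censoring bound):
#   simdat <- rsurv(100, model = "C", gamma = 5, gethaz = TRUE)
#   haz <- attr(simdat, "hazard")   # true hazards, e.g. for csurv()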
ipred/R/checkfunArgs.R0000644000176200001440000000152014172231220014274 0ustar liggesusers# $Id: checkfunArgs.R,v 1.1 2003/02/17 09:49:31 hothorn Exp $ checkfunArgs <- function(fun, type=c("model", "predict")) { # check for appropriate arguments of user-supplied function "fun" # this will not work for generics in R < 1.7.0 and therefore not used by # now type <- match.arg(type) if (!is.function(fun)) { warning("fun is not a function") return(FALSE) } funargs <- formals(fun) switch(type, "model"={ if (!all(names(funargs)[1:2] %in% c("formula", "data"))) { warning("fun is not a function with at least 'formula' and 'data' arguments") return(FALSE) } else { return(TRUE) } }, "predict"={ if (length(funargs) < 2) { warnings("fun is not a function with at least 'object' and 'newdata' arguments") return(FALSE) } else { return(TRUE) } }) } ipred/R/predict.inbagg.R0000644000176200001440000000231214172231220014551 0ustar liggesuserspredict.inbagg <- function(object, newdata, ...) { if(!is.data.frame(newdata)) newdata <- as.data.frame(newdata) if(any(names(object$W) %in% names(newdata))) newdata <- newdata[!(names(newdata) %in% names(object$W))] NBAGG <- length(object$mtrees) N <- nrow(newdata) classes <- levels(object$y) vote <- matrix(0, nrow=N, ncol=length(classes)) for(i in 1:NBAGG) { intermed <- object$mtrees[[i]]$bfct(newdata) # XX <- data.frame(newdata, intermed) if(!is.null(object$mtrees[[i]]$btree$fixed.function)) { names(intermed) <- sub(".[0-9]$", "", names(intermed)) XX <- data.frame(newdata, intermed) # names(XX)[(ncol(XX)-ncol(intermed)+1):ncol(XX)] <- sub(".[0-9]$", "", names(intermed)) res <- object$mtrees[[i]]$btree$fixed.function(XX) } else { XX <- data.frame(newdata, intermed) if(is.null(object$mtrees[[i]]$btree$predict)) { res <- try(predict(object$mtrees[[i]]$btree$model, newdata = XX, ...)) } else { res <- try(object$mtrees[[i]]$btree$predict(object$mtrees[[i]]$btree$model, newdata = XX, ...)) } } res <- cbind(1:N, res) vote[res] <- vote[res] +1 } RET <- factor(classes[apply(vote, 1, uwhich.max)]) RET } ipred/R/slda.R0000644000176200001440000000744614172231220012631 0ustar liggesusers# $Id: slda.R,v 1.9 2005/06/29 08:50:28 hothorn Exp $ # stabilized linear discriminant analysis according to Laeuter & Kropf slda <- function(y, ...) UseMethod("slda") slda.default <- function(y, ...) stop(paste("Do not know how to handle objects of class", class(data))) slda.formula <- function(formula, data, subset, na.action=na.rpart, ...) { cl <- match.call() if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1) || (length(attr(terms(formula[-3], data = data), "term.labels")) != 1)) stop("formula missing or incorrect") m <- match.call(expand.dots = FALSE) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) m[[1]] <- as.name("model.frame") m$... <- NULL m <- eval(m, parent.frame()) Terms <- attr(m, "terms") grouping <- model.extract(m, "response") x <- model.matrix(Terms, m) xvars <- as.character(attr(Terms, "variables"))[-1] if ((yvar <- attr(Terms, "response")) > 0) xvars <- xvars[-yvar] xlev <- if (length(xvars) > 0) { xlev <- lapply(m[xvars], levels) xlev[!sapply(xlev, is.null)] } xint <- match("(Intercept)", colnames(x), nomatch=0) if(xint > 0) x <- x[, -xint, drop=FALSE] RET <- slda(y=grouping, X=x, ...) 
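  # store terms, call, contrasts and factor levels so that predict.slda can
  # rebuild the model matrix for new data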
RET$terms <- Terms RET$call <- match.call() RET$contrasts <- attr(x, "contrasts") RET$xlevels <- xlev attr(RET, "na.message") <- attr(m, "na.message") if(!is.null(attr(m, "na.action"))) RET$na.action <- attr(m, "na.action") RET } slda.factor <- function(y, X, q=NULL, ...) { p <- ncol(X) # substract global mean Xnull <- X - apply(X, 2, mean) if (!is.null(q)) { if (floor(q) != q) stop("q is not an integer") if (q > p) { q <- p warning("q is greater ncol(X), using q = ncol(X)") } if (q < 1) { q <- 1 warning("q is less than 1, using q = 1") } } # this is S_0 in Kropf (2000) Snull <- cov(Xnull) ewp <- svd(solve(diag(diag(Snull), ncol = ncol(Snull)))%*%Snull) if (!is.complex(ewp$d)) { # determine q by the number of eigenvalues > 1 if (is.null(q)) q <- sum(ewp$d > 1) D <- ewp$v[,1:q] if (q == 1) D <- as.matrix(D) # Xstab is still spherically distributed (Fang & Zhang, Laeuter, Kropf & # Glimm)! } else { D <- diag(p) } Xstab <- as.matrix(X) %*% D colnames(Xstab) <- paste("V", 1:ncol(Xstab), sep="") mylda <- lda(Xstab, grouping = y, ...) RET <- list(scores = D, mylda = mylda) class(RET) <- "slda" RET } predict.slda <- function(object, newdata, ...) { if(!inherits(object, "slda")) stop("object not of class slda") if(!is.null(Terms <- object$terms)) { # # formula fit (only) if(missing(newdata)) newdata <- model.frame(object) else { newdata <- model.frame(as.formula(delete.response(Terms)), newdata, na.action=function(x) x, xlev = object$xlevels) } x <- model.matrix(delete.response(Terms), newdata, contrasts = object$contrasts) xint <- match("(Intercept)", colnames(x), nomatch=0) if(xint > 0) x <- x[, -xint, drop=FALSE] } else { stop("object has no terms element") } if(ncol(x) != nrow(object$scores)) stop("wrong number of variables") # : check for variable names! # if(length(colnames(x)) > 0 && # any(colnames(x) != dimnames(object$means)[[2]])) # warning("Variable names in newdata do not match those in object") # X <- x %*% object$scores if (inherits(object$mylda, "lda")) return(predict(object$mylda, newdata=as.data.frame(X), ...)) else stop(paste("Do not know how to predict from objects of class", class(object$mylda))) } ipred/R/ssubset.R0000644000176200001440000000177114172231220013371 0ustar liggesusers ssubset <- function(y, k, strat=TRUE) { if (!is.factor(y)) stop("y is not of class factor") N <- length(y) nlevel <- table(y) nindx <- list() indx <- 1:N outindx <- list() if (strat) { for (j in 1:length(nlevel)) nindx <- c(nindx, list(indx[which(y == levels(y)[j])])) kmat <- kfoldcv(k, N, nlevel) for (i in 1:k) { sset <- kmat[,i] kindx <- c() for (j in 1:length(nlevel)) { if (i > 1) kindx <- c(kindx, nindx[[j]][(sum(kmat[j, 1:(i-1)])+1):sum(kmat[j,1:i])]) else kindx <- c(kindx, nindx[[j]][1:kmat[j,1]]) } kindx <- kindx[!is.na(kindx)] outindx <- c(outindx, list(kindx)) } return(outindx) } else { kmat <- kfoldcv(k, N) nindx <- indx for (i in 1:k) { if (i > 1) outindx <- c(outindx, list(nindx[(sum(kmat[1:(i-1)])+1):sum(kmat[1:i])])) else outindx <- c(outindx, list(nindx[1:kmat[1]])) } } return(outindx) } ipred/R/irpart.R0000644000176200001440000000302214172231220013171 0ustar liggesusers# # use update to fit multiple trees to bootstrap samples # irpart <- function(formula, data=NULL, weights, subset, na.action=na.rpart, method, model=FALSE, x=FALSE, y=TRUE, parms, control, cost, bcontrol, ...) { mc <- match.call() mc$bcontrol <- NULL mc[[1]] <- as.name("rpart") m <- match.call(expand.dots=FALSE) m$model <- m$method <- m$control <- m$bcontrol <- NULL m$x <- m$y <- m$parms <- m$... 
<- NULL m$cost <- NULL m$na.action <- na.action m[[1]] <- as.name("model.frame.default") m <- eval(m, parent.frame()) init_tree <- eval(mc, parent.frame()) nobs <- length(init_tree$where) if (missing(weights)) { weights <- rep(1.0, nobs) } else { warning("weights argument ignored in irpart") } yclasses <- c(class = "sclass", exp = "ssurv", anova = "sreg", poisson = "sreg") # # Bagging: repeat this several times! # if (is.null(bcontrol)) stop("bcontrol not given") mod <- vector(mode="list", length=bcontrol$nbagg) for (b in 1:bcontrol$nbagg) { if (bcontrol$nbagg > 1) bindx <- sample(1:nobs, bcontrol$ns, replace=bcontrol$replace) else bindx <- 1:nobs tab <- tabulate(bindx, nbins = nobs) mc$data <- m[bindx,,drop = FALSE] ### tab * weights ans <- eval(mc, parent.frame()) # return the appropriate class this <- list(bindx = bindx, btree = ans) class(this) <- yclasses[init_tree$method] mod[[b]] <- this } mod } ipred/R/prune.bagging.R0000644000176200001440000000105714172231220014424 0ustar liggesusers# $Id: prune.bagging.R,v 1.2 2002/09/12 08:59:13 hothorn Exp $ prune.classbagg <- function(tree, cp=0.01,...) { for(i in 1:length(tree$mtrees)) tree$mtrees[[i]]$btree <- prune( tree$mtrees[[i]]$btree, cp=cp, ...) tree } prune.regbagg <- function(tree, cp=0.01,...) { for(i in 1:length(tree$mtrees)) tree$mtrees[[i]]$btree <- prune( tree$mtrees[[i]]$btree, cp=cp, ...) tree } prune.survbagg <- function(tree, cp=0.01,...) { for(i in 1:length(tree$mtrees)) tree$mtrees[[i]]$btree <- prune( tree$mtrees[[i]]$btree, cp=cp, ...) tree } ipred/R/ipredknn.R0000644000176200001440000000520714172231220013511 0ustar liggesusers# $Id: ipredknn.R,v 1.5 2005/06/29 08:50:28 hothorn Exp $ # k-NN compatible with the fit(formula) - predict(object) framework ipredknn <- function(formula, data, subset, na.action, k=5, ...) { cl <- match.call() if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1) || (length(attr(terms(formula[-3], data = data), "term.labels")) != 1)) stop("formula missing or incorrect") m <- match.call(expand.dots = FALSE) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) m[[1]] <- as.name("model.frame") m$... <- NULL m$k <- NULL m <- eval(m, parent.frame()) Terms <- attr(m, "terms") y <- model.extract(m, "response") x <- model.matrix(Terms, m) xvars <- as.character(attr(Terms, "variables"))[-1] if ((yvar <- attr(Terms, "response")) > 0) xvars <- xvars[-yvar] xlev <- if (length(xvars) > 0) { xlev <- lapply(m[xvars], levels) xlev[!sapply(xlev, is.null)] } xint <- match("(Intercept)", colnames(x), nomatch=0) if(xint > 0) x <- x[, -xint, drop=FALSE] RET <- list(learn=list(y=y, X=x)) RET$k <- k RET$terms <- Terms RET$call <- match.call() RET$contrasts <- attr(x, "contrasts") RET$xlevels <- xlev attr(RET, "na.message") <- attr(m, "na.message") if(!is.null(attr(m, "na.action"))) RET$na.action <- attr(m, "na.action") class(RET) <- "ipredknn" RET } predict.ipredknn <- function(object, newdata, type=c("prob", "class"), ...) 
{ type <- match.arg(type) if(!inherits(object, "ipredknn")) stop("object not of class ipredknn") if(!is.null(Terms <- object$terms)) { # # formula fit (only) if(missing(newdata)) newdata <- model.frame(object) else { newdata <- model.frame(as.formula(delete.response(Terms)), newdata, na.action=function(x) x, xlev = object$xlevels) } x <- model.matrix(delete.response(Terms), newdata, contrasts = object$contrasts) xint <- match("(Intercept)", colnames(x), nomatch=0) if(xint > 0) x <- x[, -xint, drop=FALSE] } else { stop("object has no terms element") } # : check for variable names # if(length(colnames(x)) > 0 && # any(colnames(x) != dimnames(object$means)[[2]])) # warning("Variable names in newdata do not match those in object") # RET <- knn(object$learn$X, x, object$learn$y, k=object$k, prob=TRUE) if (type=="prob") return(attr(RET, "prob")) else return(RET) } ipred/R/predict.bagging.R0000644000176200001440000002106614172231220014727 0ustar liggesusers# $Id: predict.bagging.R,v 1.17 2009/03/27 16:18:38 hothorn Exp $ uwhich.max <- function(x) { # need to determine all maxima in order to sample from them wm <- (1:length(x))[x == max(x)] if (length(wm) > 1) wm <- wm[sample(length(wm), 1)] wm } predict.classbagg <- function(object, newdata=NULL, type=c("class", "prob"), aggregation=c("majority", "average", "weighted"), ...) { type <- match.arg(type) agg <- match.arg(aggregation) if (missing(newdata)) { if (length(object$mtrees) < 10) stop("cannot compute out-of-bag predictions for small number of trees") OOB <- TRUE if (!is.null(object$X)) newdata <- object$X else stop("cannot compute out-of-bag predictions without object$X!") } else { OOB <- FALSE } if (!is.data.frame(newdata)) newdata <- as.data.frame(newdata) N <- nrow(newdata) if (!object$comb) { tree <- object$mtrees[[1]]$btree Terms <- delete.response(tree$terms) act <- (tree$call)$na.action if (is.null(act)) act<- na.rpart newdata <- model.frame(Terms, newdata, na.action = act, xlev=attr(tree, "xlevels")) newdata <- getFromNamespace("rpart.matrix", ns = "rpart")(newdata) } classes <- levels(object$y) switch(agg, "majority" = { vote <- matrix(0, nrow=N, ncol=length(classes)) for (i in 1:length(object$mtrees)) { if (OOB) { bindx <- object$mtrees[[i]]$bindx if (!is.null(object$mtrees[[i]]$bfct)) stop("cannot compute out-of-bag estimate for combined models!") pred <- predict(object$mtrees[[i]], newdata, type="class") tindx <- cbind((1:N), pred)[-bindx,] } else { tindx <- cbind(1:N, predict(object$mtrees[[i]], newdata, type="class")) } vote[tindx] <- vote[tindx] + 1 } if (type=="class") { RET <- factor(classes[apply(vote, 1, uwhich.max)], levels = classes, labels = classes) } else { RET <- vote/apply(vote, 1, sum) colnames(RET) <- classes } }, "average" = { cprob <- matrix(0, nrow=N, ncol=length(classes)) if (OOB) ncount <- rep(0,N) else ncount <- length(object$mtrees) for (i in 1:length(object$mtrees)) { if (OOB) { bindx <- object$mtrees[[i]]$bindx pred <- predict(object$mtrees[[i]], newdata, type="prob")[-bindx,] tindx <- (1:N)[-bindx] ncount[tindx] <- ncount[tindx] + 1 } else { pred <- predict(object$mtrees[[i]], newdata, type="prob") tindx <- 1:N } cprob[tindx,] <- cprob[tindx,] + pred } switch(type, "class" = { RET <- factor(classes[apply(cprob, 1, uwhich.max)], levels = classes, labels = classes) }, "prob" = { ncount[ncount < 1] <- NA RET <- cprob / ncount colnames(RET) <- classes }) }, "weighted" = { agglsample <- matrix(0, ncol=length(classes), nrow=N) for (i in 1:length(object$mtrees)) { bdata <- object$y[object$mtrees[[i]]$bindx] 
newpart <- getpartition(object$mtrees[[i]], newdata) oldpart <- object$mtrees[[i]]$btree$where if (OOB) tindx <- (1:N)[-object$mtrees[[i]]$bindx] else tindx <- 1:N for (j in tindx) { aggobs <- table(bdata[oldpart == newpart[j]]) agglsample[j,] <- agglsample[j,] + aggobs } } switch(type, "class" = { RET <- c() for (j in 1:N) RET <- c(RET, uwhich.max(agglsample[j,])) RET <- factor(classes[RET], levels = classes, labels = classes) }, "prob" = { RET <- agglsample / apply(agglsample, 1, sum) colnames(RET) <- classes }) }) RET } predict.sclass <- function(object, newdata=NULL, type=c("class", "prob"), ...) { if (!is.null(object$bfct)) newdata <- cbind(newdata, object$bfct(newdata)) pred <- predict.irpart(object$btree, newdata, type=type) RET <- pred if (type == "class") RET <- as.integer(pred) if (type == "prob" && is.vector(pred)) RET <- cbind(pred, 1 - pred) RET } predict.regbagg <- function(object, newdata=NULL, aggregation=c("average", "weighted"), ...) { agg <- match.arg(aggregation) if (missing(newdata)) { if (length(object$mtrees) < 10) stop("cannot compute out-of-bag predictions for small number of trees") OOB <- TRUE if (!is.null(object$X)) newdata <- object$X else stop("cannot compute out-of-bag predictions without object$X!") } else { OOB <- FALSE } if (!is.data.frame(newdata)) newdata <- as.data.frame(newdata) N <- nrow(newdata) if (!object$comb) { tree <- object$mtrees[[1]]$btree Terms <- delete.response(tree$terms) act <- (tree$call)$na.action if (is.null(act)) act<- na.rpart newdata <- model.frame(Terms, newdata, na.action = act, xlev=attr(tree, "xlevels")) newdata <- getFromNamespace("rpart.matrix", ns = "rpart")(newdata) } switch(agg, "average" = { cprob <- rep(0, N) if (OOB) ncount <- rep(0,N) else ncount <- length(object$mtrees) for (i in 1:length(object$mtrees)) { if (OOB) { bindx <- object$mtrees[[i]]$bindx if (!is.null(object$mtrees[[i]]$bfct)) stop("cannot compute out-of-bag estimate for combined models!") pred <- predict(object$mtrees[[i]], newdata)[-bindx] tindx <- (1:N)[-bindx] ncount[tindx] <- ncount[tindx] + 1 } else { pred <- predict(object$mtrees[[i]], newdata) tindx <- 1:N } cprob[tindx] <- cprob[tindx] + pred } ncount[ncount < 1] <- NA RET <- cprob / ncount }, "weighted" = { agglsample <- rep(0, N) ncount <- rep(0, N) for (i in 1:length(object$mtrees)) { bdata <- object$y[object$mtrees[[i]]$bindx] newpart <- getpartition(object$mtrees[[i]], newdata) oldpart <- object$mtrees[[i]]$btree$where if (OOB) tindx <- (1:N)[-object$mtrees[[i]]$bindx] else tindx <- 1:N for (j in tindx) { aggobs <- bdata[oldpart == newpart[j]] agglsample[j] <- agglsample[j] + sum(aggobs) ncount[j] <- ncount[j] + length(aggobs) } } ncount[ncount < 1] <- NA RET <- agglsample / ncount }) RET } predict.sreg <- function(object, newdata=NULL, ...) { if (!is.null(object$bfct)) newdata <- cbind(newdata, object$bfct(newdata)) predict.irpart(object$btree, newdata) } predict.survbagg <- function(object, newdata=NULL, ...) 
{ if (missing(newdata)) { if (length(object$mtrees) < 10) stop("cannot compute out-of-bag predictions for small number of trees") OOB <- TRUE if (!is.null(object$X)) newdata <- object$X else stop("cannot compute out-of-bag predictions without object$X!") } else { OOB <- FALSE } if (!is.data.frame(newdata)) newdata <- as.data.frame(newdata) N <- nrow(newdata) if (!object$comb) { tree <- object$mtrees[[1]]$btree Terms <- delete.response(tree$terms) act <- (tree$call)$na.action if (is.null(act)) act<- na.rpart newdata <- model.frame(Terms, newdata, na.action = act, xlev=attr(tree, "xlevels")) newdata <- getFromNamespace("rpart.matrix", ns = "rpart")(newdata) } agglsample <- list() aggcens <- list() for (j in 1:N) { agglsample <- c(agglsample, list(c())) aggcens <- c(aggcens, list(c())) } for (i in 1:length(object$mtrees)) { bdata <- object$y[object$mtrees[[i]]$bindx] newpart <- getpartition(object$mtrees[[i]], newdata) oldpart <- object$mtrees[[i]]$btree$where if (OOB) { if (!is.null(object$mtrees[[i]]$bfct)) stop("cannot compute out-of-bag estimate for combined models!") tindx <- (1:N)[-object$mtrees[[i]]$bindx] } else { tindx <- 1:N } for (j in tindx) { aggobs <- bdata[oldpart == newpart[j],1] agglsample[[j]] <- c(agglsample[[j]], aggobs) aggobs <- bdata[oldpart == newpart[j],2] aggcens[[j]] <- c(aggcens[[j]], aggobs) } } RET <- list() for (j in 1:N) RET <- c(RET, list(survfit(Surv(agglsample[[j]], aggcens[[j]]) ~ 1))) RET } getpartition <- function(object, newdata=NULL) { if (!is.null(object$bfct)) { newdata <- cbind(newdata, object$bfct(newdata)) Terms <- delete.response(object$btree$terms) act <- (object$btree$call)$na.action if (is.null(act)) act<- na.rpart newdata <- model.frame(Terms, newdata, na.action = act, xlev=attr(object$btree, "xlevels")) newdata <- getFromNamespace("rpart.matrix", ns = "rpart")(newdata) } getFromNamespace("pred.rpart", ns = "rpart")(object$btree, newdata) } ipred/R/mypredict.lm.R0000644000176200001440000000126414172231220014305 0ustar liggesusers# $Id: mypredict.lm.R,v 1.7 2003/04/02 11:22:49 peters Exp $ mypredict.lm <- function(object, newdata) { xn <- as.data.frame(newdata) test <- attr(terms(object), "term.labels") xn <- xn[,test] if (!is.null(nrow(xn))) { pred <- rep(NA, nrow(xn)) names(pred) <- row.names(xn) } else { pred <- NA names(pred) <- "1" } # evaluate na.omit (delete lines containing NA) xnn <- na.omit(xn) # attr(xnn, "na.action") returns which na.action is # evaluated, lines and corresponding row.name where NAs occur if(is.null(attr(xnn, "na.action"))) pred <- predict(object, xnn) else pred[-attr(xnn, "na.action")] <- predict(object, xnn) pred } ipred/R/bagging.R0000644000176200001440000000225514172231220013275 0ustar liggesusers# $Id: bagging.R,v 1.19 2005/06/29 08:50:28 hothorn Exp $ bagging <- function(formula, data, ...) UseMethod("bagging", data) bagging.default <- function(formula, data, ...) stop(paste("Do not know how to handle objects of class", class(data))) bagging.data.frame <- function(formula, data, subset, na.action=na.rpart, ...) { cl <- match.call() if(missing(formula) || (length(formula) != 3) || (length(attr(terms(formula[-2], data = data), "term.labels")) < 1) || (length(attr(terms(formula[-3], data = data), "term.labels")) != 1)) stop("formula missing or incorrect") m <- match.call(expand.dots = FALSE) if(is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) m[[1]] <- as.name("model.frame") m$... 
<- NULL mf <- eval(m, parent.frame()) response <- attr(attr(mf, "terms"), "response") # just extract the data.frame, no handling of contrasts or NA's here. # this is done by rpart or the user supplied methods DATA <- list(y = mf[,response], X = mf[,-response, drop = FALSE]) names(DATA) <- c("y", "X") y <- do.call("ipredbagg", c(DATA, list(...))) y$call <- cl return(y) } ipred/MD50000644000176200001440000000761714402371032011674 0ustar liggesusers82050153e3767ec519693c66a0025945 *DESCRIPTION 977e05a9ebf9500af96b45199917e62a *NAMESPACE bd957f8c9597e280c5bc486db1b50fed *R/bagging.R c55f9e65194e69cac7b3f6a14819f7ee *R/bootest.R d8b949efde7413d8d639a64be6528403 *R/checkfunArgs.R 85963ba4287c3ffd4c84dbe8a48d9c28 *R/csurv.R 16fcd24ff394cdec7d929f8acacf5823 *R/cv.R b17e241d115e8e9491c5e07e612295fc *R/errorest.R f3f9979d3d1de170acc34bb1cfd15ef6 *R/inbagg.R 9bc7ff831e43ae443c3afd0015f9b45e *R/inclass.R 96677befab2b4535bc23d755ad5f6c92 *R/ipredbagg.R d22859a559e876b2e478d366b705d0a1 *R/ipredknn.R a98cc646bc7f7a9173d511917bc2db34 *R/irpart.R 6100af39d915430ab680a3889d52d4ec *R/kfoldcv.R 7d64c8043f1426fe3f41e324dad74be6 *R/mypredict.lm.R 51cb58bde04a793a35b2caf233517f43 *R/predict.bagging.R 1714695c8d4dd8439fc6d1e3f7968e1c *R/predict.inbagg.R 301f8e08bee09b9817e8ffdce0a4c398 *R/predict.inclass.R 8b4b9b3039c4fb86d2c96555b32872a8 *R/predict.irpart.R 5427f43c2b21d2e7f3c6ff2fc521c33a *R/print.R 7fff7c589cfae0b7e57f6af2bf6974f4 *R/prune.bagging.R ea8bb0575e5ee83b6a4d595212906f83 *R/rsurv.R 065dbafb0e228351e7b2a4e25eccfb0d *R/sbrier.R bb92fccd557c101dc3f52a3c7a7a1961 *R/slda.R 51c9a057a29e6be4477aaf23bcce4000 *R/ssubset.R 860d6ab9df7627b349a2fd9ac71237fe *R/varset.R 2b96f3002e0d82a14507edf0096eccf3 *build/vignette.rds f83cb6bdc4e6265a64be0914da7979f6 *cleanup b77f49ce74f90948e4d09122c5fac724 *data/DLBCL.rda b07616370b51419752d4219f1f4f9f55 *data/GlaucomaMVF.rda 1f87b4f0d914888b1be71028fef8d316 *data/Smoking.rda e54b730797d71b1483cc26bfb3ea552b *data/dystrophy.rda 45a8a599f130fd94e2bf0ccea723a290 *inst/COPYRIGHTS 58925eca9e1b5149cfc8756a2d8230c1 *inst/NEWS 82eeec327b400aadc3c7fe0905483d8a *inst/doc/ipred-examples.R 72c8610d330afc2376692437ffd0b5e0 *inst/doc/ipred-examples.Rnw 35dd5f21fe1a1fcdd3f0466286bd8c88 *inst/doc/ipred-examples.pdf ef01004837ec42585a18b1292cc00952 *man/DLBCL.Rd 3cf72f2749b7029a0b8b685461d99d3c *man/GlaucomaMVF.Rd 8d16887d434ff61979e9f89f26770ec5 *man/Smoking.Rd 716e562e6d5449abc3bbf954ee18a135 *man/bagging.Rd f4694ae7448c30a49814bc23fee777a7 *man/bootest.Rd 920bf08095b01ae4d943fbff21eedb57 *man/control.errorest.Rd b9874254d1a440ce7d1373ddb88ed18b *man/cv.Rd 54ef878e658ab5d41e3a54642ca9a451 *man/dystrophy.Rd ebb22088953b88f1a62151d295fc2ebd *man/errorest.Rd ca29c817b98efbef404f5b8467c796c3 *man/inbagg.Rd ceeaae7b39770e87036bd86c4b899a92 *man/inclass.Rd d34608ac849321130774fc768f28c475 *man/ipred-internal.Rd 86ea0f01231c482541c1ae86fa6f1652 *man/ipredknn.Rd 37eee681bff7ad10df28bb18eab6cf2e *man/kfoldcv.Rd 941c6c8d91a74ae8a046811346664b0b *man/mypredict.lm.Rd d1705bf690f5e92abf7123ca0e0ad2b7 *man/predict.bagging.Rd b3dbc86c0c9cab4b549b162b75248c31 *man/predict.inbagg.Rd b80ad2198613b405333aef1e6e3cc504 *man/predict.inclass.Rd 0da9ab0fcef3c03dc6f04309c8830b83 *man/predict.ipredknn.Rd d45eeb09998f42d52f180a46296b9744 *man/predict.slda.Rd d02437884a49f5b9937791ed6d07c53b *man/print.bagging.Rd 755e22b8d9799ff3d55f7364be332931 *man/print.cvclass.Rd b1ae5acecd41d145898b794567a38231 *man/print.inbagg.Rd 0e78f227c282e6474a5b504a74b97fe2 *man/print.inclass.Rd 8903ad93aa5f9f092faab0db112733bd 
*man/prune.bagging.Rd d88328ca9e52b501e01d1b291de8f16d *man/rsurv.Rd f617299850199477617f8c80f9967fae *man/sbrier.Rd ac34c8ccf9d10e1e15e7a2138c14b2cb *man/slda.Rd 9000c36afc63d72103df0e5c41dfffc5 *man/summary.bagging.Rd dd36ca065d305401e0326680b5cda910 *man/summary.inbagg.Rd 8ce9a1f1379d492df5aea55687b6b95c *man/summary.inclass.Rd 79fed002bac6fba4b49458b352873e8c *man/varset.Rd 0ac59b9f3655966e0fb52ca8a8b2b27a *src/SdiffKM.c 4c8242f0f0243ec116d9d1dd8ed99150 *src/init.c 48d16c5ed9e2eebd086e1244a0839308 *tests/ipred-bugs.R ee8dd469906916e9c86b5580f51b8bc0 *tests/ipred-bugs.Rout.save 6f1c14a02814a289c65b56125e576d3d *tests/ipred-segfault.R c28fb98cadd385924acb0c03856e4149 *tests/ipred-smalltest.R 2d85b750ceb36f266749f63921c26fa1 *tests/ipred-smalltest.Rout.save 72c8610d330afc2376692437ffd0b5e0 *vignettes/ipred-examples.Rnw c642c366927d8bf2522e3c078f6e34a2 *vignettes/ipred.bib ipred/inst/0000755000176200001440000000000014402361655012340 5ustar liggesusersipred/inst/doc/0000755000176200001440000000000014402361655013105 5ustar liggesusersipred/inst/doc/ipred-examples.R0000644000176200001440000001121414402361655016146 0ustar liggesusers### R code from vignette source 'ipred-examples.Rnw' ################################################### ### code chunk number 1: preliminaries ################################################### options(prompt=">", width=50) set.seed(210477) ################################################### ### code chunk number 2: bagging ################################################### library("ipred") library("rpart") library("MASS") data("GlaucomaM", package="TH.data") gbag <- bagging(Class ~ ., data = GlaucomaM, coob=TRUE) ################################################### ### code chunk number 3: print-bagging ################################################### print(gbag) ################################################### ### code chunk number 4: double-bagging ################################################### scomb <- list(list(model=slda, predict=function(object, newdata) predict(object, newdata)$x)) gbagc <- bagging(Class ~ ., data = GlaucomaM, comb=scomb) ################################################### ### code chunk number 5: predict.bagging ################################################### predict(gbagc, newdata=GlaucomaM[c(1:3, 99:102), ]) ################################################### ### code chunk number 6: indirect.formula ################################################### data("GlaucomaMVF", package="ipred") GlaucomaMVF <- GlaucomaMVF[,-63] formula.indirect <- Class~clv + lora + cs ~ . 
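## Note on the two-sided formula above: the left part (Class) is the
## response, the middle part (clv + lora + cs) lists the intermediate
## variables and the right-hand side (.) denotes the explanatory
## variables used to model them (see the inclass() call below).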
################################################### ### code chunk number 7: indirect.fit ################################################### classify <- function (data) { attach(data) res <- ifelse((!is.na(clv) & !is.na(lora) & clv >= 5.1 & lora >= 49.23372) | (!is.na(clv) & !is.na(lora) & !is.na(cs) & clv < 5.1 & lora >= 58.55409 & cs < 1.405) | (is.na(clv) & !is.na(lora) & !is.na(cs) & lora >= 58.55409 & cs < 1.405) | (!is.na(clv) & is.na(lora) & cs < 1.405), 0, 1) detach(data) factor (res, labels = c("glaucoma", "normal")) } fit <- inclass(formula.indirect, pFUN = list(list(model = lm)), cFUN = classify, data = GlaucomaMVF) ################################################### ### code chunk number 8: print.indirect ################################################### print(fit) ################################################### ### code chunk number 9: predict.indirect ################################################### predict(object = fit, newdata = GlaucomaMVF[c(1:3, 86:88),]) ################################################### ### code chunk number 10: bagging.indirect ################################################### mypredict.rpart <- function(object, newdata) { RES <- predict(object, newdata) RET <- rep(NA, nrow(newdata)) NAMES <- rownames(newdata) RET[NAMES %in% names(RES)] <- RES[NAMES[NAMES %in% names(RES)]] RET } fit <- inbagg(formula.indirect, pFUN = list(list(model = rpart, predict = mypredict.rpart)), cFUN = classify, nbagg = 25, data = GlaucomaMVF) ################################################### ### code chunk number 11: plda ################################################### mypredict.lda <- function(object, newdata){ predict(object, newdata = newdata)$class } ################################################### ### code chunk number 12: cvlda ################################################### errorest(Class ~ ., data= GlaucomaM, model=lda, estimator = "cv", predict= mypredict.lda) ################################################### ### code chunk number 13: cvindirect ################################################### errorest(formula.indirect, data = GlaucomaMVF, model = inclass, estimator = "632plus", pFUN = list(list(model = lm)), cFUN = classify) ################################################### ### code chunk number 14: varsel-def ################################################### mymod <- function(formula, data, level=0.05) { # select all predictors that are associated with an # univariate t.test p-value of less that level sel <- which(lapply(data, function(x) { if (!is.numeric(x)) return(1) else return(t.test(x ~ data$Class)$p.value) }) < level) # make sure that the response is still there sel <- c(which(colnames(data) %in% "Class"), sel) # compute a LDA using the selected predictors only mod <- lda(formula , data=data[,sel]) # and return a function for prediction function(newdata) { predict(mod, newdata=newdata[,sel])$class } } ################################################### ### code chunk number 15: varsel-comp ################################################### errorest(Class ~ . 
, data=GlaucomaM, model=mymod, estimator = "cv",
         est.para=control.errorest(k=5))
ipred/inst/doc/ipred-examples.pdf0000644000176200001440000023253614402361655016530 0ustar liggesusers[binary PDF build of the vignette omitted]
ipred/inst/doc/ipred-examples.Rnw0000644000176200001440000004270414172231220016507 0ustar liggesusers\documentclass[11pt]{article}
\usepackage[round]{natbib}
\usepackage{bibentry}
\usepackage{amsfonts}
\usepackage{hyperref}

\renewcommand{\baselinestretch}{1.3}

\newcommand{\ipred}{\texttt{ipred }}

%\VignetteIndexEntry{Some more or less useful examples for illustration.}
%\VignetteDepends{ipred}

%\textwidth=6.2in

%\VignetteDepends{mvtnorm,TH.data,rpart,MASS}

\begin{document}

\title{\ipred: Improved Predictors}
\date{}

\SweaveOpts{engine=R,eps=TRUE,pdf=TRUE}

<<preliminaries>>=
options(prompt=">", width=50)
set.seed(210477)
@

\maketitle

This short manual is heavily based on \cite{Rnews:Peters+Hothorn+Lausen:2002}
and needs some improvements.

\section{Introduction}

In classification problems, there are several attempts to create rules
which assign future observations to certain classes. Common methods are,
for example, linear discriminant analysis or classification trees. Recent
developments have led to substantial reductions of the misclassification
error in many applications. Bootstrap aggregation
\citep[``bagging'',][]{breiman:1996} combines classifiers trained on
bootstrap samples of the original data. Another approach is indirect
classification, which incorporates a priori knowledge into a
classification rule \citep{hand:2001}. Since the misclassification error
is a criterion to assess the classification techniques, its estimation is
of main importance. A nearly unbiased but highly variable estimator can
be calculated by cross validation. \cite{efron:1997} discuss bootstrap
estimates of misclassification error. As a by-product of bagging,
\cite{out-of-bag:1996} proposes the out-of-bag estimator. \\
However, the calculation of the desired classification models and their
misclassification errors is often hampered by the different and
specialized interfaces of the various procedures. We propose the \ipred
package as a first attempt to create a unified interface for improved
predictors and various error rate estimators. In the following we
demonstrate the functionality of the package using the example of
glaucoma classification. We start with an overview of the disease and
data and review the implemented classification and estimation methods in
the context of their application to glaucoma diagnosis.

\section{Glaucoma}

Glaucoma is a slowly progressing and irreversible disease that affects
the optic nerve head. It is the second most common cause of blindness
worldwide. Glaucoma is usually diagnosed based on a reduced visual field,
assessed by a medical examination of perimetry, and a smaller number of
intact nerve fibers at the optic nerve head.
One way to examine the amount of intact nerve fibers is the Heidelberg
Retina Tomograph (HRT), a confocal laser scanning tomograph, which
performs a three-dimensional topographical analysis of the optic nerve
head morphology. It produces a series of $32$ images, each of $256 \times
256$ pixels, which are converted to a single topographic image. A less
complex, though also less informative, examination tool is
$2$-dimensional fundus photography.
However, in cooperation with clinicians and a priori analysis we derived
a diagnosis of glaucoma based on three variables only: $w_{lora}$
represents the loss of nerve fibers and is obtained by a $2$-dimensional
fundus photography, $w_{cs}$ and $w_{clv}$ describe the visual field
defect \citep{ifcs:2001}.

\begin{center}
\begin{figure}[h]
\begin{center}
{\small
\setlength{\unitlength}{0.6cm}
\begin{picture}(14.5,5)
\put(5, 4.5){\makebox(2, 0.5){$w_{clv}\geq 5.1$}}
\put(2.5, 3){\makebox(2, 0.5){$w_{lora}\geq 49.23$}}
\put(7.5, 3){\makebox(2, 0.5){$w_{lora} \geq 58.55$}}
\put(0, 1.5){\makebox(2, 0.5){$glaucoma$}}
\put(3.5, 1.5){\makebox(2, 0.5){$normal$}}
\put(6.5, 1.5){\makebox(2, 0.5){$w_{cs} < 1.405$}}
\put(10, 1.5){\makebox(2, 0.5){$normal$}}
\put(3.5, 0){\makebox(2, 0.5){$glaucoma$}}
\put(6.5, 0){\makebox(2, 0.5){$normal$}}
\put(6, 4.5){\vector(-3, -2){1.5}}
\put(6, 4.5){\vector(3, -2){1.5}}
\put(3.5, 3){\vector(3, -2){1.5}}
\put(3.5, 3){\vector(-3, -2){1.5}}
\put(8.5, 3){\vector(3, -2){1.5}}
\put(8.5, 3){\vector(-3, -2){1.5}}
\put(6.5, 1.5){\vector(3, -2){1.5}}
\put(6.5, 1.5){\vector(-3, -2){1.5}}
\end{picture}
}
\end{center}
\caption{Glaucoma diagnosis. \label{diag}}
\end{figure}
\end{center}

Figure \ref{diag} represents the diagnosis of glaucoma in terms of a
medical decision tree. A complication of the disease is that a damage in
the optic nerve head morphology precedes a measurable visual field
defect. Furthermore, early detection is of main importance, since an
adequate therapy can only slow down the progression of the disease.
Hence, a classification rule for detecting early damages should include
morphological information, rather than visual field data only.
Two example datasets are included in the package. The first one contains
measurements of the eye morphology only (\texttt{GlaucomaM}), including
$62$ variables for $196$ observations. The second dataset
(\texttt{GlaucomaMVF}) contains additional visual field measurements for
a different set of patients. In both example datasets, the observations
in the two groups are matched by age and sex to prevent any bias.

\section{Bagging}

Referring to the example of glaucoma diagnosis we first demonstrate the
functionality of the \texttt{bagging} function. We fit \texttt{nbagg = 25}
(default) classification trees for bagging by
<<bagging>>=
library("ipred")
library("rpart")
library("MASS")
data("GlaucomaM", package="TH.data")
gbag <- bagging(Class ~ ., data = GlaucomaM, coob=TRUE)
@
where \texttt{GlaucomaM} contains explanatory HRT variables and the
response of glaucoma diagnosis (\texttt{Class}), a factor at two levels
\texttt{normal} and \texttt{glaucoma}. \texttt{print} returns information
about the returned object, i.e. the number of bootstrap replications used
and, as requested by \texttt{coob=TRUE}, the out-of-bag estimate of
misclassification error \citep{out-of-bag:1996}.
<<print-bagging>>=
print(gbag)
@
The out-of-bag estimate uses the observations which are left out in a
bootstrap sample to estimate the misclassification error at almost no
additional computational costs. \cite{double-bag:2002} propose to use the
out-of-bag samples for a combination of linear discriminant analysis and
classification trees, called ``Double-Bagging''.
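A short calculation illustrates why there are always enough out-of-bag
observations to work with: a particular observation is contained in a
given bootstrap sample of size $n$ with probability
$1 - (1 - 1/n)^n \approx 1 - e^{-1} \approx 0.632$, so, on average, each
observation is out-of-bag for roughly one third of the \texttt{nbagg}
trees.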
For example, a combination of a stabilised linear discriminant analysis
with classification trees can be computed along the following lines
<<double-bagging>>=
scomb <- list(list(model=slda, predict=function(object, newdata)
                   predict(object, newdata)$x))
gbagc <- bagging(Class ~ ., data = GlaucomaM, comb=scomb)
@
\texttt{predict} predicts future observations according to the fitted
model.
<<predict.bagging>>=
predict(gbagc, newdata=GlaucomaM[c(1:3, 99:102), ])
@
Both \texttt{bagging} and \texttt{predict} rely on the \texttt{rpart}
routines. The \texttt{rpart} routine for each bootstrap sample can be
controlled in the usual way. By default \texttt{rpart.control} is used
with \texttt{minsize=2} and \texttt{cp=0} and it is wise to turn
cross-validation off (\texttt{xval=0}). The function \texttt{prune} can
be used to prune each of the trees to an appropriate size.

\section{Indirect Classification}

Especially in a medical context it often occurs that a priori knowledge
about a classifying structure is given. For example it might be known
that a disease is assessed on a subgroup of the given variables or,
moreover, that class memberships are assigned by a deterministically
known classifying function. \cite{hand:2001} proposes the framework of
indirect classification which incorporates this a priori knowledge into a
classification rule. In this framework we subdivide a given data set into
three groups of variables: those to be used for predicting the class
membership (explanatory), those to be used for defining the class
membership (intermediate) and the class membership variable itself
(response). For future observations, an indirect classifier predicts
values for the appointed intermediate variables based on explanatory
variables only. The observation is classified based on its predicted
intermediate variables and a fixed classifying function. This indirect
way of classification using the predicted intermediate variables offers
possibilities to incorporate a priori knowledge by the subdivision of
variables and by the construction of a fixed classifying function.
We apply indirect classification by using the function \texttt{inclass}.
Referring to the glaucoma example, explanatory variables are HRT and
anamnestic variables only, intermediate variables are $w_{lora}, \,
w_{cs}$ and $w_{clv}$. The response is the diagnosis of glaucoma which is
determined by a fixed classifying function and therefore not included in
the learning sample \texttt{GlaucomaMVF}. We assign the given variables
to explanatory and intermediate by specifying the input formula.
<<indirect.formula>>=
data("GlaucomaMVF", package="ipred")
GlaucomaMVF <- GlaucomaMVF[,-63]
formula.indirect <- Class~clv + lora + cs ~ .
@
The variables on the left-hand side represent the intermediate variables,
modeled by the explanatory variables on the right-hand side. Almost any
modeling technique can be used to predict the intermediate variables. We
chose a linear model by \texttt{pFUN = list(list(model = lm))}.
<<indirect.fit>>=
classify <- function (data) {
  attach(data)
  res <- ifelse((!is.na(clv) & !is.na(lora) & clv >= 5.1 & lora >=
        49.23372) |
        (!is.na(clv) & !is.na(lora) & !is.na(cs) & clv < 5.1 & lora >=
        58.55409 & cs < 1.405) |
        (is.na(clv) & !is.na(lora) & !is.na(cs) & lora >= 58.55409 & cs <
        1.405) |
        (!is.na(clv) & is.na(lora) & cs < 1.405), 0, 1)
  detach(data)
  factor (res, labels = c("glaucoma", "normal"))
}
fit <- inclass(formula.indirect, pFUN = list(list(model = lm)),
  cFUN = classify, data = GlaucomaMVF)
@
\texttt{print} displays the subdivision of variables and the chosen
modeling technique
<<print.indirect>>=
print(fit)
@
Furthermore, indirect classification predicts the intermediate variables
based on the explanatory variables and classifies them according to a
fixed classifying function in a second step; that means a
deterministically known function for the class membership has to be
specified. In our example this function is given in Figure \ref{diag} and
implemented in the function \texttt{classify}.\\
Prediction of future observations is now performed by
<<predict.indirect>>=
predict(object = fit, newdata = GlaucomaMVF[c(1:3, 86:88),])
@
We perform a bootstrap aggregated indirect classification approach by
choosing \texttt{pFUN = bagging} and specifying the number of bootstrap
samples \citep{ifcs:2001}. Regression or classification trees are fitted
for each bootstrap sample, with respect to the measurement scale of the
specified intermediate variables
<<bagging.indirect>>=
mypredict.rpart <- function(object, newdata) {
  RES <- predict(object, newdata)
  RET <- rep(NA, nrow(newdata))
  NAMES <- rownames(newdata)
  RET[NAMES %in% names(RES)] <- RES[NAMES[NAMES %in% names(RES)]]
  RET
}
fit <- inbagg(formula.indirect, pFUN = list(list(model = rpart, predict =
mypredict.rpart)), cFUN = classify, nbagg = 25, data = GlaucomaMVF)
@
The call for the prediction of values remains unchanged.

\section{Error Rate Estimation}

Classification rules are usually assessed by their misclassification
rate. Hence, error rate estimation is of main importance. The function
\texttt{errorest} implements a unified interface to several resampling
based estimators. Referring to the example, we apply a linear
discriminant analysis and specify the error rate estimator by
\texttt{estimator = "cv", "boot"} or \texttt{"632plus"}, respectively. A
10-fold cross validation is performed by choosing \texttt{estimator =
"cv"} and \texttt{est.para = control.errorest(k = 10)}. The options
\texttt{estimator = "boot"} or \texttt{estimator = "632plus"} deliver a
bootstrap estimator and its bias corrected version {\sl .632+}
\citep[see][]{efron:1997}; we specify the number of bootstrap samples to
be drawn by \texttt{est.para = control.errorest(nboot = 50)}. Further
arguments are required to specify the classification technique. The
argument \texttt{predict} represents the chosen predictive function. For
a unified interface \texttt{predict} has to be based on the arguments
\texttt{object} and \texttt{newdata} only, therefore a wrapper function
\texttt{mypredict} is necessary for classifiers which require more than
those arguments or do not return the predicted classes by default.
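In its most general form, such a wrapper is just a small adapter around
the original \texttt{predict} method. The following sketch is purely
schematic: \texttt{mymodel} is a placeholder for an arbitrary model class
and we assume that its \texttt{predict} method returns the predicted
classes in a \texttt{class} element.
<<wrapper-template, eval=FALSE>>=
## schematic template for a predict wrapper, not executed:
## mymodel is a placeholder, not a class shipped with ipred
mypredict.mymodel <- function(object, newdata) {
  ## call the original predict method and extract
  ## the predicted classes only
  predict(object, newdata = newdata)$class
}
@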
For a linear discriminant analysis with \texttt{lda}, we need to specify
<<>>=
mypredict.lda <- function(object, newdata){
    predict(object, newdata = newdata)$class
}
@
and calculate a 10-fold cross-validated error rate estimator for a linear
discriminant analysis by calling
<<>>=
errorest(Class ~ ., data = GlaucomaM, model = lda,
    estimator = "cv", predict = mypredict.lda)
@
For the indirect approach the specification of the call becomes slightly
more complicated.
%Again for a unified interface a wrapper
%function has to be used, which incorporates the fixed classification rule
The bias corrected estimator {\sl .632+} is computed by
<<>>=
errorest(formula.indirect, data = GlaucomaMVF, model = inclass,
    estimator = "632plus", pFUN = list(list(model = lm)), cFUN = classify)
@
Because the variables are subdivided and the formula describes the
modeling between explanatory and intermediate variables only, the class
membership variable has to be supplied with the data. Hence, in contrast
to the function \texttt{inclass}, the data set \texttt{GlaucomaMVF} used
in \texttt{errorest} must contain explanatory, intermediate and response
variables.

Sometimes it may be necessary to reduce the number of predictors before
training a classifier. Estimating the error rate after the variable
selection leads to biased estimates of the misclassification error, and
therefore one should estimate the error rate of the whole procedure.
Within the \texttt{errorest} framework, this can be done as follows.
First, we define a function which does both variable selection and
training of the classifier. For illustration purposes, we select the
predictors by comparing the univariate $P$-values of a two-sample $t$-test
with a prespecified level and train an LDA using the selected variables
only.
<<>>=
mymod <- function(formula, data, level=0.05) {
    # select all predictors that are associated with a
    # univariate t.test p-value of less than level
    sel <- which(lapply(data, function(x) {
        if (!is.numeric(x)) return(1)
        else return(t.test(x ~ data$Class)$p.value)
    }) < level)
    # make sure that the response is still there
    sel <- c(which(colnames(data) %in% "Class"), sel)
    # compute an LDA using the selected predictors only
    mod <- lda(formula, data = data[, sel])
    # and return a function for prediction
    function(newdata) {
        predict(mod, newdata = newdata[, sel])$class
    }
}
@
Note that \texttt{mymod} does not return an object of class \texttt{lda}
but a function with argument \texttt{newdata} only. Thanks to lexical
scoping, this function is used for computing predicted classes instead of
a function \texttt{predict} passed to \texttt{errorest} as an argument.
Computing a $5$-fold cross-validated error rate estimator is now
essentially a one-liner.
<<>>=
errorest(Class ~ ., data = GlaucomaM, model = mymod,
    estimator = "cv", est.para = control.errorest(k = 5))
@
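Analogously, the bootstrap based estimators mentioned above can be applied
to the linear discriminant analysis itself. A sketch, not evaluated here,
reads as follows; the number of bootstrap replications \texttt{nboot = 25}
is an arbitrary choice made only to save computing time:
<<eval=FALSE>>=
## sketch: .632+ estimator for the linear discriminant analysis,
## reusing the wrapper mypredict.lda defined above
errorest(Class ~ ., data = GlaucomaM, model = lda,
    estimator = "632plus",
    est.para = control.errorest(nboot = 25),
    predict = mypredict.lda)
@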
%%\begin{figure}
%%\begin{center}
%%\begin{tabular}{ rrr }
%%\hline
%%dataset & method & error estimate \\
%%\hline
%%\texttt{GlaucomaM} & {\sl slda} & 0.168 \\
%%\texttt{GlaucomaM} & {\sl bagging} & 0.158 \\
%%\texttt{GlaucomaM} & {\sl double-bagging} & 0.153 \\
%%\texttt{GlaucomaMVF} & {\sl inclass-bagging} & 0.206 \\
%%\texttt{GlaucomaMVF} & {\sl inclass-lm} & 0.229 \\
%%\hline
%%\end{tabular}
%%\caption{10-fold cross-validated error estimation of
%%the misclassification error for several classification
%%methods: {\sl slda} - stabilised linear discriminant analysis,
%%{\sl bagging} - bagging with 50 bootstrap samples,
%%{\sl double-bagging} - bagging with 50 bootstrap samples,
%%combined with sLDA, {\sl inclass-bagging} -
%%indirect classification using bagging,
%%{\sl inclass-lm} - indirect classification using
%%linear modeling. \label{tenf}}
%%\end{center}
%%\end{figure}

%%Note that an estimator of the variance is available for the ordinary
%%bootstrap estimator (\texttt{estimator="boot"}) only, see \cite{efron:1997}.

\section{Summary}

\ipred tries to implement a unified interface to some recent developments
in classification and error rate estimation. It is by no means finished or
perfect, and we very much appreciate comments, suggestions and criticism.
Currently, the major drawback is speed. Calling \texttt{rpart} $50$ times,
once for each bootstrap sample, is relatively inefficient, but the design
of the interfaces was our main focus rather than optimization. Besides the
examples shown, \texttt{bagging} can be used to compute bagged regression
trees, and \texttt{errorest} computes estimators of the mean squared error
for regression models.

\bibliographystyle{plainnat}
\bibliography{ipred}

\end{document}

ipred/inst/COPYRIGHTS

COPYRIGHT STATUS
----------------

The bulk of this code is Copyright (C) 2002-2012 Andrea Peters and
Torsten Hothorn, except the code in

    .R/irpart.R
    .R/predict.irpart.R

which are modifications from the files rpart.s and predict.rpart.s from
package `rpart', version 3.1-8, which is

    Copyright (C) 2000 Mayo Foundation for Medical Education and
    Research with modifications for R by Brian D. Ripley.

All code is subject to the GNU General Public License, Version 2. See the
file COPYING for the exact conditions under which you may redistribute it.

ipred/inst/NEWS

# $Id: CHANGES,v 1.48 2009/09/09 15:40:28 hothorn Exp $

0.9-14 (09.03.2023)
    S3 registration

0.9-13 (02.06.2022)
    no need to escape &

0.9-12 (15.09.2021)
    predict(, newdata) returned factors with a set of levels depending
    on newdata, spotted by Max Kuhn

0.9-11 (12.03.2021)
    suggest party

0.9-10 (03.03.2021)
    suggest randomForest
    RNGversion("3.5.3")

0.9-9 (29.04.2019)
    RNGversion("3.5.3")
    inbagg potentially treated the y ~ . case incorrectly

0.9-8 (05.11.2018)
    test output update

0.9-7 (17.08.2018)
    sbrier returned incorrect answers for unseen time points; reported
    and fixed by Marvin Wright

0.9-6 (01.03.2017)
    register C routines

0.9-5 (28.07.2015)
    fix NAMESPACE

0.9-4 (20.02.2015)
    register predict.ipredknn

0.9-3 (20.12.2013)
    use trapezoid rule to compute integrated Brier score in sbrier

0.9-2 (02.09.2013)
    NAMESPACE issues, TH.data

0.9-0 (22.10.2012)
    Due to interface changes in rpart 3.1-55, the bagging function had
    to be rewritten. Results of the previous version are not exactly
    reproducible.
0.8-13 (21.02.2012)
    import(survival)

0.8-12 (20.02.2012)
    use prodlim to compute censoring distributions in sbrier
    (makes a difference for tied survival times)
    GPL (>= 2) and no require in .onLoad

0.8-11 (08.02.2011)
    depends R >= 2.10

0.8-10 (02.02.2011)
    compress data files

0.8-9 (27.01.2011)
    fix nrow problem in sbrier, spotted by Phil Boonstra
    avoid partial matches of function arguments

0.8-8 (09.09.2009)
    documentation fixes

0.8-7 (27.03.2009)
    survival fixes

0.8-6 (28.07.2008)
    make R devel happy ($ is no longer allowed)

0.8-4 (09.10.2007)
    change maintainer

0.8-3 (29.06.2005)
    terms(formula, data) needs `data' argument (suggested by BDR).

0.8-2 (09.12.2004)
    - slda: correct for one explanatory variable:
        ewp <- svd(solve(diag(diag(Snull), ncol = ncol(Snull)))%*%Snull)
                                           ^^^^^^^^^^^^^

0.8-1 (25.11.2004)
    - change #!/bin/bash -> #!/bin/sh

0.8-0 (02.06.2004)
    - correction of NAMESPACES

0.7-9 (13.05.2004)
    - description file, insert suggests: mvtnorm

0.7-8 (21.04.2004)
    - don't run selected examples and ipred-tests.R

0.7-7 (02.02.2004)
    - return predicted values for error estimations "boot" and
      "632plus", if required
    - optional argument determining which observations are included in
      each sample within 'errorest'
    - "boot" and "632plus" can be computed simultaneously

0.7-6 (16.01.2004)
    fix tests/ipred-segfault

0.7-5 (19.12.2003)
    examples of inbagg and predict.inbagg (don't use mvtnorm)

0.7-4 (16.12.2003)
    R-1.9.0 fixes

0.7-3 (03.11.2003)
    fix documentation bugs found by `codoc'

0.7-2 (29.10.2003)
    `rpart' is sensitive to compilers / optimization flags: the results
    we compare `ipred's tests with are produced with an optimized
    version of `rpart' (gcc -O2).
    `eigen' in `slda' replaced by `svd'

0.7-1 (08.08.2003)
    adapt to changes in R-devel and lda (package MASS)

0.7-0 (08.08.2003)
    add namespaces

0.6-15 (----)
    new argument "getmodels=TRUE" to cv: the returned object has an
    element "models", a list which contains the models for each fold.
    new interface for inclass and added method inbagg.

0.6-14 (13.03.2003)
    clean up bagging.Rd

0.6-12 (12.03.2003)
    methods for "integer" for the generics "bagging", "cv" and
    "bootest"
    do not call methods of generics directly, since they may be hidden
    (because not exported: predict.lda)

0.6-11 (05.03.2003)
    632plus was incorrect when the no-information error rate was less
    than the raw bootstrap estimator (eq. 29 was used instead of eq. 32
    in Efron & Tibshirani, 1997). Thanks to Ramon Diaz for reporting.
    changed the RNGkind to
        RNGkind("Wichmann-Hill", "Kinderman-Ramage")
    or RNGversion("1.6.2"), making the regression tests pass
    R CMD check with R-devel (1.7.0)
    ipred is allowed to import rpart.{anova, class, exp, poisson,
    matrix} from package rpart, thanks to BDR.

0.6-9 (25.02.2003)
    the terms attribute of data in errorest.data.frame may cause
    problems with some predict methods -> deleted

0.6-7 (17.02.2003)
    use a formula / data framework in cv and bootest. "model" now deals
    with the original variable names (and formula) instead of "y" and
    "X". "model" is now allowed to return a function with newdata
    argument for prediction. This is especially useful for estimating
    the error of both variable selection and model building
    simultaneously; the vignette gives a simple example.
    cv.numeric and bootest.numeric were broken and gave faulty
    estimates of MSE, both problems fixed
    if the maximum of votes for any class is not unique, the class is
    chosen at random in predict.classbagg now. Formerly, the class with
    the lowest level was chosen by mistake.
0.6-6 (06.02.2003)
    fixes required by attached "methods" package

0.6-4 (18.12.2002)
    R CMD build problems

0.6-3 (03.12.2002)
    cv in errorest incorrectly used all observations for estimating the
    error, which led to overly optimistic results

0.6-2 (18.10.2002)
    documentation updates and copyright status added

0.6-1 (02.10.2002)
    documentation fixes

0.6-0 (27.09.2002)
    added vignette
    documentation updates

0.5-7 (23.09.2002)
    add internal functions irpart and predict.irpart for speeding up
    standard bagging
    use error.control for the specification of control parameters
    cv can be used to calculate an "honest" prediction for each
    observation

0.5-6 (12.09.2002)
    code factors in GBSG2 data as factors. Documentation update. Add
    keepX argument to ipredbagg

0.5-5 (10.09.2002)
    set rpart.control(..., xval=0) by default

0.5-4 (05.08.2002)
    added k-NN with formula interface and stabilized LDA

0.5-3 (01.08.2002)
    use rpart.control() for regression and survival
    new documentation for bagging and friends

0.5-2 (30.07.2002)
    new low-level functions cv and bootest for error rate estimators
    (misclassification, mse, brier score)

0.5-1 (25.07.2002)
    bagging code completely rewritten

0.4-6 (27.06.2002)
    out-of-bag error for regression trees fixed.

0.4-5 (17.06.2002)
    use "minsplit = 2" in `rpart.control' passed to `bagging'

0.4-4 (17.05.2002)
    use predict.lda in bagging and predict.bagging
    bagging(..., method="double") did not work for factors.

0.4-3 (07.05.2002)
    bugfix in bagging (in models with one regressor), changes in
    documentation of errorest

0.4-2 (10.04.2002)
    predict.bagging much faster, OOB much faster

0.4-1 (08.04.2002)
    bugfix in print.inclass, predict.inclass

0.4-0 (26.03.2002)
    pre-release for CRAN/devel

ipred/cleanup

#!/bin/sh

# remove editor backup files (*~) from the package directories
for f in ./R/*~; do
    rm -f "$f"
done
for f in ./tests/*~; do
    rm -f "$f"
done
for f in .*~; do
    rm -f "$f"
done
for f in ./man/*~; do
    rm -f "$f"
done
for f in ./data/*~; do
    rm -f "$f"
done
for f in *~; do
    rm -f "$f"
done

# remove DEADJOE recovery files left behind by the joe editor
find . -name "DEADJOE" -exec rm -f {} \;

exit 0