### File: ROCR/inst/CITATION

citHeader("To cite ROCR in publications use:")

bibentry(bibtype = "Article",
         title   = "ROCR: visualizing classifier performance in R",
         author  = personList(as.person("T. Sing"),
                              as.person("O. Sander"),
                              as.person("N. Beerenwinkel"),
                              as.person("T. Lengauer")),
         year    = "2005",
         journal = "Bioinformatics",
         volume  = 21,
         number  = 20,
         pages   = "3940--3941",
         url     = "http://rocr.bioinf.mpi-sb.mpg.de")

citFooter("We have invested a lot of time and effort in creating ROCR,",
          "please cite it when using it for data analysis.")

### File: ROCR/NAMESPACE

# Default NAMESPACE created by R
# Remove the previous line if you edit this file

# Export all names
exportPattern(".")

# Import all packages listed as Imports or Depends
import(
  gplots,
  methods
)

### File: ROCR/demo/00Index

ROCR            demonstrates some of the graphical capabilities of ROCR

### File: ROCR/demo/ROCR.R

## -----------------------------------------------------------------------------------
## Demo file for ROCR; start with 'demo(ROCR)'
## -----------------------------------------------------------------------------------

# if(dev.cur() <= 1) get(getOption("device"))()
if(dev.cur() <= 1) dev.new()

opar <- par(ask = interactive() &&
            (.Device %in% c("X11", "GTK", "gnome", "windows","quartz")))

data(ROCR.hiv)
pp <- ROCR.hiv$hiv.svm$predictions
ll <- ROCR.hiv$hiv.svm$labels

par(mfrow=c(2,2))
pred <- prediction(pp, ll)
perf <- performance(pred, "tpr", "fpr")
plot(perf, avg= "threshold", colorize=T, lwd= 3,
     main= "With ROCR you can produce standard plots like ROC curves ...")
plot(perf, lty=3, col="grey78", add=T)

perf <- performance(pred, "prec", "rec")
plot(perf, avg= "threshold", colorize=T, lwd= 3,
     main= "... Precision/Recall graphs ...")
plot(perf, lty=3, col="grey78", add=T)

perf <- performance(pred, "sens", "spec")
plot(perf, avg= "threshold", colorize=T, lwd= 3,
     main="... Sensitivity/Specificity plots ...")
plot(perf, lty=3, col="grey78", add=T)

perf <- performance(pred, "lift", "rpp")
plot(perf, avg= "threshold", colorize=T, lwd= 3, main= "... 
and Lift charts.") plot(perf, lty=3, col="grey78", add=T) # ------------------------------------------------------------------------------------ data(ROCR.xval) pp <- ROCR.xval$predictions ll <- ROCR.xval$labels pred <- prediction(pp,ll) perf <- performance(pred,'tpr','fpr') par(mfrow=c(2,2)) plot(perf, colorize=T, lwd=2,main='ROC curves from 10-fold cross-validation') plot(perf, avg='vertical', spread.estimate='stderror',lwd=3,main='Vertical averaging + 1 standard error',col='blue') plot(perf, avg='horizontal', spread.estimate='boxplot',lwd=3,main='Horizontal averaging + boxplots',col='blue') plot(perf, avg='threshold', spread.estimate='stddev',lwd=2, main='Threshold averaging + 1 standard deviation',colorize=T) # ------------------------------------------------------------------------------------ data(ROCR.hiv) pp.unnorm <- ROCR.hiv$hiv.svm$predictions ll <- ROCR.hiv$hiv.svm$labels # normalize predictions to 0..1 v <- unlist(pp.unnorm) pp <- lapply(pp.unnorm, function(run) {approxfun(c(min(v), max(v)), c(0,1))(run)}) par(mfrow=c(2,2)) pred<- prediction(pp, ll) perf <- performance(pred, "tpr", "fpr") plot(perf, avg= "threshold", colorize=T, lwd= 3, coloraxis.at=seq(0,1,by=0.2), main= "ROC curve") plot(perf, col="gray78", add=T) plot(perf, avg= "threshold", colorize=T, colorkey=F,lwd= 3, main= "ROC curve",add=T) perf <- performance(pred, "acc") plot(perf, avg= "vertical", spread.estimate="boxplot", lwd=3,col='blue', show.spread.at= seq(0.1, 0.9, by=0.1), main= "Accuracy across the range of possible cutoffs") plot(performance(pred, "cal", window.size= 10), avg="vertical", main= "How well are the probability predictions calibrated?") plot(0,0,type="n", xlim= c(0,1), ylim=c(0,7), xlab="Cutoff", ylab="Density", main="How well do the predictions separate the classes?") for (runi in 1:length(pred@predictions)) { lines(density(pred@predictions[[runi]][pred@labels[[runi]]=="-1"]), col= "red") lines(density(pred@predictions[[runi]][pred@labels[[runi]]=="1"]), col="green") } #--------------------------------------------------------------------- par(mfrow= c(2,2)) # ...you can freely combine performance measures (pcmiss,lift) data(ROCR.xval) pred <- prediction(ROCR.xval$predictions, ROCR.xval$labels) perf <- performance(pred,"pcmiss","lift") # plot(perf, colorize=T) plot(perf, colorize=T, print.cutoffs.at=seq(0,1,by=0.1), text.adj=c(1.2,1.2), avg="threshold", lwd=3, main= "You can freely combine performance measures ...") data(ROCR.simple) pred <- prediction(ROCR.simple$predictions, ROCR.simple$labels) perf <- performance(pred,"tpr","fpr") plot(perf, colorize=T, colorkey.pos="top", print.cutoffs.at=seq(0,1,by=0.1), text.cex=1, text.adj=c(1.2, 1.2), lwd=2) # ... cutoff stacking data(ROCR.xval) pred <- prediction(ROCR.xval$predictions, ROCR.xval$labels) perf <- performance(pred,"tpr","fpr") plot(perf, print.cutoffs.at=seq(0,1,by=0.2), text.cex=0.8, text.y=lapply(as.list(seq(0,0.5,by=0.05)), function(x) { rep(x,length(perf@x.values[[1]])) } ), col= as.list(terrain.colors(10)), text.col= as.list(terrain.colors(10)), points.col= as.list(terrain.colors(10)), main= "Cutoff stability") # .... 
no functional dependencies needed, truly parametrized curve data(ROCR.xval) pred <- prediction(ROCR.xval$predictions, ROCR.xval$labels) perf <- performance(pred,"acc","lift") plot(perf, colorize=T, main="Truly parametrized curves") plot(perf, colorize=T, print.cutoffs.at=seq(0,1,by=0.1), add=T, text.adj=c(1.2, 1.2), avg="threshold", lwd=3) # -------------------------------------------------------------------- # (Expected cost) curve + ROC convex hull par(mfrow=c(1,2)) data(ROCR.hiv) plot(0,0,xlim=c(0,1),ylim=c(0,1),xlab='Probability cost function', ylab="Normalized expected cost", main='HIV data: Expected cost curve (Drummond & Holte)') pred<-prediction(ROCR.hiv$hiv.nn$predictions,ROCR.hiv$hiv.nn$labels) lines(c(0,1),c(0,1)) lines(c(0,1),c(1,0)) perf1 <- performance(pred,'fpr','fnr') for (i in 1:length(perf1@x.values)) { for (j in 1:length(perf1@x.values[[i]])) { lines(c(0,1),c(perf1@y.values[[i]][j], perf1@x.values[[i]][j]),col=rev(terrain.colors(10))[i],lty=3) } } perf<-performance(pred,'ecost') plot(perf,lwd=1.5,xlim=c(0,1),ylim=c(0,1),add=T) # RCH data(ROCR.simple) ROCR.simple$labels[ROCR.simple$predictions >= 0.7 & ROCR.simple$predictions < 0.85] <- 0 #as.numeric(!labels[predictions >= 0.6 & predictions < 0.85]) pred <- prediction(ROCR.simple$predictions, ROCR.simple$labels) perf <- performance(pred,'tpr','fpr') plot(perf, main="ROC curve with concavities (suboptimal) and ROC convex hull (Fawcett)") perf1 <- performance(pred,'rch') plot(perf1,add=T,col='red',lwd=2) #--------------------------------------------------------------------- # (plotting cutoff vs. measure) data(ROCR.hiv) pp <- ROCR.hiv$hiv.svm$predictions ll <- ROCR.hiv$hiv.svm$labels measures <- c('tpr','fpr','acc','err','rec','sens','fnr','tnr','spec', 'ppv','prec','npv','fall','miss','pcfall','pcmiss', 'phi','mat','mi','chisq','odds','lift','f') ## Don't be surprised by the decreased cutoff regions produced by 'odds ratio'. ## Cf. ?performance for details. 
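## Measure abbreviations, as used throughout ROCR (see ?performance and
## R/performance_measures.R below): tpr/fpr/tnr/fnr = true/false
## positive/negative rate; acc/err = accuracy/error rate; rec/sens = recall/
## sensitivity (= tpr); spec = specificity (= tnr); prec/ppv = precision/
## positive predictive value; npv = negative predictive value; fall = fallout
## (= fpr); miss = miss rate (= fnr); pcfall/pcmiss = their prediction-
## conditioned variants; phi/mat = phi (Matthews) correlation coefficient;
## mi = mutual information; chisq = chi-square test statistic; odds = odds
## ratio; lift = lift value; f = F-measure.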
pred <- prediction(pp, ll)
par(mfrow=c(5,5))
for (measure in measures) {
  perf <- performance(pred, measure)
  plot(perf, avg="vertical", spread.estimate="boxplot")
}

#---------------------------------------------------------------------

measures <- c('tpr','err','prec','phi','mi','chisq','odds','lift','f')
par(mfrow=c(6,6))
for (i in 1:(length(measures)-1)) {
  for (j in (i+1):length(measures)) {
    perf <- performance(pred, measures[i], measures[j])
    plot(perf, avg="threshold", colorize=T)
  }
}

#---------------------------------------------------------------------

data(ROCR.hiv)
pp <- ROCR.hiv$hiv.svm$predictions
ll <- ROCR.hiv$hiv.svm$labels

data(ROCR.xval)
pp <- ROCR.xval$predictions
ll <- ROCR.xval$labels

pred <- prediction(pp, ll)
par(mfrow=c(3,3))

perf <- performance(pred, "odds", "fpr")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "phi", "err")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "f", "err")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "f", "ppv")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "mat", "ppv")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "npv", "ppv")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "acc", "phi")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "lift", "phi")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "f", "phi")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "mi", "phi")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "chisq", "phi")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "acc", "mi")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "fall", "odds")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "tpr", "lift")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "fall", "lift")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "npv", "f")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "prec", "f")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

perf <- performance(pred, "tpr", "f")
plot(perf, colorize=T)
plot(perf, avg="threshold", lwd=2, add=T)

par(opar)

### File: ROCR/INSTALL

INSTALLATION INSTRUCTIONS FOR ROCR:
-----------------------------------

If you read this, you've probably done too much already: it is not
necessary to unpack the package. R has a very simple package
installation mechanism:

Linux/Unix:
-----------
R CMD INSTALL ROCR_1.0-4.tar.gz

Windows:
--------
From the 'Packages' menu choose the item 'Install package(s) from
local zip files'. Select the ROCR zip file, and you're done.

* Have fun!

### File: ROCR/NEWS

This file documents changes and updates to the ROCR package.
Version 1.0-7 (Mar 26, 2015)
- Changed maintainer email address

Version 1.0-5 (May 12, 2013)
- Used standardized license specification in DESCRIPTION file
- Removed LICENCE file
- Removed .First.lib in zzz.R
- CITATION moved into inst folder and adjusted

Version 1.0-4 (Dec 08, 2009)
- fixes a bug in 1.0-3 that prevented plot arguments from being passed through

Version 1.0-3
- adapted to stricter R CMD CHECK rules in R > 2.9

Version 1.0-2 (Jan 27, 2007)
- fixed a minor bug in the 'prediction' function concerning the optional
  parameter 'label.ordering' (thanks to Robert Perdisci for notifying us).
- added an optional parameter 'fpr.stop' to the performance measure 'auc',
  allowing calculation of the partial area under the ROC curve up to the
  false positive rate given by 'fpr.stop'.

### File: ROCR/data/ROCR.simple.rda
[binary R data file; raw contents omitted]

### File: ROCR/data/ROCR.xval.rda
[binary R data file; raw contents omitted]
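The demo above shows how these datasets are consumed; as a minimal
stand-alone sketch (assuming ROCR is installed and attached):

    library(ROCR)
    data(ROCR.simple)                # list with $predictions and $labels
    pred <- prediction(ROCR.simple$predictions, ROCR.simple$labels)
    perf <- performance(pred, "tpr", "fpr")
    plot(perf)                       # a single ROC curve
    performance(pred, "auc")@y.values[[1]]   # scalar area under the curve

    data(ROCR.xval)                  # $predictions/$labels are lists with
    pred <- prediction(ROCR.xval$predictions,  # one entry per x-validation
                       ROCR.xval$labels)       # run (10 runs here)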
[A %6 #" +B^; yB^FF!0NF!#Qߕq yed22 yB^FF!/QDx}vD7 (o7 B^FF!/#QDx}Q(ed7 BFa|B>DB^FF!/Q#Q?(Q(cdB>FaB>FF!0NFF!/#Q(oB>FaB>FF!/#Q(cd7 oB>FaB>F!#Q'#Q(edB>Aa|B^FF!/#Q?(Q(edB^FF!0>F!#Q(o(ed%"2j|!K"Q챣6G۝Uևٞ#_ _$yy93gD{ROCR/R/0000755000176000001440000000000012504526277011262 5ustar ripleyusersROCR/R/performance_measures.R0000644000176000001440000003741212504526277015621 0ustar ripleyusers## ------------------------------------------------------------------------ ## classical machine learning contingency table measures ## ------------------------------------------------------------------------ .performance.accuracy <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { list( cutoffs, (tn+tp) / length(predictions) ) } .performance.error.rate <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { list( cutoffs, (fn+fp) / length(predictions) ) } .performance.false.positive.rate <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { list( cutoffs, fp / n.neg ) } .performance.true.positive.rate <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { list( cutoffs, tp / n.pos ) } .performance.false.negative.rate <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { list( cutoffs, fn / n.pos ) } .performance.true.negative.rate <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { list( cutoffs, tn / n.neg ) } .performance.positive.predictive.value <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { ppv <- tp / (fp + tp) list( cutoffs, ppv ) } .performance.negative.predictive.value <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { npv <- tn / (tn + fn) list( cutoffs, npv ) } .performance.prediction.conditioned.fallout <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { ppv <- .performance.positive.predictive.value(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred)[[2]] list( cutoffs, 1 - ppv ) } .performance.prediction.conditioned.miss <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { npv <- .performance.negative.predictive.value(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred)[[2]] list( cutoffs, 1 - npv ) } ## ------------------------------------------------------------------------ ## ...not actually performance measures, but very useful as a second axis ## against which to plot a "real" performance measure ## (popular example: lift charts) ## ------------------------------------------------------------------------ .performance.rate.of.positive.predictions <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { list( cutoffs, n.pos.pred / (n.pos + n.neg) ) } .performance.rate.of.negative.predictions <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { list( cutoffs, n.neg.pred / (n.pos + n.neg) ) } ## ------------------------------------------------------------------------ ## Classical statistical contingency table measures ## ------------------------------------------------------------------------ .performance.phi <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { 
list(cutoffs, (tn*tp - fn*fp) / sqrt(n.pos * n.neg * n.pos.pred * n.neg.pred) ) } .performance.mutual.information <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { n.samples <- n.pos + n.neg mi <- c() for (k in 1:length(cutoffs)) { kij <- rbind( c(tn[k],fn[k]), c(fp[k],tp[k]) ) ki.j. <- rbind(c(n.neg * n.neg.pred[k], n.neg.pred[k] * n.pos), c(n.neg * n.pos.pred[k], n.pos * n.pos.pred[k])) log.matrix <- log2( kij / ki.j.) log.matrix[kij/ki.j.==0] <- 0 mi <- c(mi, log2(n.samples) + sum( kij * log.matrix) / n.samples ) } list( cutoffs, mi ) } .performance.chisq <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { chisq <- c() for (i in 1:length(cutoffs)) { A <- rbind( c( tn[i], fn[i]), c(fp[i], tp[i]) ) chisq <- c(chisq, chisq.test(A, correct=FALSE)$statistic ) } list( cutoffs, chisq ) } .performance.odds.ratio <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { list( cutoffs, tp * tn / (fn * fp) ) } ## ------------------------------------------------------------------------ ## Other measures based on contingency tables ## ------------------------------------------------------------------------ .performance.lift <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { n.samples <- n.pos + n.neg list( cutoffs, (tp / n.pos) / (n.pos.pred / n.samples) ) } .performance.f <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred, alpha) { prec <- .performance.positive.predictive.value(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred)[[2]] list( cutoffs, 1/ ( alpha*(1/prec) + (1-alpha)*(1/(tp/n.pos)) ) ) } .performance.rocconvexhull <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { x <- fp / n.neg y <- tp / n.pos finite.bool <- is.finite(x) & is.finite(y) x <- x[ finite.bool ] y <- y[ finite.bool ] if (length(x) < 2) { stop("Not enough distinct predictions to compute ROC convex hull.") } ## keep only points on the convex hull ind <- chull(x, y) x.ch <- x[ind] y.ch <- y[ind] ## keep only convex hull points above the diagonal, except (0,0) ## and (1,1) ind.upper.triangle <- x.ch < y.ch x.ch <- c(0, x.ch[ind.upper.triangle], 1) y.ch <- c(0, y.ch[ind.upper.triangle], 1) ## sort remaining points by ascending x value ind <- order(x.ch) x.ch <- x.ch[ind] y.ch <- y.ch[ind] list( x.ch, y.ch ) } ## ---------------------------------------------------------------------------- ## Cutoff-independent measures ## ---------------------------------------------------------------------------- .performance.auc <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred, fpr.stop) { x <- fp / n.neg y <- tp / n.pos finite.bool <- is.finite(x) & is.finite(y) x <- x[ finite.bool ] y <- y[ finite.bool ] if (length(x) < 2) { stop(paste("Not enough distinct predictions to compute area", "under the ROC curve.")) } if (fpr.stop < 1) { ind <- max(which( x <= fpr.stop )) tpr.stop <- approxfun( x[ind:(ind+1)], y[ind:(ind+1)] )(fpr.stop) x <- c(x[1:ind], fpr.stop) y <- c(y[1:ind], tpr.stop) } ans <- list() auc <- 0 for (i in 2:length(x)) { auc <- auc + 0.5 * (x[i] - x[i-1]) * (y[i] + y[i-1]) } ans <- list( c(), auc) names(ans) <- c("x.values","y.values") return(ans) } .performance.precision.recall.break.even.point <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, 
n.neg.pred) { pred <- prediction( predictions, labels) perf <- performance( pred, measure="prec", x.measure="rec") x <- rev(perf@x.values[[1]]) y <- rev(perf@y.values[[1]]) alpha <- rev(perf@alpha.values[[1]]) finite.bool <- is.finite(alpha) & is.finite(x) & is.finite(y) x <- x[ finite.bool ] y <- y[ finite.bool ] alpha <- alpha[ finite.bool ] if (length(x) < 2) { stop(paste("Not enough distinct predictions to compute", "precision/recall intersections.")) } intersection.cutoff <- c() intersection.pr <- c() ## find all intersection points by looking at all intervals (i,i+1): ## if the difference function between x and y has different signs at the ## interval boundaries, then an intersection point is in the interval; ## compute as the root of the difference function if ( (x[1]-y[1]) == 0) { intersection.cutoff <- c( alpha[1] ) intersection.pr <- c( x[1] ) } for (i in (1:(length(alpha)-1))) { if ((x[i+1]-y[i+1]) == 0) { intersection.cutoff <- c( intersection.cutoff, alpha[i+1] ) intersection.pr <- c( intersection.pr, x[i+1] ) } else if ((x[i]-y[i])*(x[i+1]-y[i+1]) < 0 ) { ans <- uniroot(approxfun(c(alpha[i], alpha[i+1] ), c(x[i]-y[i], x[i+1]-y[i+1])), c(alpha[i],alpha[i+1])) intersection.cutoff <- c(intersection.cutoff, ans$root) intersection.pr <- c(intersection.pr, ans$f.root) } } list( rev(intersection.cutoff), rev(intersection.pr) ) } .performance.calibration.error <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred, window.size) { if (window.size > length(predictions)) { stop("Window size exceeds number of predictions.") } if (min(predictions)<0 || max(predictions)>1) { stop("Calibration error needs predictions between 0 and 1") } pos.label <- levels(labels)[2] neg.label <- levels(labels)[1] ordering <- rev(order( predictions )) predictions <- predictions[ ordering ] labels <- labels[ ordering ] median.cutoffs <- c() calibration.errors <- c() for (left.index in 1 : (length(predictions) - window.size+1) ) { right.index <- left.index + window.size - 1 pos.fraction <- sum(labels[left.index : right.index] == pos.label) / window.size mean.prediction <- mean( predictions[ left.index : right.index ] ) calibration.errors <- c(calibration.errors, abs(pos.fraction - mean.prediction)) median.cutoffs <- c(median.cutoffs, median(predictions[left.index:right.index])) } list( median.cutoffs, calibration.errors ) } .performance.mean.cross.entropy <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { if (! 
all(levels(labels)==c(0,1)) || any(predictions<0) || any(predictions>1) ) { stop(paste("Class labels need to be 0 and 1 and predictions between", "0 and 1 for mean cross entropy.")) } pos.label <- levels(labels)[2] neg.label <- levels(labels)[1] list( c(), - 1/length(predictions) * (sum( log( predictions[which(labels==pos.label)] )) + sum( log( 1 - predictions[which(labels==neg.label)] ))) ) } .performance.root.mean.squared.error <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { ## convert labels from factor to numeric values labels <- as.numeric(levels(labels))[labels] if (any(is.na(labels))) { stop("For rmse predictions have to be numeric.") } list( c(), sqrt( 1/length(predictions) * sum( (predictions - labels)^2 )) ) } ## ---------------------------------------------------------------------------- ## Derived measures: ## ---------------------------------------------------------------------------- .performance.sar <- function( predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { pred <- prediction( predictions, labels) perf.acc <- performance( pred, measure="acc") perf.rmse <- performance( pred, measure="rmse") perf.auc <- performance( pred, measure="auc") list(cutoffs, 1/3 * (perf.acc@y.values[[1]] + (1 - perf.rmse@y.values[[1]]) + perf.auc@y.values[[1]])) } ## ---------------------------------------------------------------------------- ## Measures taking into account actual cost considerations ## ---------------------------------------------------------------------------- .performance.expected.cost <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred) { ## kick out suboptimal values (i.e. fpr/tpr pair for which another one ## with same fpr and higher tpr exists, ## or one for which one with same tpr but lower fpr exists if (n.neg==0 || n.pos==0) { stop(paste("At least one positive and one negative sample are", "needed to compute a cost curve.")) } fpr <- fp / n.neg tpr <- tp / n.pos ## sort by fpr (ascending), in case of ties by descending tpr ind <- order(fpr,-tpr) fpr <- fpr[ind] tpr <- tpr[ind] ## for tied fprs, only the one with the highest tpr is kept ind <- !duplicated(fpr) fpr <- fpr[ind] tpr <- tpr[ind] ## for tied tprs, only keep the one with the lowest fpr ind <- order(-tpr,fpr) fpr <- fpr[ind] tpr <- tpr[ind] ind <- !duplicated(tpr) fpr <- fpr[ind] tpr <- tpr[ind] if (!any(0==fpr & 0==tpr)) { fpr <- c(0,fpr) tpr <- c(0,tpr) } if (!any(1==fpr & 1==tpr)) { fpr <- c(fpr,1) tpr <- c(tpr,1) } ## compute all functions f <- list() for (i in 1:length(fpr)) { f <- c(f, .construct.linefunct( 0, fpr[i], 1, 1-tpr[i] )) } ## compute all intersection points x.values <- c() y.values <- c() for (i in 1:(length(fpr)-1)) { for (j in (i+1):length(fpr)) { ans <- .intersection.point( f[[i]], f[[j]] ) if (all(is.finite(ans))) { y.values.at.current.x <- c() for (k in 1:length(f)) { y.values.at.current.x <- c(y.values.at.current.x, f[[k]](ans[1])) } if (abs(ans[2] - min(y.values.at.current.x )) < sqrt(.Machine$double.eps)) { x.values <- c(x.values, ans[1]) y.values <- c(y.values, ans[2]) } } } } if (!any(0==x.values & 0==y.values)) { x.values <- c(0,x.values) y.values <- c(0,y.values) } if (!any(1==x.values & 0==y.values)) { x.values <- c(x.values,1) y.values <- c(y.values,0) } ind <- order( x.values) list( x.values[ind], y.values[ind] ) } .performance.cost <- function(predictions, labels, cutoffs, fp, tp, fn, tn, n.pos, n.neg, n.pos.pred, n.neg.pred, cost.fp, cost.fn) { 
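    ## expected misclassification cost at each cutoff: the two error rates
    ## are weighted by the class priors and the user-supplied costs,
    ## cost = P(+) * fnr * cost.fn + P(-) * fpr * cost.fp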
    n.samples <- n.pos + n.neg
    cost <- ((n.pos / n.samples) * (fn / n.pos) * cost.fn +
             (n.neg / n.samples) * (fp / n.neg) * cost.fp)
    list( cutoffs, cost )
}

### File: ROCR/R/ROCR_aux.R

## ---------------------------------------------------------------------------
## Dealing with argument lists, especially '...'
## ---------------------------------------------------------------------------

## return list of selected arguments, skipping those that
## are not present in arglist
.select.args <- function( arglist, args.to.select, complement=FALSE) {
    match.bool <- names(arglist) %in% args.to.select
    if (complement==TRUE) match.bool <- !match.bool
    return( arglist[ match.bool] )
}

## return arguments in arglist which match prefix, with prefix removed
## ASSUMPTION: prefix is separated from rest by a '.'; this is removed along
## with the prefix
.select.prefix <- function( arglist, prefixes, complement=FALSE ) {
    match.expr <- paste(paste('(^',prefixes,'\\.)',sep=""),collapse='|')
    match.bool <- (1:length(arglist)) %in% grep( match.expr, names(arglist) )
    if (complement==TRUE) match.bool <- !match.bool
    arglist <- arglist[ match.bool]
    names(arglist) <- sub( match.expr, '', names(arglist))
    return( arglist )
}

.garg <- function( arglist, arg, i=1) {
    if (is.list(arglist[[arg]])) arglist[[ arg ]][[i]]
    else arglist[[ arg ]]
}

.sarg <- function( arglist, ...) {
    ll <- list(...)
    for (argname in names(ll) ) {
        arglist[[ argname ]] <- ll[[ argname ]]
    }
    return(arglist)
}

.farg <- function( arglist, ...) {
    ll <- list(...)
    for (argname in names(ll) ) {
        if (length(arglist[[argname]])==0)
            arglist[[ argname ]] <- ll[[ argname ]]
    }
    return(arglist)
}

.slice.run <- function( arglist, runi=1) {
    r <- lapply( names(arglist), function(name) .garg( arglist, name, runi))
    names(r) <- names(arglist)
    r
}

## ---------------------------------------------------------------------------
## Line segments
## ---------------------------------------------------------------------------

.construct.linefunct <- function( x1, y1, x2, y2) {
    if (x1==x2) {
        stop("Cannot construct a function from data.")
    }
    lf <- eval(parse(text=paste("function(x) {",
                                "m <- (",y2,"-",y1,") / (",x2,"-",x1,");",
                                "c <- ",y1," - m * ",x1,";",
                                "return( m * x + c)}",sep=" ")))
    lf
}

.intersection.point <- function( f, g ) {
    ## if lines are parallel, no intersection point
    if (f(1)-f(0) == g(1)-g(0)) {
        return( c(Inf,Inf) )
    }
    ## otherwise, choose search interval
    imin <- -1
    imax <- 1
    while (sign(f(imin)-g(imin)) == sign(f(imax)-g(imax))) {
        imin <- 2*imin
        imax <- 2*imax
    }
    h <- function(x) { f(x) - g(x) }
    intersect.x <- uniroot( h, interval=c(imin-1,imax+1) )$root
    intersect.y <- f( intersect.x )
    return( c(intersect.x, intersect.y ))
}

### File: ROCR/R/prediction.R

prediction <- function(predictions, labels, label.ordering=NULL) {

    ## bring 'predictions' and 'labels' into list format,
    ## each list entry representing one x-validation run

    ## convert predictions into canonical list format
    if (is.data.frame(predictions)) {
        names(predictions) <- c()
        predictions <- as.list(predictions)
    } else if (is.matrix(predictions)) {
        predictions <- as.list(data.frame(predictions))
        names(predictions) <- c()
    } else if (is.vector(predictions) && !is.list(predictions)) {
        predictions <- list(predictions)
    } else if (!is.list(predictions)) {
        stop("Format of predictions is invalid.")
    }
    ## if predictions is a list -> keep unaltered

    ## convert labels into canonical list format
    if 
(is.data.frame(labels)) { names(labels) <- c() labels <- as.list( labels) } else if (is.matrix(labels)) { labels <- as.list( data.frame( labels)) names(labels) <- c() } else if ((is.vector(labels) || is.ordered(labels) || is.factor(labels)) && !is.list(labels)) { labels <- list( labels) } else if (!is.list(labels)) { stop("Format of labels is invalid.") } ## if labels is a list -> keep unaltered ## Length consistency checks if (length(predictions) != length(labels)) stop(paste("Number of cross-validation runs must be equal", "for predictions and labels.")) if (! all(sapply(predictions, length) == sapply(labels, length))) stop(paste("Number of predictions in each run must be equal", "to the number of labels for each run.")) ## only keep prediction/label pairs that are finite numbers for (i in 1:length(predictions)) { finite.bool <- is.finite( predictions[[i]] ) predictions[[i]] <- predictions[[i]][ finite.bool ] labels[[i]] <- labels[[i]][ finite.bool ] } ## abort if 'labels' format is inconsistent across ## different cross-validation runs label.format="" ## one of 'normal','factor','ordered' if (all(sapply( labels, is.factor)) && !any(sapply(labels, is.ordered))) { label.format <- "factor" } else if (all(sapply( labels, is.ordered))) { label.format <- "ordered" } else if (all(sapply( labels, is.character)) || all(sapply( labels, is.numeric)) || all(sapply( labels, is.logical))) { label.format <- "normal" } else { stop(paste("Inconsistent label data type across different", "cross-validation runs.")) } ## abort if levels are not consistent across different ## cross-validation runs if (! all(sapply(labels, levels)==levels(labels[[1]])) ) { stop(paste("Inconsistent factor levels across different", "cross-validation runs.")) } ## convert 'labels' into ordered factors, aborting if the number ## of classes is not equal to 2. levels <- c() if ( label.format == "ordered" ) { if (!is.null(label.ordering)) { stop(paste("'labels' is already ordered. 
No additional", "'label.ordering' must be supplied.")) } else { levels <- levels(labels[[1]]) } } else { if ( is.null( label.ordering )) { if ( label.format == "factor" ) levels <- sort(levels(labels[[1]])) else levels <- sort( unique( unlist( labels))) } else { ## if (!setequal( levels, label.ordering)) { if (!setequal( unique(unlist(labels)), label.ordering )) { stop("Label ordering does not match class labels.") } levels <- label.ordering } for (i in 1:length(labels)) { if (is.factor(labels)) labels[[i]] <- ordered(as.character(labels[[i]]), levels=levels) else labels[[i]] <- ordered( labels[[i]], levels=levels) } } if (length(levels) != 2) { message <- paste("Number of classes is not equal to 2.\n", "ROCR currently supports only evaluation of ", "binary classification tasks.",sep="") stop(message) } ## determine whether predictions are continuous or categorical ## (in the latter case stop; scheduled for the next ROCR version) if (!is.numeric( unlist( predictions ))) { stop("Currently, only continuous predictions are supported by ROCR.") } ## compute cutoff/fp/tp data cutoffs <- list() fp <- list() tp <- list() fn <- list() tn <- list() n.pos <- list() n.neg <- list() n.pos.pred <- list() n.neg.pred <- list() for (i in 1:length(predictions)) { n.pos <- c( n.pos, sum( labels[[i]] == levels[2] )) n.neg <- c( n.neg, sum( labels[[i]] == levels[1] )) ans <- .compute.unnormalized.roc.curve( predictions[[i]], labels[[i]] ) cutoffs <- c( cutoffs, list( ans$cutoffs )) fp <- c( fp, list( ans$fp )) tp <- c( tp, list( ans$tp )) fn <- c( fn, list( n.pos[[i]] - tp[[i]] )) tn <- c( tn, list( n.neg[[i]] - fp[[i]] )) n.pos.pred <- c(n.pos.pred, list(tp[[i]] + fp[[i]]) ) n.neg.pred <- c(n.neg.pred, list(tn[[i]] + fn[[i]]) ) } return( new("prediction", predictions=predictions, labels=labels, cutoffs=cutoffs, fp=fp, tp=tp, fn=fn, tn=tn, n.pos=n.pos, n.neg=n.neg, n.pos.pred=n.pos.pred, n.neg.pred=n.neg.pred)) } ## fast fp/tp computation based on cumulative summing .compute.unnormalized.roc.curve <- function( predictions, labels ) { ## determine the labels that are used for the pos. resp. neg. class : pos.label <- levels(labels)[2] neg.label <- levels(labels)[1] pred.order <- order(predictions, decreasing=TRUE) predictions.sorted <- predictions[pred.order] tp <- cumsum(labels[pred.order]==pos.label) fp <- cumsum(labels[pred.order]==neg.label) ## remove fp & tp for duplicated predictions ## as duplicated keeps the first occurrence, but we want the last, two ## rev are used. 
  ## Highest cutoff (Infinity) corresponds to tp=0, fp=0
  dups <- rev(duplicated(rev(predictions.sorted)))
  tp <- c(0, tp[!dups])
  fp <- c(0, fp[!dups])
  cutoffs <- c(Inf, predictions.sorted[!dups])

  return(list( cutoffs=cutoffs, fp=fp, tp=tp ))
}

### File: ROCR/R/performance_plots.R

## ----------------------------------------------------------------------------
## plot method for objects of class 'performance'
## ----------------------------------------------------------------------------

.get.arglist <- function( fname, arglist ) {
    if (fname=='plot')
        return(.select.args(arglist,
                            union(names(formals(plot.default)),
                                  names(par()))))
    else if (fname=='plot.xy')
        return(.select.args(arglist, union( names(formals(plot.xy)),
                                            names(par()))))
    else return( .select.prefix( arglist, fname) )
}

.downsample <- function( perf, downsampling ) {
    for (i in 1:length(perf@alpha.values)) {
        if (downsampling < 1 && downsampling > 0)
            ind <- round(seq(1, length(perf@alpha.values[[i]]),
                             length=(length(perf@alpha.values[[i]]) *
                                     downsampling)))
        else if (downsampling > 1)
            ind <- round(seq(1, length(perf@alpha.values[[i]]),
                             length=downsampling))
        else ind <- 1:length(perf@alpha.values[[i]])
        perf@alpha.values[[i]] <- perf@alpha.values[[i]][ind]
        perf@x.values[[i]] <- perf@x.values[[i]][ind]
        perf@y.values[[i]] <- perf@y.values[[i]][ind]
    }
    return(perf)
}

.plot.performance <- function(perf, ..., avg="none", spread.estimate="none",
                              spread.scale=1, show.spread.at=c(),
                              colorize=FALSE,
                              colorize.palette=rev(rainbow(256, start=0,
                                                           end=4/6)),
                              colorkey=colorize, colorkey.relwidth=0.25,
                              colorkey.pos="right", print.cutoffs.at=c(),
                              cutoff.label.function=function(x) { round(x,2) },
                              downsampling=0, add=FALSE) {

    arglist <- c(lapply( as.list(environment()), eval ), list(...)
) if (length(perf@y.values) != length(perf@x.values)) { stop("Performance object cannot be plotted.") } if (is.null(perf@alpha.values) && (colorize==TRUE || length(print.cutoffs.at)>0)) { stop(paste("Threshold coloring or labeling cannot be performed:", "performance object has no threshold information.")) } if ((avg=="vertical" || avg=="horizontal") && (colorize==TRUE || length(print.cutoffs.at)>0)) { stop(paste("Threshold coloring or labeling is only well-defined for", "'no' or 'threshold' averaging.")) } if (downsampling >0 ) perf <- .downsample( perf, downsampling) ## for infinite cutoff, assign maximal finite cutoff + mean difference ## between adjacent cutoff pairs if (length(perf@alpha.values)!=0) perf@alpha.values <- lapply(perf@alpha.values, function(x) { isfin <- is.finite(x); x[is.infinite(x)] <- (max(x[isfin]) + mean(abs(x[isfin][-1] - x[isfin][-length(x[isfin])]))); x } ) ## remove samples with x or y not finite for (i in 1:length(perf@x.values)) { ind.bool <- (is.finite(perf@x.values[[i]]) & is.finite(perf@y.values[[i]])) if (length(perf@alpha.values)>0) perf@alpha.values[[i]] <- perf@alpha.values[[i]][ind.bool] perf@x.values[[i]] <- perf@x.values[[i]][ind.bool] perf@y.values[[i]] <- perf@y.values[[i]][ind.bool] } arglist <- .sarg( arglist, perf=perf) if (add==FALSE) do.call( ".performance.plot.canvas", arglist ) if (avg=="none") do.call(".performance.plot.no.avg", arglist) else if (avg=="vertical") do.call(".performance.plot.vertical.avg", arglist) else if (avg=="horizontal") do.call(".performance.plot.horizontal.avg", arglist) else if (avg=="threshold") do.call(".performance.plot.threshold.avg", arglist) } ## --------------------------------------------------------------------------- ## initializing plots and plotting a canvas ## (can be skipped using 'plot( ..., add=TRUE)' ## --------------------------------------------------------------------------- .performance.plot.canvas <- function(perf, avg, ...) { arglist <- list(...) axis.names <- list(x=perf@x.name, y=perf@y.name) if (avg=="horizontal" || avg=="threshold") axis.names$x <- paste("Average", tolower(axis.names$x)) if (avg=="vertical" || avg=="threshold") axis.names$y <- paste("Average", tolower(axis.names$y)) arglist <- .farg(arglist, xlab=axis.names$x, ylab=axis.names$y) arglist <- .farg(arglist, xlim=c(min(unlist(perf@x.values)), max(unlist(perf@x.values))), ylim=c(min(unlist(perf@y.values)), max(unlist(perf@y.values)))) do.call("plot", .sarg(.slice.run(.get.arglist('plot', arglist)), x=0.5, y=0.5, type='n', axes=FALSE)) do.call( "axis", .sarg(.slice.run(.get.arglist('xaxis', arglist)), side=1)) do.call( "axis", .sarg(.slice.run(.get.arglist('yaxis', arglist)), side=2)) if (.garg(arglist,'colorkey')==TRUE) { colors <- rev( .garg(arglist,'colorize.palette') ) max.alpha <- max(unlist(perf@alpha.values)) min.alpha <- min(unlist(perf@alpha.values)) col.cutoffs <- rev(seq(min.alpha,max.alpha, length=length( colors ))) if ( .garg(arglist,'colorkey.pos')=="right") { ## axis drawing (ticks + labels) ## The interval [min.alpha,max.alpha] needs to be mapped onto ## the interval [min.y,max.y], rather than onto the interval ## [ylim[1],ylim[2]] ! 
In the latter case, NAs could occur in ## approxfun below, because axTicks can be out of the ylim-range ## ('xaxs'/'yaxs': 4% region) max.y <- max(axTicks(4)) min.y <- min(axTicks(4)) alpha.ticks <- .garg( arglist, c("coloraxis.at")) if (length(alpha.ticks)==0) alpha.ticks <- approxfun(c(min.y, max.y), c(min.alpha, max.alpha)) ( axTicks(4)) alpha2y <- approxfun(c(min(alpha.ticks), max(alpha.ticks)), c(min.y,max.y)) arglist <- .sarg(arglist, coloraxis.labels=.garg(arglist, 'cutoff.label.function')(alpha.ticks), coloraxis.at=alpha2y(alpha.ticks)) do.call("axis", .sarg(.slice.run(.get.arglist('coloraxis', arglist)), side=4)) ## draw colorkey ## each entry in display.bool corresponds to one rectangle of ## the colorkey. ## Only rectangles within the alpha.ticks range are plotted. ## y.lower, y.upper, and colors, are the attributes of the visible ## rectangles (those for which display.bool=TRUE) display.bool <- (col.cutoffs >= min(alpha.ticks) & col.cutoffs < max(alpha.ticks)) y.lower <- alpha2y( col.cutoffs )[display.bool] colors <- colors[display.bool] if (length(y.lower)>=2) { y.width <- y.lower[2] - y.lower[1] y.upper <- y.lower + y.width x.left <- .garg(arglist,'xlim')[2] + ((.garg(arglist,'xlim')[2] - .garg(arglist,'xlim')[1]) * (1-.garg(arglist,'colorkey.relwidth'))*0.04) x.right <- .garg(arglist,'xlim')[2] + (.garg(arglist,'xlim')[2] -.garg(arglist,'xlim')[1]) * 0.04 rect(x.left, y.lower, x.right, y.upper, col=colors, border=colors,xpd=NA) } } else if (.garg(arglist, 'colorkey.pos') == "top") { ## axis drawing (ticks + labels) max.x <- max(axTicks(3)) min.x <- min(axTicks(3)) alpha.ticks <- .garg( arglist, c("coloraxis.at")) if (length(alpha.ticks)==0) { alpha.ticks <- approxfun(c(min.x, max.x), c(min.alpha, max.alpha))(axTicks(3)) } alpha2x <- approxfun(c( min(alpha.ticks), max(alpha.ticks)), c( min.x, max.x)) arglist <- .sarg(arglist, coloraxis.labels=.garg(arglist, 'cutoff.label.function')(alpha.ticks), coloraxis.at= alpha2x(alpha.ticks)) do.call("axis", .sarg(.slice.run( .get.arglist('coloraxis', arglist)), side=3)) ## draw colorkey display.bool <- (col.cutoffs >= min(alpha.ticks) & col.cutoffs < max(alpha.ticks)) x.left <- alpha2x( col.cutoffs )[display.bool] colors <- colors[display.bool] if (length(x.left)>=2) { x.width <- x.left[2] - x.left[1] x.right <- x.left + x.width y.lower <- .garg(arglist,'ylim')[2] + (.garg(arglist,'ylim')[2] - .garg(arglist,'ylim')[1]) * (1-.garg(arglist,'colorkey.relwidth'))*0.04 y.upper <- .garg(arglist,'ylim')[2] + (.garg(arglist,'ylim')[2] - .garg(arglist,'ylim')[1]) * 0.04 rect(x.left, y.lower, x.right, y.upper, col=colors, border=colors, xpd=NA) } } } do.call( "box", .slice.run( .get.arglist( 'box', arglist))) } ## ---------------------------------------------------------------------------- ## plotting performance objects when no curve averaging is wanted ## ---------------------------------------------------------------------------- .performance.plot.no.avg <- function( perf, ... ) { arglist <- list(...)
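## When colorize=TRUE, the cutoff range is mapped onto the color
## palette via the vector 'col.cutoffs' computed below; each curve
## segment is then drawn individually (see the plot.xy loop), using
## the color whose cutoff interval contains the segment's alpha value.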
arglist <- .farg(arglist, type= 'l') if (.garg(arglist, 'colorize') == TRUE) { colors <- rev( .garg( arglist, 'colorize.palette') ) max.alpha <- max(unlist(perf@alpha.values)) min.alpha <- min(unlist(perf@alpha.values)) col.cutoffs <- rev(seq(min.alpha,max.alpha, length=length(colors)+1)) col.cutoffs <- col.cutoffs[2:length(col.cutoffs)] } for (i in 1:length(perf@x.values)) { if (.garg(arglist, 'colorize') == FALSE) { do.call("plot.xy", .sarg(.slice.run(.get.arglist('plot.xy', arglist), i), xy=(xy.coords(perf@x.values[[i]], perf@y.values[[i]])))) } else { for (j in 1:(length(perf@x.values[[i]])-1)) { segment.coloring <- colors[min(which(col.cutoffs <= perf@alpha.values[[i]][j]))] do.call("plot.xy", .sarg(.slice.run(.get.arglist('plot.xy', arglist), i), xy=(xy.coords(perf@x.values[[i]][j:(j+1)], perf@y.values[[i]][j:(j+1)])), col= segment.coloring)) } } print.cutoffs.at <- .garg(arglist, 'print.cutoffs.at',i) if (! is.null(print.cutoffs.at)) { text.x <- approxfun(perf@alpha.values[[i]], perf@x.values[[i]], rule=2, ties=mean)(print.cutoffs.at) text.y <- approxfun(perf@alpha.values[[i]], perf@y.values[[i]], rule=2, ties=mean)(print.cutoffs.at) do.call("points", .sarg(.slice.run(.get.arglist('points', arglist),i), x= text.x, y= text.y)) do.call("text", .farg(.slice.run( .get.arglist('text', arglist),i), x= text.x, y= text.y, labels=(.garg(arglist, 'cutoff.label.function', i)(print.cutoffs.at)))) } } } ## ---------------------------------------------------------------------------- ## plotting performance objects when vertical curve averaging is wanted ## ---------------------------------------------------------------------------- .performance.plot.vertical.avg <- function( perf, ...) { arglist <- list(...) arglist <- .farg(arglist, show.spread.at= (seq(min(unlist(perf@x.values)), max(unlist(perf@x.values)), length=11))) perf.avg <- perf x.values <- seq(min(unlist(perf@x.values)), max(unlist(perf@x.values)), length=max( sapply(perf@x.values, length))) for (i in 1:length(perf@y.values)) { perf.avg@y.values[[i]] <- approxfun(perf@x.values[[i]], perf@y.values[[i]], ties=mean, rule=2)(x.values) } perf.avg@y.values <- list(rowMeans( data.frame( perf.avg@y.values ))) perf.avg@x.values <- list(x.values) perf.avg@alpha.values <- list() ## y.values at show.spread.at (midpoint of error bars ) show.spread.at.y.values <- lapply(as.list(1:length(perf@x.values)), function(i) { approxfun(perf@x.values[[i]], perf@y.values[[i]], rule=2, ties=mean)( .garg(arglist, 'show.spread.at')) }) show.spread.at.y.values <- as.matrix(data.frame(show.spread.at.y.values )) colnames(show.spread.at.y.values) <- c() ## now, show.spread.at.y.values[i,] contains the curve y values at the ## sampling x value .garg(arglist,'show.spread.at')[i] if (.garg(arglist, 'spread.estimate') == "stddev" || .garg(arglist, 'spread.estimate') == "stderror") { bar.width <- apply(show.spread.at.y.values, 1, sd) if (.garg(arglist, 'spread.estimate') == "stderror") { bar.width <- bar.width / sqrt( ncol(show.spread.at.y.values) ) } bar.width <- .garg(arglist, 'spread.scale') * bar.width suppressWarnings(do.call("plotCI", .farg(.sarg(.get.arglist( 'plotCI', arglist), x=.garg(arglist, 'show.spread.at'), y=rowMeans( show.spread.at.y.values), uiw= bar.width, liw= bar.width, err= 'y', add= TRUE), gap= 0, type= 'n'))) } if (.garg(arglist, 'spread.estimate') == "boxplot") { do.call("boxplot", .farg(.sarg(.get.arglist( 'boxplot', arglist), x= data.frame(t(show.spread.at.y.values)), at= .garg(arglist, 'show.spread.at'), add= TRUE, axes= FALSE), boxwex= 
(1/(2*(length(.garg(arglist, 'show.spread.at'))))))) do.call("points", .sarg(.get.arglist( 'points', arglist), x= .garg(arglist, 'show.spread.at'), y= rowMeans(show.spread.at.y.values))) } do.call( ".plot.performance", .sarg(arglist, perf= perf.avg, avg= 'none', add= TRUE)) } ## ---------------------------------------------------------------------------- ## plotting performance objects when horizontal curve averaging is wanted ## ---------------------------------------------------------------------------- .performance.plot.horizontal.avg <- function( perf, ...) { arglist <- list(...) arglist <- .farg(arglist, show.spread.at= seq(min(unlist(perf@y.values)), max(unlist(perf@y.values)), length=11)) perf.avg <- perf y.values <- seq(min(unlist(perf@y.values)), max(unlist(perf@y.values)), length=max( sapply(perf@y.values, length))) for (i in 1:length(perf@x.values)) { perf.avg@x.values[[i]] <- approxfun(perf@y.values[[i]], perf@x.values[[i]], ties=mean, rule=2)(y.values) } perf.avg@x.values <- list(rowMeans( data.frame( perf.avg@x.values ))) perf.avg@y.values <- list(y.values) perf.avg@alpha.values <- list() ## x.values at show.spread.at (midpoint of error bars ) show.spread.at.x.values <- lapply(as.list(1:length(perf@y.values)), function(i) { approxfun(perf@y.values[[i]], perf@x.values[[i]], rule=2, ties=mean)(.garg(arglist,'show.spread.at')) } ) show.spread.at.x.values <- as.matrix(data.frame(show.spread.at.x.values)) colnames(show.spread.at.x.values) <- c() ## now, show.spread.at.x.values[i,] contains the curve x values at the ## sampling y value .garg(arglist,'show.spread.at')[i] if (.garg(arglist,'spread.estimate') == 'stddev' || .garg(arglist,'spread.estimate') == 'stderror') { bar.width <- apply(show.spread.at.x.values, 1, sd) if (.garg(arglist,'spread.estimate')== 'stderror') { bar.width <- bar.width / sqrt( ncol(show.spread.at.x.values) ) } bar.width <- .garg(arglist,'spread.scale') * bar.width suppressWarnings(do.call("plotCI", .farg(.sarg(.get.arglist( 'plotCI', arglist), x= rowMeans( show.spread.at.x.values), y= .garg(arglist, 'show.spread.at'), uiw= bar.width, liw= bar.width, err= 'x', add= TRUE), gap= 0, type= 'n'))) } if (.garg(arglist,'spread.estimate') == "boxplot") { do.call("boxplot", .farg(.sarg(.get.arglist( 'boxplot', arglist), x= data.frame(t(show.spread.at.x.values)), at= .garg(arglist,'show.spread.at'), add= TRUE, axes= FALSE, horizontal= TRUE), boxwex= 1/(2*(length(.garg(arglist,'show.spread.at')))))) do.call("points", .sarg(.get.arglist( 'points', arglist), x= rowMeans(show.spread.at.x.values), y= .garg(arglist,'show.spread.at'))) } do.call( ".plot.performance", .sarg(arglist, perf= perf.avg, avg= 'none', add= TRUE)) } ## ---------------------------------------------------------------------------- ## plotting performance objects when threshold curve averaging is wanted ## ---------------------------------------------------------------------------- .performance.plot.threshold.avg <- function( perf, ...) { arglist <- list(...) 
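## Threshold averaging: every run is re-sampled at a common, equally
## spaced grid of cutoff (alpha) values via approxfun; the average
## curve is then the componentwise mean (rowMeans) of the re-sampled
## x and y coordinates. Unlike vertical or horizontal averaging, this
## does not require the curve to be a function of x or y.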
arglist <- .farg(arglist, show.spread.at= seq(min(unlist(perf@x.values)), max(unlist(perf@x.values)), length=11)) perf.sampled <- perf alpha.values <- rev(seq(min(unlist(perf@alpha.values)), max(unlist(perf@alpha.values)), length=max( sapply(perf@alpha.values, length)))) for (i in 1:length(perf.sampled@y.values)) { perf.sampled@x.values[[i]] <- approxfun(perf@alpha.values[[i]],perf@x.values[[i]], rule=2, ties=mean)(alpha.values) perf.sampled@y.values[[i]] <- approxfun(perf@alpha.values[[i]], perf@y.values[[i]], rule=2, ties=mean)(alpha.values) } ## compute average curve perf.avg <- perf.sampled perf.avg@x.values <- list( rowMeans( data.frame( perf.avg@x.values))) perf.avg@y.values <- list(rowMeans( data.frame( perf.avg@y.values))) perf.avg@alpha.values <- list( alpha.values ) x.values.spread <- lapply(as.list(1:length(perf@x.values)), function(i) { approxfun(perf@alpha.values[[i]], perf@x.values[[i]], rule=2, ties=mean)(.garg(arglist,'show.spread.at')) } ) x.values.spread <- as.matrix(data.frame( x.values.spread )) y.values.spread <- lapply(as.list(1:length(perf@y.values)), function(i) { approxfun(perf@alpha.values[[i]], perf@y.values[[i]], rule=2, ties=mean)(.garg(arglist,'show.spread.at')) } ) y.values.spread <- as.matrix(data.frame( y.values.spread )) if (.garg(arglist,'spread.estimate')=="stddev" || .garg(arglist,'spread.estimate')=="stderror") { x.bar.width <- apply(x.values.spread, 1, sd) y.bar.width <- apply(y.values.spread, 1, sd) if (.garg(arglist,'spread.estimate')=="stderror") { x.bar.width <- x.bar.width / sqrt( ncol(x.values.spread) ) y.bar.width <- y.bar.width / sqrt( ncol(x.values.spread) ) } x.bar.width <- .garg(arglist,'spread.scale') * x.bar.width y.bar.width <- .garg(arglist,'spread.scale') * y.bar.width suppressWarnings( do.call("plotCI", .farg(.sarg(.get.arglist( 'plotCI', arglist), x= rowMeans(x.values.spread), y= rowMeans(y.values.spread), uiw= x.bar.width, liw= x.bar.width, err= 'x', add= TRUE), gap= 0, type= 'n'))) suppressWarnings( do.call("plotCI", .farg(.sarg(.get.arglist( 'plotCI', arglist), x= rowMeans(x.values.spread), y= rowMeans(y.values.spread), uiw= y.bar.width, liw= y.bar.width, err= 'y', add= TRUE), gap= 0, type= 'n'))) } if (.garg(arglist,'spread.estimate')=="boxplot") { do.call("boxplot", .farg(.sarg(.get.arglist('boxplot', arglist), x= data.frame(t(x.values.spread)), at= rowMeans(y.values.spread), add= TRUE, axes= FALSE, horizontal= TRUE), boxwex= 1/(2*(length(.garg(arglist,'show.spread.at')))))) do.call("boxplot", .farg(.sarg(.get.arglist('boxplot', arglist), x= data.frame(t(y.values.spread)), at= rowMeans(x.values.spread), add= TRUE, axes= FALSE), boxwex= 1/(2*(length(.garg(arglist,'show.spread.at')))))) do.call("points", .sarg(.get.arglist('points', arglist), x= rowMeans(x.values.spread), y= rowMeans(y.values.spread))) } do.call( ".plot.performance", .sarg(arglist, perf= perf.avg, avg= 'none', add= TRUE)) } ROCR/R/zzz.R0000644000176000001440000000432012504526277012241 0ustar ripleyuserssetClass("prediction", representation(predictions = "list", labels = "list", cutoffs = "list", fp = "list", tp = "list", tn = "list", fn = "list", n.pos = "list", n.neg = "list", n.pos.pred = "list", n.neg.pred = "list")) setClass("performance", representation(x.name = "character", y.name = "character", alpha.name = "character", x.values = "list", y.values = "list", alpha.values = "list" )) #setMethod("plot",signature(x="performance",y="missing"), # function(x,y,...) { # .plot.performance(x,...) 
# }) setMethod("plot",signature(x="performance",y="missing"), function(x,y,..., avg="none", spread.estimate="none", spread.scale=1, show.spread.at=c(), colorize=FALSE, colorize.palette=rev(rainbow(256,start=0, end=4/6)), colorkey=colorize, colorkey.relwidth=0.25, colorkey.pos="right", print.cutoffs.at=c(), cutoff.label.function=function(x) { round(x,2) }, downsampling=0, add=FALSE ) { .plot.performance(x,..., avg= avg, spread.estimate= spread.estimate, spread.scale= spread.scale, show.spread.at= show.spread.at, colorize= colorize, colorize.palette= colorize.palette, colorkey= colorkey, colorkey.relwidth= colorkey.relwidth, colorkey.pos= colorkey.pos, print.cutoffs.at= print.cutoffs.at, cutoff.label.function= cutoff.label.function, downsampling= downsampling, add= add) }) ## .First.lib <- function( libname, pkgname, where) { ## if (!require(methods)) { ## stop("Require Methods package") ## } ## if (!require(gplots)) { ## stop("Require gplots package") ## } ## where <- match(paste("package:",pkgname, sep=""), search()) ## } ROCR/R/performance.R0000644000176000001440000003474512504526277013723 0ustar ripleyusersperformance <- function(prediction.obj, measure, x.measure="cutoff", ...) { ## define the needed environments envir.list <- .define.environments() long.unit.names <- envir.list$long.unit.names function.names <- envir.list$function.names obligatory.x.axis <- envir.list$obligatory.x.axis optional.arguments <- envir.list$optional.arguments default.values <- envir.list$default.values ## abort in case of misuse if (class(prediction.obj) != 'prediction' || !exists(measure, where=long.unit.names, inherits=FALSE) || !exists(x.measure, where=long.unit.names, inherits=FALSE)) { stop(paste("Wrong argument types: First argument must be of type", "'prediction'; second and optional third argument must", "be available performance measures!")) } ## abort, if attempt is made to use a measure that has an obligatory ## x.axis as the x.measure (cannot be combined) if (exists( x.measure, where=obligatory.x.axis, inherits=FALSE )) { message <- paste("The performance measure", x.measure, "can only be used as 'measure', because it has", "the following obligatory 'x.measure':\n", get( x.measure, envir=obligatory.x.axis)) stop(message) } ## if measure is a performance measure with obligatory x.axis, then ## enforce this axis: if (exists( measure, where=obligatory.x.axis, inherits=FALSE )) { x.measure <- get( measure, envir=obligatory.x.axis ) } if (x.measure == "cutoff" || exists( measure, where=obligatory.x.axis, inherits=FALSE )) { ## fetch from '...' any optional arguments for the performance ## measure at hand that are given, otherwise fill up the default values optional.args <- list(...) 
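## Example: performance(pred, "f") looks up the optional argument
## "alpha" registered for "f" in the 'optional.arguments' environment;
## if no value was supplied via '...', the registered default
## ("f:alpha" = 0.5, the balanced F measure) is filled in by .farg.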
argnames <- c() if ( exists( measure, where=optional.arguments, inherits=FALSE )) { argnames <- get( measure, envir=optional.arguments ) default.arglist <- list() for (i in 1:length(argnames)) { default.arglist <- c(default.arglist, get(paste(measure,":",argnames[i],sep=""), envir=default.values, inherits=FALSE)) } names(default.arglist) <- argnames for (i in 1:length(argnames)) { templist <- list(optional.args, default.arglist[[i]]) names(templist) <- c('arglist', argnames[i]) optional.args <- do.call('.farg', templist) } } optional.args <- .select.args( optional.args, argnames ) ## determine function name function.name <- get( measure, envir=function.names ) ## for each x-validation run, compute the requested performance measure x.values <- list() y.values <- list() for (i in 1:length( prediction.obj@predictions )) { argumentlist <- .sarg(optional.args, predictions= prediction.obj@predictions[[i]], labels= prediction.obj@labels[[i]], cutoffs= prediction.obj@cutoffs[[i]], fp= prediction.obj@fp[[i]], tp= prediction.obj@tp[[i]], fn= prediction.obj@fn[[i]], tn= prediction.obj@tn[[i]], n.pos= prediction.obj@n.pos[[i]], n.neg= prediction.obj@n.neg[[i]], n.pos.pred= prediction.obj@n.pos.pred[[i]], n.neg.pred= prediction.obj@n.neg.pred[[i]]) ans <- do.call( function.name, argumentlist ) if (!is.null(ans[[1]])) x.values <- c( x.values, list( ans[[1]] )) y.values <- c( y.values, list( ans[[2]] )) } if (! (length(x.values)==0 || length(x.values)==length(y.values)) ) { stop("Consistency error.") } ## create a new performance object return( new("performance", x.name = get( x.measure, envir=long.unit.names ), y.name = get( measure, envir=long.unit.names ), alpha.name = "none", x.values = x.values, y.values = y.values, alpha.values = list() )) } else { perf.obj.1 <- performance( prediction.obj, measure=x.measure, ... ) perf.obj.2 <- performance( prediction.obj, measure=measure, ... 
) return( .combine.performance.objects( perf.obj.1, perf.obj.2 ) ) } } .combine.performance.objects <- function( p.obj.1, p.obj.2 ) { ## some checks for misusage (in any way, this function is ## only for internal use) if ( p.obj.1@x.name != p.obj.2@x.name ) { stop("Error: Objects need to have identical x axis.") } if ( p.obj.1@alpha.name != "none" || p.obj.2@alpha.name != "none") { stop("Error: At least one of the two objects has already been merged.") } if (length(p.obj.1@x.values) != length(p.obj.2@x.values)) { stop(paste("Only performance objects with identical number of", "cross-validation runs can be combined.")) } x.values <- list() x.name <- p.obj.1@y.name y.values <- list() y.name <- p.obj.2@y.name alpha.values <- list() alpha.name <- p.obj.1@x.name for (i in 1:length( p.obj.1@x.values )) { x.values.1 <- p.obj.1@x.values[[i]] y.values.1 <- p.obj.1@y.values[[i]] x.values.2 <- p.obj.2@x.values[[i]] y.values.2 <- p.obj.2@y.values[[i]] ## cutoffs of combined object = merged cutoffs of simple objects cutoffs <- sort( unique( c(x.values.1, x.values.2)), decreasing=TRUE ) ## calculate y.values at cutoffs using step function y.values.int.1 <- approxfun(x.values.1, y.values.1, method="constant",f=1,rule=2)(cutoffs) y.values.int.2 <- approxfun(x.values.2, y.values.2, method="constant",f=1,rule=2)(cutoffs) ## 'approxfun' ignores NA and NaN objs <- list( y.values.int.1, y.values.int.2) objs.x <- list( x.values.1, x.values.2 ) na.cutoffs.1.bool <- is.na( y.values.1) & !is.nan( y.values.1 ) nan.cutoffs.1.bool <- is.nan( y.values.1) na.cutoffs.2.bool <- is.na( y.values.2) & !is.nan( y.values.2 ) nan.cutoffs.2.bool <- is.nan( y.values.2) bools <- list(na.cutoffs.1.bool, nan.cutoffs.1.bool, na.cutoffs.2.bool, nan.cutoffs.2.bool) values <- c(NA,NaN,NA,NaN) for (j in 1:4) { for (k in which(bools[[j]])) { interval.max <- objs.x[[ ceiling(j/2) ]][k] interval.min <- -Inf if (k < length(objs.x[[ ceiling(j/2) ]])) { interval.min <- objs.x[[ ceiling(j/2) ]][k+1] } objs[[ ceiling(j/2) ]][cutoffs <= interval.max & cutoffs > interval.min ] <- values[j] } } alpha.values <- c(alpha.values, list(cutoffs)) x.values <- c(x.values, list(objs[[1]])) y.values <- c(y.values, list(objs[[2]])) } return( new("performance", x.name=x.name, y.name=y.name, alpha.name=alpha.name, x.values=x.values, y.values=y.values, alpha.values=alpha.values)) } .define.environments <- function() { ## There are five environments: long.unit.names, function.names, ## obligatory.x.axis, optional.arguments, default.values ## Define long names corresponding to the measure abbreviations. 
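## (The environments below serve as simple hash tables keyed by the
## measure abbreviation; e.g. get("tpr", envir=long.unit.names)
## returns "True positive rate".)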
long.unit.names <- new.env() assign("none","None", envir=long.unit.names) assign("cutoff", "Cutoff", envir=long.unit.names) assign("acc", "Accuracy", envir=long.unit.names) assign("err", "Error Rate", envir=long.unit.names) assign("fpr", "False positive rate", envir=long.unit.names) assign("tpr", "True positive rate", envir=long.unit.names) assign("rec", "Recall", envir=long.unit.names) assign("sens", "Sensitivity", envir=long.unit.names) assign("fnr", "False negative rate", envir=long.unit.names) assign("tnr", "True negative rate", envir=long.unit.names) assign("spec", "Specificity", envir=long.unit.names) assign("ppv", "Positive predictive value", envir=long.unit.names) assign("prec", "Precision", envir=long.unit.names) assign("npv", "Negative predictive value", envir=long.unit.names) assign("fall", "Fallout", envir=long.unit.names) assign("miss", "Miss", envir=long.unit.names) assign("pcfall", "Prediction-conditioned fallout", envir=long.unit.names) assign("pcmiss", "Prediction-conditioned miss", envir=long.unit.names) assign("rpp", "Rate of positive predictions", envir=long.unit.names) assign("rnp", "Rate of negative predictions", envir=long.unit.names) assign("auc","Area under the ROC curve", envir=long.unit.names) assign("cal", "Calibration error", envir=long.unit.names) assign("mwp", "Median window position", envir=long.unit.names) assign("prbe","Precision/recall break-even point", envir=long.unit.names) assign("rch", "ROC convex hull", envir=long.unit.names) assign("mxe", "Mean cross-entropy", envir=long.unit.names) assign("rmse","Root-mean-square error", envir=long.unit.names) assign("phi", "Phi correlation coefficient", envir=long.unit.names) assign("mat","Matthews correlation coefficient", envir=long.unit.names) assign("mi", "Mutual information", envir=long.unit.names) assign("chisq", "Chi-square test statistic", envir=long.unit.names) assign("odds","Odds ratio", envir=long.unit.names) assign("lift", "Lift value", envir=long.unit.names) assign("f","Precision-Recall F measure", envir=long.unit.names) assign("sar", "SAR", envir=long.unit.names) assign("ecost", "Expected cost", envir=long.unit.names) assign("cost", "Explicit cost", envir=long.unit.names) ## Define function names corresponding to the measure abbreviations. 
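## Note that several abbreviations can share one implementation:
## e.g. "tpr", "rec" and "sens" all map to
## .performance.true.positive.rate.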
function.names <- new.env() assign("acc", ".performance.accuracy", envir=function.names) assign("err", ".performance.error.rate", envir=function.names) assign("fpr", ".performance.false.positive.rate", envir=function.names) assign("tpr", ".performance.true.positive.rate", envir=function.names) assign("rec", ".performance.true.positive.rate", envir=function.names) assign("sens", ".performance.true.positive.rate", envir=function.names) assign("fnr", ".performance.false.negative.rate", envir=function.names) assign("tnr", ".performance.true.negative.rate", envir=function.names) assign("spec", ".performance.true.negative.rate", envir=function.names) assign("ppv", ".performance.positive.predictive.value", envir=function.names) assign("prec", ".performance.positive.predictive.value", envir=function.names) assign("npv", ".performance.negative.predictive.value", envir=function.names) assign("fall", ".performance.false.positive.rate", envir=function.names) assign("miss", ".performance.false.negative.rate", envir=function.names) assign("pcfall", ".performance.prediction.conditioned.fallout", envir=function.names) assign("pcmiss", ".performance.prediction.conditioned.miss", envir=function.names) assign("rpp", ".performance.rate.of.positive.predictions", envir=function.names) assign("rnp", ".performance.rate.of.negative.predictions", envir=function.names) assign("auc", ".performance.auc", envir=function.names) assign("cal", ".performance.calibration.error", envir=function.names) assign("prbe", ".performance.precision.recall.break.even.point", envir=function.names) assign("rch", ".performance.rocconvexhull", envir=function.names) assign("mxe", ".performance.mean.cross.entropy", envir=function.names) assign("rmse", ".performance.root.mean.squared.error", envir=function.names) assign("phi", ".performance.phi", envir=function.names) assign("mat", ".performance.phi", envir=function.names) assign("mi", ".performance.mutual.information", envir=function.names) assign("chisq", ".performance.chisq", envir=function.names) assign("odds", ".performance.odds.ratio", envir=function.names) assign("lift", ".performance.lift", envir=function.names) assign("f", ".performance.f", envir=function.names) assign("sar", ".performance.sar", envir=function.names) assign("ecost", ".performance.expected.cost", envir=function.names) assign("cost", ".performance.cost", envir=function.names) ## If a measure comes along with an obligatory x axis (including "none"), ## list it here. obligatory.x.axis <- new.env() assign("mxe", "none", envir=obligatory.x.axis) assign("rmse", "none", envir=obligatory.x.axis) assign("prbe", "none", envir=obligatory.x.axis) assign("auc", "none", envir=obligatory.x.axis) assign("rch","none", envir=obligatory.x.axis) ## ecost requires probability cost function as x axis, which is handled ## implicitly, not as an explicit performance measure. assign("ecost","none", envir=obligatory.x.axis) ## If a measure has optional arguments, list the names of the ## arguments here. optional.arguments <- new.env() assign("cal", "window.size", envir=optional.arguments) assign("f", "alpha", envir=optional.arguments) assign("cost", c("cost.fp", "cost.fn"), envir=optional.arguments) assign("auc", "fpr.stop", envir=optional.arguments) ## If a measure has additional arguments, list the default values ## for them here. Naming convention: e.g. 
"cal" has an optional ## argument "window.size" the key to use here is "cal:window.size" ## (colon as separator) default.values <- new.env() assign("cal:window.size", 100, envir=default.values) assign("f:alpha", 0.5, envir=default.values) assign("cost:cost.fp", 1, envir=default.values) assign("cost:cost.fn", 1, envir=default.values) assign("auc:fpr.stop", 1, envir=default.values) list(long.unit.names=long.unit.names, function.names=function.names, obligatory.x.axis=obligatory.x.axis, optional.arguments=optional.arguments, default.values=default.values) } ROCR/MD50000644000176000001440000000323012505027741011360 0ustar ripleyusersc43193465c01f2bc7036b1a21ac0938e *DESCRIPTION 5bcf86510d848a44f8d943b9231320be *INSTALL dc33bb94a44f11db390a8ab5510d1a42 *NAMESPACE 2fa08bdab7deee0de7bd50eec671f7b4 *NEWS 8f3a897135420e5fb99b1adebc4c8765 *R/ROCR_aux.R 08c4ee8081fb13b7da90b8a6faa28ea0 *R/performance.R c1d7f42127f1fca5957f11086e8c3357 *R/performance_measures.R 166e801703a416aa2a2cdbde298651db *R/performance_plots.R c6520d9937a5432bba0921b1c1ddc154 *R/prediction.R 19c3994bf2948eb6a694b7b7e7ae4590 *R/zzz.R 4913ca5661a5a89bd61706da40976871 *README 497d34bf928630ed582476e043a68f18 *data/ROCR.hiv.rda a6b723208917a41ca8d978a95640f1cc *data/ROCR.simple.rda e4d3b38035f21f0bd36606cd08b3ded3 *data/ROCR.xval.rda abe80443628a3c11d359a0f49d81dad0 *data/datalist bede4a3f07350fa3132b98b4d73eee33 *demo/00Index 618e74cf61daf4d6aa7419ebcd886bef *demo/ROCR.R 194b3f7d6c63a4a7d798b55f93f7221a *inst/CITATION d236e8f1d4e4c4a6e36ee3a7165d931a *man/ROCR.hiv.Rd b6269194a8faa5f9b3f7aaf2080af90a *man/ROCR.simple.Rd 6effa7e0b9bbc40346f616baf52b79ca *man/ROCR.xval.Rd b64409618258bd89926c7786ca252fea *man/performance-class.Rd d15a50e5ff272d4885c6d54c37ee0f63 *man/performance.Rd 729a749d6275f8c077351762e8494b6d *man/plot-methods.Rd 8393928cc1e8a96ab92b68e7650de96a *man/prediction-class.Rd 6e511f8b8439e40585eec050842a4665 *man/prediction.Rd 252544c94f32bde5458fa7eea14002ec *tools/README.unittests 09ff700221b6ed63f7144b7e33d4cd19 *tools/unittests/XXXrunit.ROCR.aux.RXXX 5056a46205152d57056250b299e17885 *tools/unittests/runit.aux.r c0154cae711af582b73facb505446f7c *tools/unittests/runit.consistency.r 3408f3e11ddb96b430a14734e033f631 *tools/unittests/runit.simple.r 221863191a9bc6548f52a4711b9ac966 *tools/unittests/testsuite.ROCR.R ROCR/README0000644000176000001440000000154312504526277011744 0ustar ripleyusersPlease support our work by citing the ROCR article in your publications: ------------------------------------------------------------------------ Sing T, Sander O, Beerenwinkel N, Lengauer T. [2005] ROCR: visualizing classifier performance in R. Bioinformatics 21(20):3940-1. Free full text: http://bioinformatics.oxfordjournals.org/content/21/20/3940.full Getting started with ROCR: -------------------------- * After installation (cf. file 'INSTALL'), and starting R, load the package with 'library(ROCR)'. * For a short overview of ROCR: demo(ROCR) * For an overview of ROCR's online help: help(package=ROCR) * ROCR help pages: help(prediction) help(performance) help(plot.performance) help('prediction-class') help('performance-class') * For more information, visit the ROCR website: http://rocr.bioinf.mpi-sb.mpg.de * Good luck! 
ROCR/DESCRIPTION0000644000176000001440000000265512505027741012570 0ustar ripleyusersPackage: ROCR Title: Visualizing the Performance of Scoring Classifiers Version: 1.0-7 Date: 2015-03-26 Depends: gplots, methods Author: Tobias Sing, Oliver Sander, Niko Beerenwinkel, Thomas Lengauer Description: ROC graphs, sensitivity/specificity curves, lift charts, and precision/recall plots are popular examples of trade-off visualizations for specific pairs of performance measures. ROCR is a flexible tool for creating cutoff-parameterized 2D performance curves by freely combining two from over 25 performance measures (new performance measures can be added using a standard interface). Curves from different cross-validation or bootstrapping runs can be averaged by different methods, and standard deviations, standard errors or box plots can be used to visualize the variability across the runs. The parameterization can be visualized by printing cutoff values at the corresponding curve positions, or by coloring the curve according to cutoff. All components of a performance plot can be quickly adjusted using a flexible parameter dispatching mechanism. Despite its flexibility, ROCR is easy to use, with only three commands and reasonable default values for all optional parameters. Maintainer: Tobias Sing License: GPL (>= 2) URL: http://rocr.bioinf.mpi-sb.mpg.de/ Packaged: 2015-03-26 10:34:10 UTC; singto1 NeedsCompilation: no Repository: CRAN Date/Publication: 2015-03-26 17:12:17 ROCR/man/0000755000176000001440000000000012504526277011634 5ustar ripleyusersROCR/man/ROCR.simple.Rd0000644000176000001440000000135012504526277014157 0ustar ripleyusers\name{ROCR.simple} \alias{ROCR.simple} \docType{data} \title{Data set: Simple artificial prediction data for use with ROCR} \description{ A mock data set containing a simple set of predictions and corresponding class labels. } \usage{data(ROCR.simple)} \format{A two element list. The first element, \code{ROCR.simple$predictions}, is a vector of numerical predictions. The second element, \code{ROCR.simple$labels}, is a vector of corresponding class labels.} \examples{ # plot a ROC curve for a single prediction run # and color the curve according to cutoff. data(ROCR.simple) pred <- prediction(ROCR.simple$predictions, ROCR.simple$labels) perf <- performance(pred,"tpr","fpr") plot(perf,colorize=TRUE) } \keyword{datasets} ROCR/man/plot-methods.Rd0000644000176000001440000001455012504526277014547 0ustar ripleyusers\name{plot-methods} \docType{methods} \alias{plot.performance} \alias{plot-methods} \alias{plot,performance-method} \alias{plot,performance,missing-method} \title{Plot method for performance objects} \description{This is the method to plot all objects of class performance.} \usage{ \S4method{plot}{performance,missing}(x, y, ..., avg="none", spread.estimate="none", spread.scale=1, show.spread.at=c(), colorize=FALSE, colorize.palette=rev(rainbow(256,start=0, end=4/6)), colorkey=colorize, colorkey.relwidth=0.25, colorkey.pos="right", print.cutoffs.at=c(), cutoff.label.function=function(x) { round(x,2) }, downsampling=0, add=FALSE ) } \arguments{ \item{x}{an object of class \code{performance}} \item{y}{not used} \item{...}{Optional graphical parameters to adjust different components of the performance plot. Parameters are directed to their target component by prefixing them with the name of the component (\code{component.parameter}, e.g. \code{text.cex}). 
The following components are available: \code{xaxis}, \code{yaxis}, \code{coloraxis}, \code{box} (around the plotting region), \code{points}, \code{text}, \code{plotCI} (error bars), \code{boxplot}. The names of these components are influenced by the R functions that are used to create them. Thus, \code{par(component)} can be used to see which parameters are available for a given component (with the exception of the three axes; use \code{par(axis)} here). To adjust the canvas or the performance curve(s), the standard \code{plot} parameters can be used without any prefix.} \item{avg}{If the performance object describes several curves (from cross-validation runs or bootstrap evaluations of one particular method), the curves from each of the runs can be averaged. Allowed values are \code{none} (plot all curves separately), \code{horizontal} (horizontal averaging), \code{vertical} (vertical averaging), and \code{threshold} (threshold (=cutoff) averaging). Note that while threshold averaging is always feasible, vertical and horizontal averaging are not well-defined if the graph cannot be represented as a function x->y and y->x, respectively.} \item{spread.estimate}{When curve averaging is enabled, the variation around the average curve can be visualized as standard error bars (\code{stderror}), standard deviation bars (\code{stddev}), or by using box plots (\code{boxplot}). Note that the function \code{plotCI}, which is used internally by ROCR to draw error bars, might raise a warning if the spread of the curves at certain positions is 0.} \item{spread.scale}{For \code{stderror} or \code{stddev}, this is a scalar factor to be multiplied with the length of the standard error/deviation bar. For example, under normal assumptions, \code{spread.scale=2} can be used to get approximate 95\% confidence intervals.} \item{show.spread.at}{For vertical averaging, this vector determines the x positions for which the spread estimates should be visualized. For horizontal and threshold averaging, it determines the y positions and the cutoffs, respectively. By default, spread estimates are shown at 11 equally spaced positions.} \item{colorize}{This logical determines whether the curve(s) should be colorized according to cutoff.} \item{colorize.palette}{If curve colorizing is enabled, this determines the color palette onto which the cutoff range is mapped.} \item{colorkey}{If \code{TRUE}, a color key is drawn into the 4\% border region (default of \code{par(xaxs)} and \code{par(yaxs)}) of the plot. The color key visualizes the mapping from cutoffs to colors.} \item{colorkey.relwidth}{Scalar between 0 and 1 that determines the fraction of the 4\% border region that is occupied by the colorkey.} \item{colorkey.pos}{Determines if the colorkey is drawn vertically at the \code{right} side, or horizontally at the \code{top} of the plot.} \item{print.cutoffs.at}{This vector specifies the cutoffs which should be printed as text along the curve at the corresponding curve positions.} \item{cutoff.label.function}{By default, cutoff annotations along the curve or at the color key are rounded to two decimal places before printing. Using a custom \code{cutoff.label.function}, any other transformation can be performed on the cutoffs instead (e.g. rounding with different precision or taking the logarithm).} \item{downsampling}{ROCR can efficiently compute most performance measures even for data sets with millions of elements. However, plotting of large data sets can be slow and lead to PS/PDF documents of considerable size.
In that case, performance curves that are indistinguishable from the original can be obtained by using only a fraction of the computed performance values. Values for downsampling between 0 and 1 indicate the fraction of the original data set size to which the performance object should be downsampled, integers above 1 are interpreted as the actual number of performance values to which the curve(s) should be downsampled.} \item{add}{If \code{TRUE}, the curve(s) is/are added to an already existing plot; otherwise a new plot is drawn.} } % \details{} \references{A detailed list of references can be found on the ROCn'COST homepage at \url{http://rocr.bioinf.mpi-sb.mpg.de}.} \author{Tobias Sing \email{tobias.sing@mpi-sb.mpg.de}, Oliver Sander \email{osander@mpi-sb.mpg.de}} % \note{} \seealso{\code{\link{prediction}}, \code{\link{performance}}, \code{\link{prediction-class}}, \code{\link{performance-class}}} \examples{ # plotting a ROC curve: library(ROCR) data(ROCR.simple) pred <- prediction( ROCR.simple$predictions, ROCR.simple$labels ) perf <- performance( pred, "tpr", "fpr" ) plot( perf ) # To entertain your children, make your plots nicer # using ROCR's flexible parameter passing mechanisms # (much cheaper than a finger painting set) par(bg="lightblue", mai=c(1.2,1.5,1,1)) plot(perf, main="ROCR fingerpainting toolkit", colorize=TRUE, xlab="Mary's axis", ylab="", box.lty=7, box.lwd=5, box.col="gold", lwd=17, colorkey.relwidth=0.5, xaxis.cex.axis=2, xaxis.col='blue', xaxis.col.axis="blue", yaxis.col='green', yaxis.cex.axis=2, yaxis.at=c(0,0.5,0.8,0.85,0.9,1), yaxis.las=1, xaxis.lwd=2, yaxis.lwd=3, yaxis.col.axis="orange", cex.lab=2, cex.main=2) } \keyword{hplot} ROCR/man/prediction.Rd0000644000176000001440000000634312504526277014271 0ustar ripleyusers\name{prediction} \alias{prediction} \title{Function to create prediction objects} \description{Every classifier evaluation using ROCR starts with creating a \code{prediction} object. This function is used to transform the input data (which can be in vector, matrix, data frame, or list form) into a standardized format.} \usage{ prediction(predictions, labels, label.ordering = NULL) } \arguments{ \item{predictions}{A vector, matrix, list, or data frame containing the predictions.} \item{labels}{A vector, matrix, list, or data frame containing the true class labels. Must have the same dimensions as 'predictions'.} \item{label.ordering}{The default ordering (cf.details) of the classes can be changed by supplying a vector containing the negative and the positive class label.} } \details{'predictions' and 'labels' can simply be vectors of the same length. However, in the case of cross-validation data, different cross-validation runs can be provided as the *columns* of a matrix or data frame, or as the entries of a list. In the case of a matrix or data frame, all cross-validation runs must have the same length, whereas in the case of a list, the lengths can vary across the cross-validation runs. Internally, as described in section 'Value', all of these input formats are converted to list representation. Since scoring classifiers give relative tendencies towards a negative (low scores) or positive (high scores) class, it has to be declared which class label denotes the negative, and which the positive class. Ideally, labels should be supplied as ordered factor(s), the lower level corresponding to the negative class, the upper level to the positive class. 
If the labels are factors (unordered), numeric, logical or characters, ordering of the labels is inferred from R's built-in \code{<} relation (e.g. 0 < 1, -1 < 1, 'a' < 'b', FALSE < TRUE). Use \code{label.ordering} to override this default ordering. Please note that the ordering can be locale-dependent e.g. for character labels '-1' and '1'. Currently, ROCR supports only binary classification (extensions toward multiclass classification are scheduled for the next release, however). If there are more than two distinct label symbols, execution stops with an error message. If all predictions use the same two symbols that are used for the labels, categorical predictions are assumed. If there are more than two predicted values, but all numeric, continuous predictions are assumed (i.e. a scoring classifier). Otherwise, if more than two symbols occur in the predictions, and not all of them are numeric, execution stops with an error message.} \value{An S4 object of class \code{prediction}.} \references{A detailed list of references can be found on the ROCR homepage at \url{http://rocr.bioinf.mpi-sb.mpg.de}.} \author{Tobias Sing \email{tobias.sing@mpi-sb.mpg.de}, Oliver Sander \email{osander@mpi-sb.mpg.de}} \seealso{\code{\link{prediction-class}}, \code{\link{performance}}, \code{\link{performance-class}}, \code{\link{plot.performance}} } \examples{ # create a simple prediction object library(ROCR) data(ROCR.simple) pred <- prediction(ROCR.simple$predictions,ROCR.simple$labels) } \keyword{classif}ROCR/man/performance.Rd0000644000176000001440000002507512504526277014435 0ustar ripleyusers\name{performance} \alias{performance} \title{Function to create performance objects} \description{All kinds of predictor evaluations are performed using this function. } \usage{ performance(prediction.obj, measure, x.measure="cutoff", \dots) } \arguments{ \item{prediction.obj}{An object of class \code{prediction}.} \item{measure}{Performance measure to use for the evaluation. A complete list of the performance measures that are available for \code{measure} and \code{x.measure} is given in the 'Details' section.} \item{x.measure}{A second performance measure. If different from the default, a two-dimensional curve, with \code{x.measure} taken to be the unit in direction of the x axis, and \code{measure} to be the unit in direction of the y axis, is created. This curve is parametrized with the cutoff.} \item{...}{Optional arguments (specific to individual performance measures).} } \details{Here is the list of available performance measures. Let Y and \eqn{\hat{Y}}{Yhat} be random variables representing the class and the prediction for a randomly drawn sample, respectively. We denote by \eqn{\oplus}{+} and \eqn{\ominus}{-} the positive and negative class, respectively. Further, we use the following abbreviations for empirical quantities: P (\# positive samples), N (\# negative samples), TP (\# true positives), TN (\# true negatives), FP (\# false positives), FN (\# false negatives). \describe{ \item{\code{acc}:}{Accuracy. \eqn{P(\hat{Y}=Y)}{P(Yhat = Y)}. Estimated as: \eqn{\frac{TP+TN}{P+N}}{(TP+TN)/(P+N)}.} \item{\code{err}:}{Error rate. \eqn{P(\hat{Y}\ne Y)}{P(Yhat != Y)}. Estimated as: \eqn{\frac{FP+FN}{P+N}}{(FP+FN)/(P+N)}.} \item{\code{fpr}:}{False positive rate. \eqn{P(\hat{Y}=\oplus | Y = \ominus)}{P(Yhat = + | Y = -)}. Estimated as: \eqn{\frac{FP}{N}}{FP/N}.} \item{\code{fall}:}{Fallout. Same as \code{fpr}.} \item{\code{tpr}:}{True positive rate. \eqn{P(\hat{Y}=\oplus|Y=\oplus)}{P(Yhat = + | Y = +)}. 
Estimated as: \eqn{\frac{TP}{P}}{TP/P}.} \item{\code{rec}:}{Recall. Same as \code{tpr}.} \item{\code{sens}:}{Sensitivity. Same as \code{tpr}.} \item{\code{fnr}:}{False negative rate. \eqn{P(\hat{Y}=\ominus|Y=\oplus)}{P(Yhat = - | Y = +)}. Estimated as: \eqn{\frac{FN}{P}}{FN/P}.} \item{\code{miss}:}{Miss. Same as \code{fnr}.} \item{\code{tnr}:}{True negative rate. \eqn{P(\hat{Y} = \ominus|Y=\ominus)}{P(Yhat = - | Y = -)}.} \item{\code{spec}:}{Specificity. Same as \code{tnr}.} \item{\code{ppv}:}{Positive predictive value. \eqn{P(Y=\oplus|\hat{Y}=\oplus)}{P(Y = + | Yhat = +)}. Estimated as: \eqn{\frac{TP}{TP+FP}}{TP/(TP+FP)}.} \item{\code{prec}:}{Precision. Same as \code{ppv}.} \item{\code{npv}:}{Negative predictive value. \eqn{P(Y=\ominus|\hat{Y}=\ominus)}{P(Y = - | Yhat = -)}. Estimated as: \eqn{\frac{TN}{TN+FN}}{TN/(TN+FN)}.} \item{\code{pcfall}:}{Prediction-conditioned fallout. \eqn{P(Y=\ominus|\hat{Y}=\oplus)}{P(Y = - | Yhat = +)}. Estimated as: \eqn{\frac{FP}{TP+FP}}{FP/(TP+FP)}.} \item{\code{pcmiss}:}{Prediction-conditioned miss. \eqn{P(Y=\oplus|\hat{Y}=\ominus)}{P(Y = + | Yhat = -)}. Estimated as: \eqn{\frac{FN}{TN+FN}}{FN/(TN+FN)}.} \item{\code{rpp}:}{Rate of positive predictions. \eqn{P( \hat{Y} = \oplus)}{P(Yhat = +)}. Estimated as: (TP+FP)/(TP+FP+TN+FN).} \item{\code{rnp}:}{Rate of negative predictions. \eqn{P( \hat{Y} = \ominus)}{P(Yhat = -)}. Estimated as: (TN+FN)/(TP+FP+TN+FN).} \item{\code{phi}:}{Phi correlation coefficient. \eqn{\frac{TP \cdot TN - FP \cdot FN}{\sqrt{ (TP+FN) \cdot (TN+FP) \cdot (TP+FP) \cdot (TN+FN)}}}{(TP*TN - FP*FN)/(sqrt((TP+FN)*(TN+FP)*(TP+FP)*(TN+FN)))}. Yields a number between -1 and 1, with 1 indicating a perfect prediction, 0 indicating a random prediction. Values below 0 indicate a worse than random prediction.} \item{\code{mat}:}{Matthews correlation coefficient. Same as \code{phi}.} \item{\code{mi}:}{Mutual information. \eqn{I(\hat{Y},Y) := H(Y) - H(Y|\hat{Y})}{I(Yhat, Y) := H(Y) - H(Y | Yhat)}, where H is the (conditional) entropy. Entropies are estimated naively (no bias correction).} \item{\code{chisq}:}{Chi square test statistic. \code{?chisq.test} for details. Note that R might raise a warning if the sample size is too small.} \item{\code{odds}:}{Odds ratio. \eqn{\frac{TP \cdot TN}{FN \cdot FP}}{(TP*TN)/(FN*FP)}. Note that odds ratio produces Inf or NA values for all cutoffs corresponding to FN=0 or FP=0. This can substantially decrease the plotted cutoff region.} \item{\code{lift}:}{Lift value. \eqn{\frac{P(\hat{Y}=\oplus|Y=\oplus)}{P(\hat{Y}=\oplus)}}{P(Yhat = + | Y = +)/P(Yhat = +)}.} \item{\code{f}:}{Precision-recall F measure (van Rijsbergen, 1979). Weighted harmonic mean of precision (P) and recall (R). \eqn{F = \frac{1}{\alpha \frac{1}{P} + (1-\alpha)\frac{1}{R}}}{F = 1/ (alpha*1/P + (1-alpha)*1/R)}. If \eqn{\alpha=\frac{1}{2}}{alpha=1/2}, the mean is balanced. A frequent equivalent formulation is \eqn{F = \frac{(\beta^2+1) \cdot P \cdot R}{R + \beta^2 \cdot P}}{F = (beta^2+1) * P * R / (R + beta^2 * P)}. In this formulation, the mean is balanced if \eqn{\beta=1}{beta=1}. Currently, ROCR only accepts the alpha version as input (e.g. \eqn{\alpha=0.5}{alpha=0.5}). If no value for alpha is given, the mean will be balanced by default.} \item{\code{rch}:}{ROC convex hull. A ROC (=\code{tpr} vs \code{fpr}) curve with concavities (which represent suboptimal choices of cutoff) removed (Fawcett 2001). 
Since the result is already a parametric performance curve, it cannot be used in combination with other measures.} \item{\code{auc}:}{Area under the ROC curve. This is equal to the value of the Wilcoxon-Mann-Whitney test statistic and also the probability that the classifier will score a randomly drawn positive sample higher than a randomly drawn negative sample. Since the output of \code{auc} is cutoff-independent, this measure cannot be combined with other measures into a parametric curve. The partial area under the ROC curve up to a given false positive rate can be calculated by passing the optional parameter \code{fpr.stop=0.5} (or any other value between 0 and 1) to \code{performance}.} \item{\code{prbe}:}{Precision-recall break-even point. The cutoff(s) where precision and recall are equal. At this point, positive and negative predictions are made at the same rate as their prevalence in the data. Since the output of \code{prbe} is just a cutoff-independent scalar, this measure cannot be combined with other measures into a parametric curve.} \item{\code{cal}:}{Calibration error. The calibration error is the absolute difference between predicted confidence and actual reliability. This error is estimated at all cutoffs by sliding a window across the range of possible cutoffs. The default window size of 100 can be adjusted by passing the optional parameter \code{window.size=200} to \code{performance}. E.g., if for several positive samples the output of the classifier is around 0.75, you might expect from a well-calibrated classifier that the fraction of them which is correctly predicted as positive is also around 0.75. In a well-calibrated classifier, the probabilistic confidence estimates are realistic. Only for use with probabilistic output (i.e. scores between 0 and 1).} \item{\code{mxe}:}{Mean cross-entropy. Only for use with probabilistic output. \eqn{MXE :=-\frac{1}{P+N}( \sum_{y_i=\oplus} ln(\hat{y}_i) + \sum_{y_i=\ominus} ln(1-\hat{y}_i))}{MXE := - 1/(P+N) \sum_{y_i=+} ln(yhat_i) + \sum_{y_i=-} ln(1-yhat_i)}. Since the output of \code{mxe} is just a cutoff-independent scalar, this measure cannot be combined with other measures into a parametric curve.} \item{\code{rmse}:}{Root-mean-squared error. Only for use with numerical class labels. \eqn{RMSE:=\sqrt{\frac{1}{P+N}\sum_i (y_i - \hat{y}_i)^2}}{RMSE := sqrt(1/(P+N) \sum_i (y_i - yhat_i)^2)}. Since the output of \code{rmse} is just a cutoff-independent scalar, this measure cannot be combined with other measures into a parametric curve.} \item{\code{sar}:}{Score combining performance measures of different characteristics, in an attempt to create a more "robust" measure (cf. Caruana R., ROCAI2004): SAR = 1/3 * ( Accuracy + Area under the ROC curve + Root mean-squared error ).} \item{\code{ecost}:}{Expected cost. For details on cost curves, cf. Drummond & Holte 2000, 2004. \code{ecost} has an obligatory x axis, the so-called 'probability-cost function'; thus it cannot be combined with other measures. Although with \code{ecost} one is interested only in the lower envelope of a set of lines, it might be instructive to plot the whole set of lines in addition to the lower envelope. An example is given in \code{demo(ROCR)}.} \item{\code{cost}:}{Cost of a classifier when class-conditional misclassification costs are explicitly given. Accepts the optional parameters \code{cost.fp} and \code{cost.fn}, by which the costs for false positives and negatives can be adjusted, respectively.
By default, both are set to 1.} } } \value{An S4 object of class performance.} \references{A detailed list of references can be found on the ROCR homepage at \url{http://rocr.bioinf.mpi-sb.mpg.de}.} \author{Tobias Sing \email{tobias.sing@mpi-sb.mpg.de}, Oliver Sander \email{osander@mpi-sb.mpg.de}} \note{Here is how to call 'performance' to create some standard evaluation plots: \describe{ \item{ROC curves:}{measure="tpr", x.measure="fpr".} \item{Precision/recall graphs:}{measure="prec", x.measure="rec".} \item{Sensitivity/specificity plots:}{measure="sens", x.measure="spec".} \item{Lift charts:}{measure="lift", x.measure="rpp".} } } \seealso{\code{\link{prediction}}, \code{\link{prediction-class}}, \code{\link{performance-class}}, \code{\link{plot.performance}} } \examples{ ## computing a simple ROC curve (x-axis: fpr, y-axis: tpr) library(ROCR) data(ROCR.simple) pred <- prediction( ROCR.simple$predictions, ROCR.simple$labels) perf <- performance(pred,"tpr","fpr") plot(perf) ## precision/recall curve (x-axis: recall, y-axis: precision) perf1 <- performance(pred, "prec", "rec") plot(perf1) ## sensitivity/specificity curve (x-axis: specificity, ## y-axis: sensitivity) perf1 <- performance(pred, "sens", "spec") plot(perf1) } \keyword{classif} ROCR/man/performance-class.Rd0000644000176000001440000000557312504526277015541 0ustar ripleyusers\name{performance-class} \docType{class} \alias{performance-class} \title{Class "performance"} \description{Object to capture the result of a performance evaluation, optionally collecting evaluations from several cross-validation or bootstrapping runs.} \section{Objects from the Class}{Objects can be created by using the \code{performance} function.} \section{Slots}{ \describe{ \item{\code{x.name}:}{Performance measure used for the x axis.} \item{\code{y.name}:}{Performance measure used for the y axis.} \item{\code{alpha.name}:}{Name of the unit that is used to create the parametrized curve. Currently, curves can only be parametrized by cutoff, so \code{alpha.name} is either \code{none} or \code{cutoff}.} \item{\code{x.values}:}{A list in which each entry contains the x values of the curve of this particular cross-validation run. x.values[[i]], y.values[[i]], and alpha.values[[i]] correspond to each other.} \item{\code{y.values}:}{A list in which each entry contains the y values of the curve of this particular cross-validation run.} \item{\code{alpha.values}:}{A list in which each entry contains the cutoff values of the curve of this particular cross-validation run.} } } \details{A \code{performance} object can capture information from four different evaluation scenarios: \itemize{ \item The behaviour of a cutoff-dependent performance measure across the range of all cutoffs (e.g. \code{performance( predObj, 'acc' )} ). Here, \code{x.values} contains the cutoffs, \code{y.values} the corresponding values of the performance measure, and \code{alpha.values} is empty.\cr \item The trade-off between two performance measures across the range of all cutoffs (e.g. \code{performance( predObj, 'tpr', 'fpr' )} ). In this case, the cutoffs are stored in \code{alpha.values}, while \code{x.values} and \code{y.values} contain the corresponding values of the two performance measures.\cr \item A performance measure that comes along with an obligatory second axis (e.g. \code{performance( predObj, 'ecost' )} ). 
Here, the measure values are stored in \code{y.values}, while the corresponding values of the obligatory axis are stored in \code{x.values}, and \code{alpha.values} is empty.\cr \item A performance measure whose value is just a scalar (e.g. \code{performance( predObj, 'auc' )} ). The value is then stored in \code{y.values}, while \code{x.values} and \code{alpha.values} are empty. } } \references{A detailed list of references can be found on the ROCR homepage at \url{http://rocr.bioinf.mpi-sb.mpg.de}.} \author{Tobias Sing \email{tobias.sing@mpi-sb.mpg.de}, Oliver Sander \email{osander@mpi-sb.mpg.de}} \seealso{\code{\link{prediction}}, \code{\link{performance}}, \code{\link{prediction-class}}, \code{\link{plot.performance}} } \keyword{classes} ROCR/man/prediction-class.Rd0000644000176000001440000000446712504526277015401 0ustar ripleyusers\name{prediction-class} \docType{class} \alias{prediction-class} \title{Class "prediction"} \description{Object to encapsulate numerical predictions together with the corresponding true class labels, optionally collecting predictions and labels for several cross-validation or bootstrapping runs.} \section{Objects from the Class}{Objects can be created by using the \code{prediction} function.} \section{Slots}{ \describe{ \item{\code{predictions}:}{A list, in which each element is a vector of predictions (the list has length > 1 for x-validation data.)} \item{\code{labels}:}{Analogously, a list in which each element is a vector of true class labels.} \item{\code{cutoffs}:}{A list in which each element is a vector of all necessary cutoffs. Each cutoff vector consists of the predicted scores (duplicates removed), in descending order.} \item{\code{fp}:}{A list in which each element is a vector of the number (not the rate!) of false positives induced by the cutoffs given in the corresponding 'cutoffs' list entry.} \item{\code{tp}:}{As fp, but for true positives.} \item{\code{tn}:}{As fp, but for true negatives.} \item{\code{fn}:}{As fp, but for false negatives.} \item{\code{n.pos}:}{A list in which each element contains the number of positive samples in the given x-validation run.} \item{\code{n.neg}:}{As n.pos, but for negative samples.} \item{\code{n.pos.pred}:}{A list in which each element is a vector of the number of samples predicted as positive at the cutoffs given in the corresponding 'cutoffs' entry.} \item{\code{n.neg.pred}:}{As n.pos.pred, but for negatively predicted samples.} } } \note{Every \code{prediction} object contains information about the 2x2 contingency table consisting of tp,tn,fp, and fn, along with the marginal sums n.pos,n.neg,n.pos.pred,n.neg.pred, because these form the basis for many derived performance measures.} \references{A detailed list of references can be found on the ROCR homepage at \url{http://rocr.bioinf.mpi-sb.mpg.de}.} \author{Tobias Sing \email{tobias.sing@mpi-sb.mpg.de}, Oliver Sander \email{osander@mpi-sb.mpg.de}} \seealso{\code{\link{prediction}}, \code{\link{performance}}, \code{\link{performance-class}}, \code{\link{plot.performance}} } \keyword{classes} ROCR/man/ROCR.xval.Rd0000644000176000001440000000210112504526277013633 0ustar ripleyusers\name{ROCR.xval} \alias{ROCR.xval} \docType{data} \title{Data set: Artificial cross-validation data for use with ROCR} \description{ A mock data set containing 10 sets of predictions and corresponding labels as would be obtained from 10-fold cross-validation. } \usage{data(ROCR.xval)} \format{A two element list. 
The first element, \code{ROCR.xval$predictions}, is itself a 10 element list. Each of these 10 elements is a vector of numerical predictions for each cross-validation run. Likewise, the second list entry, \code{ROCR.xval$labels}, is a 10 element list in which each element is a vector of true class labels corresponding to the predictions.} \examples{ # plot ROC curves for several cross-validation runs (dotted # in grey), overlaid by the vertical average curve and boxplots # showing the vertical spread around the average. data(ROCR.xval) pred <- prediction(ROCR.xval$predictions, ROCR.xval$labels) perf <- performance(pred,"tpr","fpr") plot(perf,col="grey82",lty=3) plot(perf,lwd=3,avg="vertical",spread.estimate="boxplot",add=TRUE) } \keyword{datasets} ROCR/man/ROCR.hiv.Rd0000644000176000001440000000307412504526277013461 0ustar ripleyusers\name{ROCR.hiv} \alias{ROCR.hiv} \docType{data} \title{Data set: Support vector machines and neural networks applied to the prediction of HIV-1 coreceptor usage} \description{ Linear support vector machines (libsvm) and neural networks (R package nnet) were applied to predict usage of the coreceptors CCR5 and CXCR4 based on sequence data of the third variable loop of the HIV envelope protein. } \usage{data(ROCR.hiv)} \format{A list consisting of the SVM (\code{ROCR.hiv$hiv.svm}) and NN (\code{ROCR.hiv$hiv.nn}) classification data. Each of those is in turn a list consisting of the two elements \code{$predictions} and \code{$labels} (10 element list representing cross-validation data).} \references{Sing, T. & Beerenwinkel, N. & Lengauer, T. "Learning mixtures of localized rules by maximizing the area under the ROC curve". 1st International Workshop on ROC Analysis in AI, 89-96, 2004.} \examples{ data(ROCR.hiv) attach(ROCR.hiv) pred.svm <- prediction(hiv.svm$predictions, hiv.svm$labels) perf.svm <- performance(pred.svm, 'tpr', 'fpr') pred.nn <- prediction(hiv.nn$predictions, hiv.nn$labels) perf.nn <- performance(pred.nn, 'tpr', 'fpr') plot(perf.svm, lty=3, col="red",main="SVMs and NNs for prediction of HIV-1 coreceptor usage") plot(perf.nn, lty=3, col="blue",add=TRUE) plot(perf.svm, avg="vertical", lwd=3, col="red", spread.estimate="stderror",plotCI.lwd=2,add=TRUE) plot(perf.nn, avg="vertical", lwd=3, col="blue", spread.estimate="stderror",plotCI.lwd=2,add=TRUE) legend(0.6,0.6,c('SVM','NN'),col=c('red','blue'),lwd=3) } \keyword{datasets} ROCR/tools/0000755000176000001440000000000012504760173012214 5ustar ripleyusersROCR/tools/README.unittests0000644000176000001440000000024312504526277015141 0ustar ripleyusersYou need the R package 'RUnit' to run the unit tests for ROCR. You can start the test suite by typing: R --vanilla < unittests/testsuite.ROCR.R from _this_ directory.
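Alternatively, the suite can be run from an interactive R session
started in this directory; a sketch based on the 'testsuite.ROCR.R'
runner shown below (requires the RUnit package):

   library(RUnit)
   library(ROCR)
   suite <- defineTestSuite("ROCR test suite", "unittests")
   printTextProtocol(runTestSuite(suite), showDetails=TRUE)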
ROCR/tools/unittests/0000755000176000001440000000000012504526277014263 5ustar ripleyusersROCR/tools/unittests/XXXrunit.ROCR.aux.RXXX0000644000176000001440000000040112504526277020122 0ustar ripleyuserslibrary(RUnit) myTestSuite <- defineTestSuite("ROCR test suite", "tests","runit.aux.r") isValidTestSuite(myTestSuite) testData <- runTestSuite(myTestSuite) printTextProtocol(testData, showDetails=TRUE) printHTMLProtocol(testData, "tests/testresults.html") ROCR/tools/unittests/runit.aux.r0000644000176000001440000000227512504526277016411 0ustar ripleyuserslibrary(RUnit) library(ROCR) testFarg <- function() { ll <- list(arg1=c(1,2,3), arg2=c(4,5,6)) print(str(.farg(ll, arg3=c(7,8,9)) )) checkEquals(.farg(ll, arg3=c(7,8,9)), list(arg1=c(1,2,3), arg2=c(4,5,6), arg3=c(7,8,9))) checkEquals(.farg(ll, arg1=c(1,4,3)), list(arg1=c(1,2,3), arg2=c(4,5,6))) } testGarg <- function() { ll <- list(arg1=list(1,2,3), arg2=list(4,5,6)) checkEquals(.garg(ll, 'arg1'), 1) checkEquals(.garg(ll, 'arg1',2), 2) checkEquals(.garg(ll, 'arg2',3), 6) checkEquals(.garg(ll, 'arg3'), ll$arg3) } testSlice <- function() { ll <- list(arg1=list(c(1,2,3), c(2,3,4), c(3,4,5)), arg2=list('a', 'b', 'c')) checkEquals(.slice.run(ll, 1), list(arg1=c(1,2,3), arg2='a')) checkEquals(.slice.run(ll, 2), list(arg1=c(2,3,4), arg2='b')) checkEquals(.slice.run(ll, 3), list(arg1=c(3,4,5), arg2='c')) ll <- list(arg1=list(c(1,2,3), c(2,3,4), c(3,4,5)), arg2=c('a', 'b', 'c')) checkEquals(.slice.run(ll, 1), list(arg1=c(1,2,3), arg2=c('a', 'b', 'c'))) checkEquals(.slice.run(ll, 2), list(arg1=c(2,3,4), arg2=c('a', 'b', 'c'))) checkEquals(.slice.run(ll, 3), list(arg1=c(3,4,5), arg2=c('a', 'b', 'c'))) } ROCR/tools/unittests/testsuite.ROCR.R0000644000176000001440000000041212504526277017200 0ustar ripleyuserslibrary(RUnit) library(ROCR) myTestSuite <- defineTestSuite("ROCR test suite", "unittests") isValidTestSuite(myTestSuite) testData <- runTestSuite(myTestSuite) printTextProtocol(testData, showDetails=TRUE) printHTMLProtocol(testData, "unittests/testresults.html") ROCR/tools/unittests/runit.consistency.r0000644000176000001440000004334312504526277020156 0ustar ripleyusers## ## ## library(RUnit) library(ROCR) # source("tests/runit.simple.r") # needed for .get.performance.measures ## predict performance measures on random data and check their consistency testConsistency <- function() { for (i in 1:100) { n.folds <- sample(1:10,1) fold.sizes <- sample(10:100, n.folds, replace=T) error.rates <- runif( n.folds ) pp <- .mock.prediction( fold.sizes, error.rates ) pred <- prediction( pp$predictions, pp$labels ) .check.prediction.object(pred) a <- .get.performance.measures( pred, c('acc','err','fpr','tpr','fnr','tnr','prec','pcfall','npv','pcmiss','rpp','rnp')) .check.consistency( a) } } testCombining <- function() { measures <- c('tpr','fpr','acc','err','rec','sens','fnr','tnr','spec', 'ppv','prec','npv','fall','miss','pcfall','pcmiss','rpp','rnp', 'phi','mat','mi','chisq','odds','lift') # 'auc','prbe','rch','mxe','rmse','phi','mat','mi','chisq', # 'odds','lift','f','sar','ecost','cost') for (measure1 in measures) { print(measure1) for (measure2 in measures) { n.folds <- sample(1:2,1) fold.sizes <- sample(10:20, n.folds, replace=T) error.rates <- runif( n.folds ) pp <- .mock.prediction( fold.sizes, error.rates ) pred <- prediction( pp$predictions, pp$labels ) .check.prediction.object(pred) perf1 <- performance( pred, measure1 ) perf2 <- performance( pred, measure2 ) perf3 <- performance( pred, measure2, measure1 ) .check.performance.object(perf1) 
.check.performance.object(perf2) .check.performance.object(perf3) for (i in 1:n.folds) { #check elements checkEquals(setequal( c( perf1@x.values[[i]], perf2@x.values[[i]]), perf3@alpha.values[[i]] ),T) checkEquals(setequal( perf1@y.values[[i]], perf3@x.values[[i]] ),T) checkEquals(setequal( perf2@y.values[[i]], perf3@y.values[[i]] ),T) #check order ind <- sapply( perf1@x.values[[i]], function(x) { min(which(x==perf3@alpha.values[[i]]))}) checkEquals( perf1@y.values[[i]], perf3@x.values[[i]][ind] ) checkEquals( perf2@y.values[[i]], perf3@y.values[[i]][ind] ) } } } } .get.performance.measures <- function(pred, measures) { ans <- list() for (measure in measures) { ## need to enclose y.values into a list to avoid flattening perf <- performance(pred, measure) .check.performance.object( perf ) ans <- c(ans, list(perf@y.values)) } names(ans) <- measures ans } .check.consistency <- function(measures) { ## check entries of contingency table for consistency for (measure in c("acc", "err", "fnr", "tpr", "fpr", "tnr", "pcfall", "prec", "npv", "pcmiss",'rpp','rnp')) { if (!measure %in% names(measures)) { stop(paste("Performance measure", measure, "not in argument list.")) } } for (i in 1:length(measures$acc)) { finite.bool <- is.finite(measures$acc[[i]]) & is.finite(measures$err[[i]]) checkEquals(measures$acc[[i]][finite.bool] + measures$err[[i]][finite.bool], rep(1,length(measures$acc[[i]]))[finite.bool]) finite.bool <- is.finite(measures$fnr[[i]]) & is.finite(measures$tpr[[i]]) checkEquals(measures$fnr[[i]][finite.bool] + measures$tpr[[i]][finite.bool], rep(1,length(measures$fnr[[i]]))[finite.bool]) finite.bool <- is.finite(measures$fpr[[i]]) & is.finite(measures$tnr[[i]]) checkEquals(measures$fpr[[i]][finite.bool] + measures$tnr[[i]][finite.bool], rep(1,length(measures$fpr[[i]]))[finite.bool]) finite.bool <- is.finite(measures$prec[[i]]) & is.finite(measures$pcfall[[i]]) checkEquals(measures$prec[[i]][finite.bool] + measures$pcfall[[i]][finite.bool], rep(1,length(measures$acc[[i]]))[finite.bool]) finite.bool <- is.finite(measures$npv[[i]]) & is.finite(measures$pcmiss[[i]]) checkEquals(measures$npv[[i]][finite.bool] + measures$pcmiss[[i]][finite.bool], rep(1,length(measures$acc[[i]]))[finite.bool]) checkEquals(measures$rpp[[i]] + measures$rnp[[i]], rep(1, length(measures$rpp[[i]]))) } } ## use consistency checks to validate results on pathological input cases performance.measures <- c('tpr','fpr','acc','err','rec','sens','fnr','tnr','spec', 'ppv','prec','npv','fall','miss','pcfall','pcmiss','rpp','rnp', 'auc','prbe','rch','mxe','rmse','phi','mat','mi','chisq', 'odds','lift','f','sar','ecost','cost') testPathological <- function() { # mxe needs 0,1 labels (warning otherwise), # rmse needs numeric labels (warning otherwise), sar as well pred <- prediction( c(0.1, 0.2, 0.5), c("a", "a", "b")) .check.prediction.object(pred) measures.to.evaluate <- performance.measures[ performance.measures != 'mxe' & performance.measures != 'rmse' & performance.measures != 'sar'] measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) pred <- prediction( c(0.1, 0.2, 0.5), c(F, F, T)) .check.prediction.object(pred) measures.to.evaluate <- performance.measures[ performance.measures != 'mxe' & performance.measures != 'rmse' & performance.measures != 'sar'] measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) pred <- prediction( c(0.1, 0.2, 0.5), c("1", "1", "0")) .check.prediction.object(pred) measures.to.evaluate <- performance.measures 
measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) pred <- prediction( c(0.1, 0.2, 0.5), c(T, F, F)) .check.prediction.object(pred) measures.to.evaluate <- performance.measures[ performance.measures != 'mxe' & performance.measures != 'rmse' & performance.measures != 'sar' ] measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) # prbe cannot be computed, because only one prec/rec pair available. pred <- prediction( c(0,0,0), c(0,1,1)) .check.prediction.object(pred) measures.to.evaluate <- performance.measures[ performance.measures != 'prbe' ] measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) pred <- prediction( c(0,0,0), ordered(c(0,0,0), levels=c(0,1))) .check.prediction.object(pred) measures.to.evaluate <- performance.measures[ performance.measures != 'auc' & performance.measures != 'prbe' & performance.measures != 'rch' & performance.measures != 'sar' & performance.measures != 'ecost'] measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) pred <- prediction( c(-1,-0.2,-0.6), ordered(c(1,0,1), levels=c(0,1))) .check.prediction.object(pred) measures.to.evaluate <- performance.measures[ performance.measures != 'mxe' ] measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) pred <- prediction( c(-1,-0.2,-0.6), c(-1,1,-1)) .check.prediction.object(pred) measures.to.evaluate <- performance.measures[ performance.measures != 'mxe'] measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) pred <- prediction( c(-1,-0.2,-0.6), c(3,2,3)) .check.prediction.object(pred) measures.to.evaluate <- performance.measures[ performance.measures != 'mxe'] measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) pred <- prediction( c(1), ordered(c("a"),levels=c('a','b'))) .check.prediction.object(pred) measures.to.evaluate <- performance.measures[ performance.measures != 'auc' & performance.measures != 'prbe' & performance.measures != 'rch' & performance.measures != 'mxe' & performance.measures != 'rmse' & performance.measures != 'sar' & performance.measures != 'ecost'] measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) } ############################################################ # test length of performance measures .check.performance.object <- function(perf) { ylen <- length(perf@y.values) xlen <- length(perf@x.values) alphalen <- length(perf@alpha.values) checkEquals( (xlen==0 || xlen==ylen) && (alphalen==0 || (alphalen==xlen && alphalen==ylen)), T ) if (xlen==ylen) { for (i in 1:ylen) checkEquals( length(perf@x.values[[i]]), length(perf@y.values[[i]]) ) } if (alphalen==ylen) { for (i in 1:ylen) checkEquals( length(perf@alpha.values[[i]]), length(perf@y.values[[i]]) ) } } .check.prediction.object <- function( pred) { # 1. all entries in prediction object must have equals number of cross-validation runs lenvec <- c(length(pred@predictions), length(pred@labels), length(pred@cutoffs), length(pred@fp), length(pred@tp), length(pred@fn), length(pred@tn), length(pred@n.pos), length(pred@n.neg), length(pred@n.pos.pred), length(pred@n.neg.pred)) checkEquals( length(unique(lenvec)), 1) # 2. 
inside: xval runs: for (i in 1:length(pred@predictions)) { checkEquals( length(pred@predictions[[i]]), length(pred@labels[[i]])) lenvec <- c(length(pred@cutoffs[[i]]), length(pred@fp[[i]]), length(pred@tp[[i]]), length(pred@fn[[i]]), length(pred@tn[[i]]), length(pred@n.pos.pred[[i]]), length(pred@n.neg.pred[[i]])) checkEquals( length(unique(lenvec)), 1) checkEquals( unique(lenvec), length(unique(pred@predictions[[i]]))+1 ) } # 3. cutoffs sorted in descending order? for (i in 1:length(pred@predictions)) { checkEquals( sort(pred@cutoffs[[i]], decreasing=TRUE ), pred@cutoffs[[i]] ) } # 4. check 2x2 table for consistency with marginal sums for (i in 1:length(pred@predictions)) { checkEquals( pred@tp[[i]] + pred@fp[[i]], pred@n.pos.pred[[i]] ) checkEquals( pred@fn[[i]] + pred@tn[[i]], pred@n.neg.pred[[i]] ) checkEquals( pred@tp[[i]] + pred@fn[[i]], rep( pred@n.pos[[i]], length(pred@tp[[i]])) ) checkEquals( pred@fp[[i]] + pred@tn[[i]], rep( pred@n.neg[[i]], length(pred@tp[[i]])) ) checkEquals(pred@n.pos.pred[[i]] + pred@n.neg.pred[[i]], rep( pred@n.pos[[i]] + pred@n.neg[[i]], length(pred@n.pos.pred[[i]])) ) checkEquals(pred@n.pos[[i]] + pred@n.neg[[i]], length(pred@labels[[i]])) } } ############################################################ # test measures for consistency on supplied data sets testDatabase <- function() { data(ROCR.simple) pred <- prediction(ROCR.simple$predictions, ROCR.simple$labels) .check.prediction.object(pred) measures.to.evaluate <- performance.measures measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) data(ROCR.xval) pred <- prediction(ROCR.xval$predictions, ROCR.xval$labels) .check.prediction.object(pred) measures.to.evaluate <- performance.measures measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) data(ROCR.hiv) pred <- prediction(ROCR.hiv$hiv.nn$predictions, ROCR.hiv$hiv.nn$labels) .check.prediction.object(pred) measures.to.evaluate <- performance.measures[performance.measures != 'mxe' & performance.measures != 'cal'] measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) pred <- prediction(ROCR.hiv$hiv.svm$predictions, ROCR.hiv$hiv.svm$labels) .check.prediction.object(pred) measures.to.evaluate <- performance.measures[performance.measures != 'mxe' & performance.measures != 'cal'] measures <- .get.performance.measures(pred, measures.to.evaluate) .check.consistency( measures) } ## remove XXX below to include testDatabaseCombine (currently disabled for speed ## reasons) in the test suite. 
tXXXestDatabasesCombine <- function() { measures <- c('tpr','fpr','acc','err','rec','sens','fnr','tnr','spec', 'ppv','prec','npv','fall','miss','pcfall','pcmiss','rpp','rnp', 'phi','mat','mi','chisq','odds','lift') #'auc','prbe','rch','mxe','rmse','phi','mat','mi','chisq', #'odds','lift','f','sar','ecost','cost') print("Database combine test deactivated.") data(ROCR.simple) data(ROCR.xval) data(ROCR.hiv) all.pred <- list(prediction(ROCR.simple$predictions, ROCR.simple$labels), prediction(ROCR.xval$predictions, ROCR.xval$labels), prediction(ROCR.hiv$hiv.nn$predictions, ROCR.hiv$hiv.nn$labels), prediction(ROCR.hiv$hiv.svm$predictions, ROCR.hiv$hiv.svm$labels)) lapply(all.pred, .check.prediction.object) for (pred in all.pred) { for (measure1 in measures) { print(measure1) for (measure2 in measures) { perf1 <- performance( pred, measure1 ) perf2 <- performance( pred, measure2 ) perf3 <- performance( pred, measure2, measure1 ) .check.performance.object(perf1) .check.performance.object(perf2) .check.performance.object(perf3) for (i in 1:length(pred@labels)) { #check elements checkEquals(setequal( c( perf1@x.values[[i]], perf2@x.values[[i]]), perf3@alpha.values[[i]] ),T) checkEquals(setequal( perf1@y.values[[i]], perf3@x.values[[i]] ),T) checkEquals(setequal( perf2@y.values[[i]], perf3@y.values[[i]] ),T) # check order ind <- sapply( perf1@x.values[[i]], function(x) { min(which(x==perf3@alpha.values[[i]]))}) checkEquals( perf1@y.values[[i]], perf3@x.values[[i]][ind] ) checkEquals( perf2@y.values[[i]], perf3@y.values[[i]][ind] ) } } } } } ############################################################ crashCases <- list( ## cases that are ok to crash: list(pred= c(0), lab= c(0)), #-> Number of classes is not equal to 2. list(pred= c(1), lab= c(1)), #-> Number of classes is not equal to 2. list(pred= c(0.1, 0.2, 0.5), lab= c(1,1,1)), #-> Number of classes is not equal to 2. list(pred= c(0.1, 0.2, 0.5), lab= c(0,0,0)), #-> Number of classes is not equal to 2. list(pred= c(0.1, 0.2, 0.5), lab= c("a", "a", "a")), #-> Number of classes is not equal to 2. list(pred= c(0.1, 0.2, 0.5), lab= c(T, T, T)), #-> Number of classes is not equal to 2. list(pred= c(0.1, 0.2, 0.5), lab= c(F, F, F)) #-> Number of classes is not equal to 2. ) # list(pred= c(), lab= c()), #-> Number of classes is not equal to 2. 
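## Illustrative sketch (not part of the shipped suite; the function name
## 'testTwoClassRequirement' is ad hoc): every entry of 'crashCases' above
## carries only a single label class, which prediction() rejects, whereas
## the same scores paired with two label classes go through.
testTwoClassRequirement <- function() {
  # two classes: accepted, and the resulting object is internally consistent
  pred.ok <- prediction( c(0.1, 0.9), c(0, 1) )
  .check.prediction.object( pred.ok )
  # one class: must throw ("Number of classes is not equal to 2.")
  checkException( prediction( c(0.1, 0.9), c(1, 1) ) )
}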
testCrash <- function() { for (case in crashCases) { cat(case$pred, " ", case$lab, "\n") checkException(pred <- prediction(case$pred, case$lab)) #checkException(measures <- .get.performance.measures(pred)) } } # .mock.prediction <- function( n.predictions, error.rate ) { if ( length(n.predictions) > 1 && length(error.rate)==1) { error.rate <- rep(error.rate, length(n.predictions) ) } if (length(n.predictions)>1) { predictions <- list() labels <- list() } else { predictions <- c() labels <- c() } for (i in 1:length(n.predictions)) { current.predictions <- runif( n.predictions[i] ) current.labels <- as.numeric( current.predictions >= 0.5) flip.indices <- sample( n.predictions[i], round( error.rate[i] * n.predictions[i] )) current.labels[ flip.indices ] <- !current.labels[ flip.indices ] # current.labels[ current.labels=="1" ] <- "+" # current.labels[ current.labels=="0" ] <- "-" if (length(n.predictions)>1) { predictions <- c( predictions, list( current.predictions )) labels <- c( labels, list( current.labels )) } } if (length( n.predictions)==1) { predictions <- list(current.predictions) labels <- list(current.labels) } ans <- list(predictions= predictions, labels= labels) # ensure, that random labels have exactly two levels if (any( sapply(labels, function(run) {length(unique(run))}) != rep(2, length(labels)) )) { print(paste("XXX", labels, str(n.predictions), str(error.rate))) return(.mock.prediction(n.predictions, error.rate)) } else return( ans ) } ROCR/tools/unittests/runit.simple.r0000644000176000001440000003125012504526277017100 0ustar ripleyusers library(RUnit) library(ROCR) #source("prediction.R") #source("performance.R") #source("performance_measures.R") #source("zzz.R") some.predictions <- c(0.02495517, 0.92535646, 0.86251887, 0.80946685, 0.70922858, 0.69762824, 0.50604485, 0.25446810, 0.10837728, 0.07250349) some.labels <- c(0,1,1,0,1,1,0,1,0,0) tp.reference <- c(0, 1, 2, 2, 3, 4, 4, 5, 5, 5, 5) fp.reference <- c(0, 0, 0, 1, 1, 1, 2, 2, 3, 4, 5) pp.reference <- c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) np.reference <- c(10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) p.reference <- rep(5, 11) n.reference <- rep(5, 11) tn.reference <- n.reference-fp.reference fn.reference <- p.reference-tp.reference # manually calculated reference measures rpp.reference <- c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0) rnp.reference <- c(1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.0) tpr.reference <- c(0.0, 0.2, 0.4, 0.4, 0.6, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0) fpr.reference <- c(0.0, 0.0, 0.0, 0.2, 0.2, 0.2, 0.4, 0.4, 0.6, 0.8, 1.0) acc.reference <- c(0.5, 0.6, 0.7, 0.6, 0.7, 0.8, 0.7, 0.8, 0.7, 0.6, 0.5) err.reference <- c(0.5, 0.4, 0.3, 0.4, 0.3, 0.2, 0.3, 0.2, 0.3, 0.4, 0.5) rec.reference <- tpr.reference sens.reference<- tpr.reference fnr.reference <- c(1.0, 0.8, 0.6, 0.6, 0.4, 0.2, 0.2, 0.0, 0.0, 0.0, 0.0) tnr.reference <- c(1.0, 1.0, 1.0, 0.8, 0.8, 0.8, 0.6, 0.6, 0.4, 0.2, 0.0) spec.reference<- tnr.reference ppv.reference <- c(0/0, 1/1, 2/2, 2/3, 3/4, 4/5, 4/6, 5/7, 5/8, 5/9, 5/10) npv.reference <- c(5/10, 5/9, 5/8, 4/7, 4/6, 4/5, 3/4, 3/3, 2/2, 1/1, 0/0) prec.reference<- ppv.reference fall.reference <- fpr.reference miss.reference <- fnr.reference pcfall.reference <- c(0/0, 0/1, 0/2, 1/3, 1/4, 1/5, 2/6, 2/7, 3/8, 4/9, 5/10) pcmiss.reference <- c(5/10, 4/9, 3/8, 3/7, 2/6, 1/5, 1/4, 0/3, 0/2, 0/1, 0/0) auc.reference <- 0.84 cal.reference <- c() ind <- rev(order(some.predictions)) sorted.predictions <- some.predictions[ind] sorted.labels <- some.labels[ind] for (i in 1:8) { mean.pred <- mean( 
sorted.predictions[i:(i+2)] ) frac.pos <- sum( sorted.labels[i:(i+2)] ) / 3 cal.reference <- c(cal.reference, abs( mean.pred - frac.pos )) } prbe.reference<- 0.8 prbe.reference.x <- 0.69762824 rch.reference.x <- fpr.reference[c(1,3,6,8,11)] rch.reference.y <- tpr.reference[c(1,3,6,8,11)] mxe.reference <- -(1/length(some.predictions)) * sum(some.labels*log(some.predictions) + (1-some.labels)*log(1-some.predictions)) rmse.reference <- sqrt((1/length(some.predictions)) * sum((some.predictions-some.labels)^2)) phi.reference <- (tp.reference*tn.reference-fp.reference*fn.reference) / sqrt(p.reference*n.reference*pp.reference*np.reference) mat.reference <- phi.reference my.log2 <- function( x ) { ans <- log2(x) ans[ ans==-Inf ] <- 0 ans } mi.reference <- (tn.reference * my.log2( tn.reference / (n.reference*np.reference)) + fn.reference*my.log2(fn.reference/(np.reference*p.reference)) + fp.reference*my.log2(fp.reference/(n.reference*pp.reference)) + tp.reference*my.log2(tp.reference/(p.reference*pp.reference))) / length(some.labels) + log2(length(some.labels)) chisq.reference <- (((pp.reference*p.reference/length(some.predictions)) - tp.reference)^2 / (pp.reference*p.reference/length(some.predictions)) + ((pp.reference*n.reference/length(some.predictions)) - fp.reference)^2 / (pp.reference*n.reference/length(some.predictions)) + ((np.reference*p.reference/length(some.predictions)) - fn.reference)^2 / (np.reference*p.reference/length(some.predictions)) + ((np.reference*n.reference/length(some.predictions)) - tn.reference)^2 / (np.reference*n.reference/length(some.predictions))) odds.reference <- (tp.reference*tn.reference) / (fn.reference*fp.reference) lift.reference <- (tp.reference/p.reference) / (pp.reference/(p.reference+n.reference)) f.reference <- 1 / (0.5 * ((1/prec.reference) + (1/rec.reference))) sar.reference <- 1/3 * (acc.reference + auc.reference + (1-rmse.reference)) cost.reference <- (fpr.reference * n.reference/length(some.labels) * 1 + fnr.reference * p.reference/length(some.labels) * 1) .get.performance.measures <- function(pred) { tpr <- performance(pred, "tpr")@y.values[[1]] fpr <- performance(pred, "fpr")@y.values[[1]] acc <- performance(pred, "acc")@y.values[[1]] err <- performance(pred, "err")@y.values[[1]] rec <- performance(pred, "rec")@y.values[[1]] sens<- performance(pred, "sens")@y.values[[1]] fnr <- performance(pred, "fnr")@y.values[[1]] tnr <- performance(pred, "tnr")@y.values[[1]] spec<- performance(pred, "spec")@y.values[[1]] ppv <- performance(pred, "ppv")@y.values[[1]] prec<- performance(pred, "prec")@y.values[[1]] npv <- performance(pred, "npv")@y.values[[1]] fall<- performance(pred, "fall")@y.values[[1]] miss<- performance(pred, "miss")@y.values[[1]] pcfall <- performance(pred, "pcfall")@y.values[[1]] pcmiss <- performance(pred, "pcmiss")@y.values[[1]] rpp <- performance(pred, "rpp")@y.values[[1]] rnp <- performance(pred, "rnp")@y.values[[1]] auc <- performance(pred, "auc")@y.values[[1]] prbe<- performance(pred, "prbe")@y.values[[1]] rch <- performance(pred, "rch")@y.values[[1]] mxe <- performance(pred, "mxe")@y.values[[1]] rmse<- performance(pred, "rmse")@y.values[[1]] phi <- performance(pred, "phi")@y.values[[1]] mat <- performance(pred, "mat")@y.values[[1]] mi <- performance(pred, "mi")@y.values[[1]] chisq<- performance(pred, "chisq")@y.values[[1]] odds<- performance(pred, "odds")@y.values[[1]] lift<- performance(pred, "lift")@y.values[[1]] f <- performance(pred, "f")@y.values[[1]] sar <- performance(pred,"sar")@y.values[[1]] ecost <- performance(pred, 
"ecost")@y.values[[1]] cost <- performance(pred, "cost")@y.values[[1]] return(list(tpr=tpr, fpr=fpr, acc=acc, err=err, rec=rec, sens=sens, fnr=fnr, tnr=tnr, spec=spec, ppv=ppv, prec=prec, npv=npv, fall=fall, miss=miss, pcfall=pcfall, pcmiss=pcmiss, rpp=rpp, rnp=rnp, auc=auc, prbe=prbe, rch=rch, mxe=mxe, rmse=rmse, phi=phi, mat=mat, mi=mi, chisq=chisq, odds=odds, lift=lift, f=f, sar=sar, ecost=ecost, cost=cost)) } testEcost <- function() { ecost.x.reference <- c(0,1/3,0.5,1) ecost.y.reference <- c(0,0.2,0.2,0) pred <- prediction(some.predictions, some.labels) perf <- performance(pred, "ecost") ecost.x <- perf@x.values[[1]] ecost.y <- perf@y.values[[1]] checkEquals( ecost.x, ecost.x.reference ) checkEquals( ecost.y, ecost.y.reference ) } testCal <- function() { pred <- prediction(some.predictions, some.labels) cal <- performance(pred, "cal", window.size=floor(length(pred@predictions[[1]])/3))@y.values[[1]] cal.x <- performance(pred, "cal", window.size=floor(length(pred@predictions[[1]])/3))@x.values[[1]] cal.x.reference <- rev(sort( some.predictions ))[2:(length(some.predictions)-1)] checkEquals( cal, cal.reference) checkEquals( cal.x, cal.x.reference) } testCost <- function() { pred <- prediction(some.predictions, some.labels) for (cost.fp in rnorm(50)) { cost.fn <- rnorm(1) perf <- performance(pred, "cost", cost.fp=cost.fp, cost.fn=cost.fn) cost <- perf@y.values[[1]] my.cost.reference <- (fpr.reference * n.reference/length(some.labels) * cost.fp + fnr.reference * p.reference/length(some.labels) * cost.fn) checkEquals( cost, my.cost.reference) } } testRch <- function() { pred <- prediction(some.predictions, some.labels) perf <- performance( pred, "rch") rch.x <- perf@x.values[[1]] rch.y <- perf@y.values[[1]] checkEquals( rch.x, rch.reference.x ) checkEquals( rch.y, rch.reference.y ) } testPerformanceMeasuresReference <- function() { pred <- prediction(some.predictions, some.labels) measures <- .get.performance.measures(pred) attach(measures) checkEquals(tpr, tpr.reference) checkEquals(fpr, fpr.reference) checkEquals(acc, acc.reference) checkEquals(err, err.reference) checkEquals(rec, rec.reference) checkEquals(sens, sens.reference) checkEquals(fnr, fnr.reference) checkEquals(tnr, tnr.reference) checkEquals(spec, spec.reference) checkEquals(ppv, ppv.reference) checkEquals(prec,prec.reference) checkEquals(npv, npv.reference) checkEquals(fall, fall.reference) checkEquals(miss,miss.reference) checkEquals(pcfall, pcfall.reference) checkEquals(pcmiss,pcmiss.reference) checkEquals(rpp, rpp.reference) checkEquals(rnp,rnp.reference) checkEquals(auc, auc.reference) checkEquals(prbe, prbe.reference) checkEquals(mxe, mxe.reference) checkEquals(rmse, rmse.reference) checkEquals(phi, phi.reference) checkEquals(mat, mat.reference) checkEquals(mi, mi.reference) checkEquals(chisq, chisq.reference) checkEquals(odds, odds.reference) checkEquals(lift, lift.reference) checkEquals(f, f.reference) checkEquals(sar,sar.reference) checkEquals(cost, cost.reference) } testRMSE <- function() { pred <- prediction(c(0, 0, 1, 1), ordered(c(0, 0, 1, 1))) rmse <- performance(pred, "rmse")@y.values[[1]] checkEquals(rmse, 0) pred <- prediction(c(0.0, 0.0, 1.0, 1.0), ordered(c(1, 1, 0, 0), levels=c(1,0))) rmse <- performance(pred, "rmse")@y.values[[1]] checkEquals(rmse, 1) pred <- prediction(c(0.0, 0.0, 1.0, 1.0), ordered(c(2, 2, 3, 3))) rmse <- performance(pred, "rmse")@y.values[[1]] checkEquals( rmse, 2) pred <- prediction(c(-0.5, 0.2, 2.5, 0.3), ordered(c(-1, -1, 1, 1))) rmse <- performance(pred, "rmse")@y.values[[1]] 
checkEquals( rmse, sqrt(1/4*(0.5^2 + 1.2^2 + 1.5^2 + 0.7^2))) } testPRBE <- function() { pred <- prediction(some.predictions, some.labels) prbe.y <- performance(pred, "prbe")@y.values[[1]] prbe.x <- performance(pred, "prbe")@x.values[[1]] checkEquals(prbe.y, prbe.reference) checkEquals(prbe.x, prbe.reference.x) } testPredictionInterface <- function() { pred <- prediction(seq(0, 1, length=10), c(rep(0,5), rep(1,5))) checkEquals(performance(pred, "auc")@y.values[[1]], 1) pred <- prediction(seq(1, 0, length=10), c(rep(0,5), rep(1,5))) checkEquals(performance(pred, "auc")@y.values[[1]], 0) pred <- prediction(seq(0, 1, length=10), factor(c(rep(0,5), rep(1,5)))) checkEquals(performance(pred, "auc")@y.values[[1]], 1) pred <- prediction(seq(0, 1, length=10), ordered(c(rep(0,5), rep(1,5)))) checkEquals(performance(pred, "auc")@y.values[[1]], 1) pred <- prediction(seq(0, 1, length=10), ordered(c(rep(0,5), rep(1,5)), levels=c(1,0))) checkEquals(performance(pred, "auc")@y.values[[1]], 0) pred <- prediction(seq(0, 1, length=10), ordered(c(rep("A",5), rep("B",5)))) checkEquals(performance(pred, "auc")@y.values[[1]], 1) checkException(pred <- prediction(seq(0, 1, length=10), c(rep(0,5), rep(1,5)), label.ordering=c(1,2))) checkException(pred <- prediction(list(c(0.1,0.3,0.7,1), c(0,0.2,0.8,1)), list(factor(c(0,0,1,1)), factor(c(1,1,2,2))))) checkException(pred <- prediction(list(c(0.2,0.3,0.7,1), c(0,0.2,0.8,1)), list(factor(c(0,0,1,1)), ordered(c(0,0,1,1))))) pred <- prediction(list(c(0,0.3,0.7,1), c(0,0.2,0.8,1)), list(factor(c(0,0,1,1)), factor(c(0,0,1,1)))) checkEquals(performance(pred, "auc")@y.values, list(1, 1)) pred1 <- prediction(data.frame(c(0,0.3,0.7,1), c(0,0.2,0.8,1)), data.frame(factor(c(0,0,1,1)), factor(c(0,0,1,1)))) checkEquals( pred, pred1) pred2 <- prediction(cbind(c(0,0.3,0.7,1), c(0,0.2,0.8,1)), cbind(c(0,0,1,1), c(0,0,1,1))) checkEquals(pred, pred2) }
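## Illustrative sketch (not part of the shipped suite; the function name
## 'testAUCTrapezoid' is ad hoc): the hard-coded auc.reference of 0.84 is
## simply the trapezoidal area under the (fpr.reference, tpr.reference)
## points defined at the top of this file, and performance(pred, "auc")
## reproduces that value on the same data.
testAUCTrapezoid <- function() {
  x <- fpr.reference
  y <- tpr.reference
  # trapezoidal rule over consecutive ROC points
  auc.trap <- sum( diff(x) * (head(y, -1) + tail(y, -1)) / 2 )
  checkEquals( auc.trap, auc.reference )
  pred <- prediction(some.predictions, some.labels)
  checkEquals( performance(pred, "auc")@y.values[[1]], auc.trap )
}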