unbalanced/0000755000175100001440000000000012543234315012366 5ustar hornikusersunbalanced/NAMESPACE0000644000175100001440000000012412540266574013613 0ustar hornikusersimport("RANN","FNN", "mlr", "foreach", "doParallel") exportPattern("^[[:alpha:]]+") unbalanced/data/0000755000175100001440000000000012543220332013271 5ustar hornikusersunbalanced/data/ubIonosphere.rda0000644000175100001440000012453012543220332016430 0ustar hornikuserswE>#0+* sdC 挠(b %J"9%9ܜ}3=3UO=#w>rS'hѢeVZJ薉'o ~{hѪM⻋إ/īoOÑCW=Ϲo X+o N=3??<]v9_]l%?y{Gz[~ʴ9{oγ]dh{ԥ^qۆg×|Kϝm\]W=>}㇏mu.59wM]O| _i!L_?}iO>H&CcSӏ3o{g (u?gGxIeI㠪zo ڿô*p[~8 o SEY {||⍟.:(N{gЂ|˹K"8wh87rOAlOϯ ׏Kd4?Ϛ_9/\#Rm/}v޳̏o\|wg7N|=EPu;8L 5G>},|jug5ྡྷm֮g@l{*=:[}nڞ1 :1/|GWd푍[3s/:sJO/U^%UKj=3yb(ћt{_yq\Ѳxw' TuǯuWC{=^}%i_.] k\֭!Xճ#Pip|Gˍb7dzuq遯]~p^wi d]:`9v>o_3xgyyOA`PG{T#^[{{NA^y PJ"jNKvC|h7tQ[7;>zc{=ݾ[iPjOIPf>'MrZeš'ḛ;bh_}X=rnO]gN#4#1?řm_~7{.>/\cn~^\ro _Nϳw;ㆵol5"*AGZ|RzĞՉ;9-߽ŽKμaOdY=< n5})yi} "g9;3:w۩Pml\ʤ r=BgY9[>j1X]\9nD ›L|(.03O?AgӞ}ۋhNxwN 'G=sy֦w %-,$9 Pյ»&r*DV%~.D~=MN-tBClWO ȱ/}x}O}b|++=PzPn/M oA~h[ q2BHwvo^Ӻsx~xK!6xEbf·؃P1!܏s__:tDtOq΂??pX;Wδ_?54O|t9ކ?T39! ˯&璫1O/ 돬{t@K\9[IfD<:'r{hu/'GCVθsMw[!G-s+;lU' &wOvۣ*S~3l[ʮ#.8 ;֞SB1zxzG+@+G ''qǪpke'߉ٮ=l}9#䎉3؞`}/u~> HbIU=]? 3iu,Ɂ*buyqGї{L{mN;~E_I7.غ y6P%2a`.RO`셸&IN'A&=>yBj9sxX׏~eƾ6ů,Lx#?N| G]=)]_T519,Xz 8(> *Rq0A n5>*_ v^sIH7< ~o:x֥epzˣ&|k TǮob͝]UMW=xYgscp7~}xӮXiɮ' ~$=KѣLSdۻ7e4盳vvq+Kc3^:as%3\O}a)ܠVJ '݊ n{V`/{ǿ͸*r=o(u{jcMK5->LgSR0msfxfg5e2׶ه;=To⸰^9pև7WwsVXv~+%ֶ|3sFB ^{<<1y[-/\k`w쁎Sڞl>o#~ne /ݵ赧FÏF}Y{{.V}y3aƀkNox/| &^sWՁ~zVs\y5>VcXsڐ#!LLV#L-sڝ}CI[xJˋq!7ntoLy/wgQ^VwX2WjfB]9Pc@ibvՃ㼣oW+W晪A>-~29mM?jp OHHL.X@qo숬r>ࡂϿ>, O'|8g[=[t50«6lBE˓Bx@4u{<'eqČ=u[x>9ӽ<'Q>8:::sxǒcCMԾ9` 7>o̒u㄄ss[p`( ؕ-#qү/ T/[osi{?T7]Cc^z瓑ks{'bH{h忎_'VSkR_ˈw1!Ɂ5K_8Cob)|>z/>"g-jpax%69?ˇ u;U|bsrnyp@p:>|sxxv-=}<Sj~Zye;Յ]HKz?Rw(xT ]_?Ƿpuqm>d՟7A{1%G*,lBJ-;6>=7eSPr/tFzsw18 \xhߛn]4S^PiGUZO{÷YeD5;_Jo}?' \[n&Tv lI5_zk9*WA_jpHmWBxW]vQѵGvAXmW@P'/دj8p.xq H\˛xhh\@:)S!*yX%W_(ۇwBos(a! ?LqU╀G(㜳|Nݰ\ 0 y x?կ$Ӟ/3vplߓ_˥"J݄? 
(1 &|f%̜Lk ?}}ҷv~ڌW޷~~I$sپViߙM;pۃӎ.Ā㫞-÷7V3-c xubxR9uPuW!UD;ov5j^gq.IawlQei^1/`bprA8u1 %`dX}B;%B}:,ݖjwM?~l^X0e/) 5܄k3gxY{ؘX_pKlT=t9]B:˴/Ff~܉_R><G=3V[6 x@p$Лs$fC4G븯ġto%vN-&twÜovǕg%4v0oxٿfلs9N Ly* Yb! M[̘ٕ>xv*$ʳ0;ydOmk(̄q}؞$yf*{kh)l:' AɣAa 0 eʼnxď*쓼vijP(xʔP=]*Z;!pf:?#>g{o ~Q0?|L_KqlXҬoeҖVk_M(r/)/vsO pBbأE^co?8`(؞ޏ=ՇEEpA|`*_  NkGQ?5wbU=x*r- +aAdޞ;~;9%UѢ ?ߐ|ؚЁ/u2I|!$Zb48:;g9Rǟno : &s pϜx(VWos'&s80y|pɲ!҇|Afu]@MG+wޏ̭Com8Ǿm?jqA4&sxҿ{?;ٽn3t;nZUƳ-9xն }Nscx9>y~7nBbC7)wU) >[E6BwOf{{ 틾lݲ yg,/?<> nqls֟5 f@1+9 0N9QQw΂*)@5m\R։P"B/so &qTby䙄~<_g`̓!pIy'4K߂CrwTG7Uq,W7gK)dyƶ,봅e.eΪc|CKpOٔ<{g4|B7O_Bh Kw=.ϸp8(lNC?HL!t-6FT ny})gQ,f!ēq?@Piolg(|C<w6~T8sǜ?s@<Ҟv&=@;u$=2o$_OK9`')^m~Mp=PjscVf{|~}I&%^S2vйƝ_*0ڭ851ǭME/$x5k?`~xcX;ZŜGNϙt;G3߷2O+fr* ulL|a1aτ*AlӪn`ƙyPzL 2]xO(LRƇΑ6~Zq;} ՍCvC@pG=22 E |i=cj'wL!Χ bG|oуVē=T`#a?d^r0~_SR@Hh=9V⼋)s!.$@xOۜX v*d,ĕo qg{NB_1ęGf? n 7 wNz$(k΁ g'F1 W}qN:w ?oK ZrC1 ̸F(_}Hna0-r^A̅ %QHDaKU'@]#3Y\ q!5s`y:D|vt?4VΟAΒgƎxn> N\I"Qy;V* UUG\aܔ}O.U_}?Oa缝7)X4/⚏89a ݘ'xxK ;l_P?}ǚBa?l?D na^buwq+ׇ`=%HdkטW\9U-q<IszuL w>5Hl`>ay׿m=lܔN?'(zfm3.|c nWYmRy;? W6|xJX,2W5?}TVH;8ycOBpUu,fL|G=NmJ ]k:>o8 69.qT 3ާ)ksÎ7M;x9sUx4??h'yO䟹K1~'1^5QGDIH|oc5s ^j[k^ΗoJ@,Uޅq/Dan0FEp'_L_qW7`LIgW}c/IRѭ'4/}IU?~ae1no@ǝoFƥL|ZGp$w&j5ǓWg\M qKr?5z(N>_76fy_ ^?~Jgfq$a-07E^y^1}?:k{ο-z`Ӯl|6Y\0^PM $mo0ap[qԾje!FWDl!TN󛪿"·&ƣ&AkJkmYW}e{9_b/q7o En=u+?"x@eFr354G =zM@ifS5:#hUUQ)xvr`<1; aOnǩ,ԩ@\ 'e+ ℛ,_xOp_{jtqA&ޖ |sazЩ,6L^px:}$(Bxi70>O_?&oC骺6Y@Z;^~3m^X ThTHU NU O=y?1ۃ2m+-&b,|{u=>eOރBPO_>ƻCv#~*%LH.p;We;] WQ?t73{ ]8dt _O_dGWNiqnUvBC>IY,Z Vn ?} d>W*4s˓0܏.X{7Vs|08b4xN:XK7@{9IXUwuZȖZr] =pV޴/!Pڼr% ,La>TŬ63{Ha+5~sT~ w 2cw.,A8W1F ,}oo{5"#fܜ!< x]|CKVd={N؁\ ; iE ڼckrYCZN7w0_yWq]>ϭ~AO*KڸmW-?~^ۥwJLGJWLPѣ8BR?$<=Vau8~f^I9sOJ\s<;u*P߱Dep矩+2cjR5odqais&U&P{1d`'ws OTf{[DLe_eWa?u; /s|\R7qru F=\wP+:Uv{w\"ROn6tŸ2 `QA)מv:&S+9as' ߂W{}HS!5rM~} qRk8q u _ӯđ?~ӯ1E?lX)-ʜbv恞di<{ޥ[ƅ(G,vq]S8)=Np  A'ͥTn+~lyc.9GggLx(żUR˺ Kݴ'4XvWnG?Bߎ=2mIf<[оC -?_ց R`uCX(FpJ[;15 }6PGA0 A#3n5bKǧo2=UCI4'?D8oi_WMӮOk<4RKپ2'%})u&;LxaT ya!Wp2ןI[a_|<.^%dQ8Ʈ7>v~`4[}>~ 0=S'5W;iJX? 
Ʃe`!E:2,&vq6F\PxVXܖ%< f{<|EoкmWO|ksCKP]]s d<59 _QMT2n!s l/mD\8C;S1aQO}]H)wdv^TKT a:%+q_2P'i> qbOKƼy}tP(mDy} *W3LVe+FFa3y]qba {|QcCܸ "bH?{ )*vr+#񫄕%p5 7* :DR'Wɺo1, imz3go@,_Ew4LM'Q!yMP/|.J/=˥*Z׷[)Cj*ol|~X-y~nzVlGr'V[ALxhJ</HM"?+b~!T]IyNta=ļ}PGdP/qzy/UC\8A\,唀U|9wvDh%E$8ĉ~-ܝW揱Fx(BlO!*݊ʡo& aUp,o01Y뜇+Eq/s4xU9ʁkaj8zZ?vOUAyh^8&u§^%y5~2G69nҧ9~vo*DbK<|9oϟ󼦝L׾Ix𛊈,x {:u^+OO 3UI8OqiK\3LP |n_=sR%yD$[_P7<-fq>ϴ9^eڞ>.nWWO7![¯!plro.MpU?"4kd(\!" ܌q< ΃R*jp:?%q -\ 3uxƯ0 oCpP:&\[/Z-^=VmAWrLapݰMɍ>櫸Q=ތYu|8b q3c<D[p7vu{ˏU`b̤0B/@bX*uz<Өx}%uwA^9+#A`eK\X)8ѾߋD3v5ߠ*/~AL~b38`%Oz0"@G/a:<* GG9D9cFn- =\g^C\c,Lpse ʷvJ?V=}X;\xt4w2\Isc7{eC]W%FƸ> ~9*E'1"{e{dϗ0sWxC0Gw1(<͍GxNiӰ^lj V3o\4@f>x%*1Fb’Knsy]{aP`k`;!J0޴W㸣"a`g eyM@UY07:!\8N(a mxkTཿ7 O㿵b??TOVb[Zq mj^}8-.?i CQb|GurwnIq=M>-B+QXOpcQC뷱f?O{zHc|*F{$IVa 2Mb'nJ_w;17Ƙ/֗|մFWz]7U{v;vڼGM^mVf[Dj_\qjV3mLI onפS'fU=YߧЯFqPwvKShYjzoQKㄓyTn%וcx"8 Їa# H_!*;zD7ub?^4(3*1*n`pH]%w,-!W 4-A$.Q*.*ĩꍆ2|N k_UGFˠXzSXH ixϧJ8hX *Ѡ E;DG}\wg8* ~@E,8:,g/B fb0K/POmO1koܘStc+eGO.X%:1>T@佨!w0)`W>[oB4I6X2z~ q`ηw*Ю 0LpWPI,څ+K'm* !!@o0D>Xw:Mΰ'BH=m+j#8G1 ·2jy:[+>+~Z'n0m-4N_7̔8pD4>fS1Sx*k$^Xy:+ Rs 7 uTڜ_8#|/Y'C7f=s̿h5bsq7.zQa;(trǝ*si8ԩw, ߂_괚*#_XG8"u1xvR 'xa+aIiG(>n]57 ο$~&nHҍ_;?CiPF˰~ N;SCKd HΊ$_̛ݏAX-T)\N:q?R;0KYvcI:lVtO-"L{ Y&z 7g@PLgʦn,~籓S2=&};gݍ㻐M|^jn 8/#C;S>-& je Ti !-{?m gBpIb$8m\ń7!u"^R <5IO!MtW(kebDO' w!G?wKSs0h ?[9Z=^E?svIRa}SBnZ[N{J_cڦJ>ŲH׍{2دR"q-Lct(Ay+J/uG.˄: $1_q6O?qn=ujM}r|7DC )涿wBA~ϣ_|1S91}nqnM z@pɣ&VNp )q׊hbE7*<><My`yu> *}lcIDcy4lsguSy6 շ(í4U9wzQ@CבAf|MM9͡]A1]cR/'K868C+i2RoDž_‘i9On'|n5?}!|f<7ÅaǛfR5㏡$R ooL۫ ns>kGD\oeNjeMrR*nηD"n2w I\ac=oo~`)b=7I ksP"\j9hjz\9 5|Gubgr+9)uz=s~@³X*\NX&QcZWr[+@-Auu!X#M1H2[w@N/wO<{]xR,|SO>Ju~>%{W C迅:ۉ{+EWMbPz,wn?Fa@t5!,1 h/~QG@v8-2>e0&zQN8$Ĥ",:a9Y{찲(`\t_x->8U 1\!Eu?=GDO(<=- /`\+=M "*9̯nzsJy($Tn$At ocp^L82-*'1㲓Bsdvi+;HSQ[*L+6N!:~Kod=jsoG7{nڥCb9a[ѥ{㇩k"_̕|VѱHi +E]qblx=PhΑ:t7C #ٔ #C?? ,g%_ܦXi3&:Pߪ.CNViϿB+US1K]%{^ zPOX ;PٖJ^?dz+dj3`fA| }ӟ.rY;,lq&c  yQ1Cl<? 
Iֽ8ln} QPӘH{5ĝUw*}*p7X-JJ x·q T [MrQb7:N@4[(q?bA-4J=D_b=|CAMC0C>y}ǀK@27s]N4K r3ű1-?$kB${5񈁃NKh.ȩw(:x$ÎҖOźE6o\}7$gP?$~gXrKY62PvGp [4d(!z*$d&q8?I<6z6--U4`*-| ȺM~Usf/zFD(Bqr_㹹Ѡ*2CC`kp5W>#BX֥Z>O={ q\{퉙Uo,%V>cܢ9Sr.x ybJ餾Pݭƍ'2{NW;D P,c0<'j&$tu|~+ęX+$ 9W\NBLm-Tb"Njru`S4B>4|GS OOEcR/{m?1Uׇ9~ t]2^m{bn 8qģcQ΅bѫd^\"|668/x"-:NINYo%?O~|p Ys> I _I|Z^ɇԫi/y~5cC瀏vMł|9~x$ǃkܜ״belF9a(SG\?_XoÎ1ثAO *M:a4.24˄Ps!OxŒv4IX' kO 6k}1H_Db{2ރd]$P|hP,k[TG`C? C8EH8|8%Ru" Jw $&m7랃W)Ap'_#aA< %,S f=2IhₗM  -|S=89o N?>$1&qO}bwE9vTAa#_q(Y&Vu 3 ?Wx'(߸V '?"b/SBl+u۱:!:c2Ѵ 6J=>z{&U i}~#ϸdXn*v& |?Q/Sxsנз> YU`cD>w1C/&:Af'sˌ[x,S߁KїgT]ON5(CB4!RPU}c0yQp%fa+co8+4N?!zz{&σV >\o)$`m 3&UJkk]CݬP !qlЧFa@@7ep[~L%dS<(u.<*Xf &9PY\ąp ǁ~ ѐ^Ik t6b| Rꩭ3!;w+2:":5<‹v-s)=7uD'?,ϩS8oĵ3Hlu8R߾pi>ZySv沟\oT'>7U$#{CXt0㸟Ǻ 8`a{p ^-z՜߆R z/MUPEYW;OU[%Ro \w'tJ9DxDJ3e u-خMLV==KUe1*S!|2ׁ%&|`YvgXu;uՔ_;v< N?xg$sS˻u8L8z*e?ɞ??6~,<71QEV > u֏|h:b|59_CK{7b&w h= .7 _?.;͍D&.+á^;aA#Miۏ8ȆtYW@@UxU1?X%pš|_RO Պ>'^֑)0BXV?oBH*R$m`{^ w_OSƕ֜oL#_?{=T6mq?mL?ֺu-zzMJ/!㿊fU+&[e||DeN%_}p"q2|LY_+RDO|gt[Hƴ뭎hg u;^S?T)¡~i }~RIM޻7s!Pk~z{,><*?S C;} ?@!;Ϣ_©o,nx޿A3AHt=/vh2-$푼`!ϙE%-h{yT f{Ps+nu]1_i+.%JQ',psSfC5İ_Ip:\_9E'ɼ|z@#ϓP/~r >[R kTTX粞y^i}+1qjXEjbx)j q} Ve~ԺhAbTxTv4zWK?~ALowy^G[E:bh`Tg! _[Pz:gK!$:$q5 1E1c>53/3=ZUbġ>v9%!8?yJE r{:1 ' &#(. 9'V *x NCL1#DŽ-HwAأ\jZo]Q38=V? W2U==S]p`Ǽ*U갆q~=NOio8nW@/sٴWs0?0X'aJz,V׌"E0vjT71 BK4eJIz0.J qtp>#Y_Eь㥵{[E-x!y&jU'<~ְ&эw:Y~B^Egx݂C{>|O.F2_z*%+|Յb!.y@PDAG<}yw3|/|]G/}~<#ӎGD v&<溙@ɠyTzuRIWV_3\{ԓ=oȬ6;JO$v:%O xě沽)\5>ZsIX ~x܍KtږJoMW1_\,(p۩ wIKqPb6ɟh06?ҢџnJǽQ'y- ǟTxN]*rQ=?m>H:Os~ >-fyl4\}H7l<9KK5ןG o?ib7qܜHj3~ؼ_.c>/y$߷>\=p\.:{gu|-<7 q Cg1n ~.yl:==uqK1O㱖Qizj׳Bj.f6\ E*oQ~JPB@ps YU!O1_ 7+yXPYc mq?0_b(ܾӡ`=*#\$06rZ>\if./Q2bg3#Jdۅh!Va3qu‡VFrgq%"03q~5EGϩWpdK: 4ܼn:T< ^oZmic .Ttga!Mm+%~ a i_{n`jO3UV`!\H-LW2JO~)$>'=VM"$Ѭ=2ݮg\C㰐ybkjMN_KG'[&Yu>q woWǍP˺c4¸7s~s$.OHe#iڕjzh]TtI [=ŕJU׵3Z_$j_u0(TPtf |Af >p!?P?d=yT܌|OlUPեПIm_r?d?O dR2ndrݰ|]7zs,e+1y> EKs(7b|q,ӿ-Ia ~U}?* ڸԹvx%?^K <yEz۠?CR80 u7D!~POG vAXúor0,uQ0gZOJWH$_!8n2N9#sEݢ'w,|0xceG@`lB2"({.u?$~,5DV~? 
?Z]!Uڸ'owOLr\˻\dgܽ9PE7 J) Os>֣FGJ"p27ԕ*ANƎ9X'?BFbx4/SƓ!fq+'@jWX/bvz^l_;^*={yG{q<;W\>udC_}DtoG8b(Cd%?JQ? TfSKz7蠮 )M>эq!Bwϓv~Y~v5$|ϣr~?b~B,^k'[}2!297;jg.N ^vwOKNt˺4]Я4G?KKC4 ݮQ/aϛl/|>>S2m^es>2rrjnon#1z3!pv/Ǒlu9ܤLп/T `zX/@Wχ錟T6K^PwuF |dR}Ɇqpq} TI=R/Q8-߸Λ΀4?u1z =bHpeb߿q Sq(;PBPy] AZnqWW{n{C4F`<N 8 b'%7 JH!Lv#sA6]KZυV/V#X׍9BƫO'º\vW[[TK%?wPx fpb,Bk{>r7K\1$u1K}OL=3ND(;..ŌC1TP'<#*w2q;*qE?Lm%:[Oi'hd+Kf r=!gL;(}:Lp39il5yA*1/ciعJ^,<{nKi;?ԩYDm [y*w}۝R789J*~rÕ3^ax,2GJ`B-cUOh @@䅊w9n-k$/x{h9sÔŽS':6o!HbkC7>g_8m\:T(tu(|V1i){[?JG?Lq漂iU/EaX/񔽢"lt#--qEA~u0 \j_y\,$g5ћy!Kn圏0vb9K]< ,Ss~$?v^1~݌?v.eHX$߰yt6 a%]f$ʔ7?@7*1$kDE?'C=h;ƃ(O2OO^mp88yqO ф^%UB${?xo8pXFMO h$u9\"Y쀍(X(|}5|3.R*/j^(\׃7~ՊfXqU3;$X_LBPɊߌOP?FuaWǺX%uN[w^f%c  _Wؠ`ҿLC *:. n"X9U1BzJwÀ@\~"Na&lCcP1Xd>u6b a?d0FZ Erc7D/Ǒ0~ButN {2niE*c)X/SZwnka)€q߱/}x}ftڽas;=̼ɘ:d:n\n1WC~GL;}8Un;EA*s<7?[?-y1j n0둁S ު]y=_{Tb <y"63+ aMFsw pAPh6xj·yMJ$1rݗsݜ? dZ/aKE΄QL@V/33@m-ess[[Rc$t _GQ\K&,!A9NCt? >g^&.$y“Vx68BXlߔ_5W:&ɟ*y7*062 {1%8s+?p,y|l+$'E]|[xA2I/.޿fb'u9 n}ƓcGmG}@HTRkB h6J\a]E+8" x0שݴ]6/qf='@T9H瀠ioY܂ 27Nn6t)qJZrp&aIp2?K=]dOl"w:9h7y!f<ĿtDƿ;nԊdrN]O&Bh¢szEq8~K)% xŠŴM*?0(|u!].ѣ `f,jⵓF@\e%Nw-D  T Us )`ڳD'ϼ_A0#F;`?&YXsAHt#I76o>+ԡB3O<%h~/uG7,[P q&sYWn$ ; BܯS?ZKӭ @l#~y1;[BL#^-z:"@VD׉Y a<<HG1FD֪ &q;ֳ<@z@gAY;O2GW)-'ްmF7Y6뉎=szS LJ{;s}iAjA^|6) ^o0W=Nx T~x7Y%}W!AYü%R_X"Zhd|yPeEkC84bJewHߤ?_R˷Qp0aivg5wu:47Y<0oJ^_?$'l]SUz ?nmbʌ_VJ \0HF-㯂4ak=jݟ>c^o}}<'U HTxeU=^[U.%0F;3tPWKeT4(a!,"Ajb#ۊ_K{eSs=Ngq˘' =^HXX g'68'0z 7n沐"Brkp墟QL}o7]UL01"D~Nf^ tPq pfX ah2^ tKKݜ'@?Na `ԵN2ov sBj6ѕ):`H{9N{0aFv6Vu#,nMV @&iun"ذ u'yP"/r1a I΄W埗>,ywPyMR f$m>/%f=:<(:Mr@`h`>^hPueStJ}-%wi TTeRK %Z\yy:s/b@ɵ=t=7*!CF_N?*?5"s b X!EWP[F>f^9˽T,o^'b|Cn!!C .z"0#}.-:ja0>S @<yI{g0p/ANC /py#!FacNE^9c2y a8y6Y "b"c lJ f\B=^?3Jx=4K$c 0^DGawzuڬ NcE+BiTCoTfbFN:b=o;o?D'T4!cx 4L"@K £R!qBS,y"nˁ5fִc`XJ}蛮mr\p2_%̳|kpi5D'yz!J׬X%@>~rEgos|l;=1oʌ5;iE#]9dlI&4ٔn* q@MG]oT UVq}.L&^oT}( |>s}kvZ7,Cz+BqPg>?1Uez6@`3V#mq*nP)xl֣t_>&qc#p]nFwy[κEJsa Kɀ~a53&*n(k^6(.7<|/XBbz.8&> J )`%c?N6/_.$Rk Мo!vB^)<^zNpWk? 
]0 Pݴݬmg7yFe~3EW\2\Am|$y P}[_a \Yجȏ ѡoKa6/Z>)/q']c~鿜yL;(qVvo5טKX~n?'/VaaN|mu)ҡ ->SۓP-]vh("8NVVF_n}$YaSfS,Vjnǩz}V%XGkAtG̏;.`=[oh<,w[ݜ0&$ 3wזMJ8q `<f w+9c_$(18L'81 u>J Fέ]Mca3W$u Dwd8-Qu:n *Sla ӊ0DHǎ0Iyt"7:P$bCG]u<8!KapR& O av趟ׅ6&f4Gpudc9PYOˋf{붛_d}Y=(fQo9^$/#.&PIg|_aU4:py۝-# [߽BƕvJQ#ubx x@/AH\*E.,x{ERc;l3_w3c*ap{* 39>$G(ǬTȼl7ż|F_h䏃ڝo߃8S#q(냚\l{<.+yܜS$.օx%ptSe8PSfF0X/ǚg;QRG'Dow$/'?Hn`@s}e\i)0#lDL6j7#8]w3J]%:[HEн[-:NM0(M.IfIO{~}ɴ2?i;T#X܁|h0,9o*{yR%,,C0I- B21Di(\×!-2ųР• '2?֎s|>Ǝ鸐?z_ 70(; / QhΧ(1 >YSUԮ*כo~W~ݯ`C3-=)~ k)z*6#z7m1m{|1xqMTB}xHƅ+XlcN<+}ǚDE/n#ߛEj߹XEgb[}|Mƽqv%Sxvc+Xq\󻽣GP[_#L.j>$~sPt%_°; qtJ|cYr~_j^7?JTOGmih6f|BTiu_ Ymuo8>e~K L1a1(9obIoIۯUn3q=78N 6ntn ~t2`#w`Š|9^&bW׻hVI~Xj u6OO z{z[]?99ѥI:0tF0 @_RnQn nCb%*`1_&"À;N3۷g`ٽM\oϟr_a/T-:@hMpp 1(*P$|vb7R MGD 9 خhL'7RF-6@&YwqrVQW7cr/x]Yokf[֑6.8wJTA!tw |] .[Ѣ[\y1,un[ /[J(c*[ 7^0(ߛ~48,y'C_[u̳%fvSEIxӧ ŗ\ܕ1Mx_ْo;%A k%5yj!'q@lFKoxa i}ཿ#kX/\} <%||FYg̐<O|-tFpy8܂,!%~SigKx\l3޶*#!<|.o{o?Tf6meu n>ˈm?Juwg> =L1oY=4M|0("媊ԫ%y֊|._S46Amp7X-9"rUF&cy.C !FɔgcM([U?nアGMxcwP}"gp~laqV QǭL~Ӷα/`"Ṑ9|PpD #d]y><2UpX􂶊_=O& /-N|__d@,3m2m+c!SN7_i.(` ҋnѽvRЃ< x7[,*=4Gh^++Rs]P vG5[".)SXޝUU2BԁxBC 8ɕؙ]K0 #߇R{S ?R䳝3e)~mPH#/@Tu^=.7| ~ t!$uMe0!N_.JSF':j^7H!$zUNxp' Dw6y]_hW-.0󹀓K|X@¶hR8ȦrO@g{?C7ߌAKu_1x |z؇M\*~ s.Gw<]eyTW/7PK `yݗ{LiÎϥ=2QwuAd16d bJ_g D|b}!`ydITGd\C`ɾxX6;yPͻ2Rʺ/VAlMHFXj]u=MQZKtLΓx4αՌ6Ua1Jx["xL8a w(‰*I%T5ʌ^%p# *:Un}MM%cK9kkηROY} |:\m%#l$l!Ysa!1 ) ˧ Ƒ@b T8n% J (aFl=f!7<\\^E_Y#y+D?c?QY? W>~u󱮪UID%χC)wE]r0CÊ GS`߻x:8ބ`?7 -u"5jqz*Y0։xlMc<(g`:' .rƸPL#G`齿zJ+hiUd,z=l=gWxәf2or%3q/&#ݨhp)9g|xȮ{u"1^[Z,%KG3N{;l^yT~nb<889c7v`TDUvI V̼?[ 7xAK3 p&/hd~ڽQ̀!Epc ϡBJ"U=v)߻_~cDHN։1כwrҜf}y'x _lE̗ !~B|6 g1>M)ݛ[]qGaސ0RVit״£sQ+6TX+kӌO$fNiwf|HHxdt{;4[|o5mp|.|x9%n=U"4u!:&*6N}6tŸl2DO'"9L{fR>YR=T{܂gri|U4,b~gQ^ ږGv>ㆵoB5s'AtEY~g7ݡa׀c0<Oáu1#xzA+of.x쫢{Pd#WBBCD~7z^EPx9Ac?\̛̿Hi+IU%' Kbsgc7&1}$%5e_/˰Dq? 
2EWxb3ynO ˸9/Bkq] lί`%YWHBl~7+ƃi^~1Pͼɼ?bHMY_uI?}O=xMq}LP.)Dg%>1O=Xɜh jqC0ۿA }1P&:Ւ7r|K@Tk~uq(o5 ȡ sRנ%ۆw(u__/ufaS.5M@^;?N᛬]!q',h>~1} qrs 2l?,vߊ :/>,uT#ln j?w4ߋ[o xo {%?7w7yK ?ɻz˖Ou6bWwVOth33gϮ?Rlo)[-b{KRlo)T[-j{Kսsu ZtNx;eX$ݤQ }(Qϣ(}mG+;J_WKsZ;ܰ~4[-jiUKsZ;ܨ>4K-MjiQKsZ;ܠ4;-iiMKskZ>RMTG飳飳飳飳飳飳飳飳飳飳飋飋飋飋飋飋飋飋飋飋飫飫飫飫飫飫飫飫飫飫飛飛飛飛飛飛飛飛飛飛飻飻飻飻飻飻飻飻飻飻飇飇飇飇飇飇飇飇飇飇-:vdbjgWn?moJwVҝdt'k;Y+ZNJwVҝdt'k;Y+ZNJwVҝdt'k;Y+ZNT[-j{KuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuY[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[bmI%)֖X[jmI%֖Z[jmI%֖Z[jmI%֖Z[jmI%֖Z[jmI%֖Z[jmI%֖Z[jmI%֖Z[jmI%֖Z[jmI%֖Z[jmI%֖Z[jmI%֖Z[jmI%֖Z[jmI%֖&l Zqvunbalanced/R/0000755000175100001440000000000012543220332012561 5ustar hornikusersunbalanced/R/ubCNN.R0000644000175100001440000000151012543220332013646 0ustar hornikusersubCNN <- function(X,Y,k=1,verbose=T){ #only numeric features are allowed is.not.num<-which(sapply(X,is.numeric)==FALSE) if(length(is.not.num)>0) stop("only numeric features are allowed to compute nearest neighbors") S.X<-X S.Y<-Y i.1<-which(Y==1) i.0<-which(Y==0) N.1<-length(i.1) N.0<-length(i.0) if(N.1==0 | N.0==0) { if(verbose) cat("All instances of the same class \n") return(list(X=X,Y=Y)) } #initially C contains all 1s from S and one random 0 obs id.C<-c(i.1,sample(i.0,1)) C.X<-X[id.C,] C.Y<-Y[id.C] #use C to to build a 1-NN and classify all obs in S Y.knn<-knn(C.X,S.X,C.Y,k) #move missclassified obs into C id.miss<-which(S.Y!=Y.knn) id.C<-c(id.C,id.miss) # id.C<-sample(id.C) X<-X[id.C, ] Y<-Y[id.C] #now C is consistent with S return(list(X=X,Y=Y)) } unbalanced/R/ubOver.R0000644000175100001440000000173712543220332014156 0ustar hornikusersubOver <- function(X, Y, k = 0, verbose=TRUE) { stopifnot(k >= 0, class(verbose) == "logical", all(unique(Y) %in% c(0, 1))) i.1 <- which(Y == 1) N.1 <- length(i.1) i.0 <- which(Y == 0) N.0 <- length(i.0) max.k <- floor(N.0/N.1) 
if (k == 0) { # sample with replacement from the minority class to obtain a balanced dataset i.1.over <- sample(i.1, N.0, replace = TRUE) } if (k > 0) { # sample with replacement from the minority class until we have k-times the orginal number of 1s N.1.over <- N.1 * k if (N.1.over > N.0) { if (verbose) cat("Max number of times allowed to replicate minority class is", max.k, "\n taking as many samples as the majority class \n") N.1.over <- N.0 } i.1.over <- sample(i.1, N.1.over, replace = TRUE) } Id = c(i.0, i.1.over) Id <- sort(Id) if (is.vector(X) != TRUE) X = X[Id, ] else X = X[Id] Y = Y[Id] return(list(X = X, Y = Y)) } unbalanced/R/ubSMOTE.R0000644000175100001440000000242612543220332014126 0ustar hornikusersubSMOTE <- function(X,Y,perc.over=200,k=5,perc.under=200,verbose=TRUE){ if(!is.factor(Y)) stop("Y has to be a factor") if(is.vector(X)) stop("X cannot be a vector") data<-cbind(X,Y) id.1 <- which(Y == 1) time<-system.time({ # generate synthetic cases from these id.1 newExs <- ubSmoteExs(data[id.1,],"Y",perc.over,k) }) # if(verbose) # cat("Time SMOTE:",round(as.numeric(time["elapsed"]),digits=2),"; perc.over",perc.over,"; perc.under",perc.under,"; k",k,"\n") row.has.na<-function(X) return(apply(X,1,function(x){any(is.na(x))})) row.is.na<-row.has.na(newExs) if(any(row.is.na)) { newExs<-newExs[!row.is.na, ] colnames(newExs)<-colnames(data) cat("WARNING: NAs generated by SMOTE removed \n") } # get the undersample of the "majority class" examples selMaj <- sample((1:NROW(data))[-id.1], as.integer((perc.under/100)*nrow(newExs)), replace=T) # the final data set (the undersample + the rare cases + the smoted exs) newdataset <- rbind(data[selMaj,],data[id.1,],newExs) #shuffle the order of instances newdataset<-newdataset[sample(1:NROW(newdataset)), ] X<-newdataset[ ,-ncol(newdataset)] Y<-newdataset[ ,ncol(newdataset)] return(list(X=X,Y=Y)) } unbalanced/R/ubUnder.R0000644000175100001440000000240612543220332014312 0ustar hornikusersubUnder <- function(X, Y, perc = 50, 
method = "percPos", w = NULL) { stopifnot(all(unique(Y) %in% c(0, 1))) N <- length(Y) i.1 <- which(Y == 1) N.1 <- length(i.1) i.0 <- which(Y == 0) N.0 <- length(i.0) if (N.1 >= N.0) stop("less 0s instances than 1s, the minority class has to be class 1") type = match.arg(method, c("percPos", "percUnder")) if (type == "percPos") { #perc < (N.1/N * 100)) means removing minority observations stopifnot(perc >= (N.1/N * 100), perc <= 50) N.0.sub <- floor(N.1 * (100 - perc) / perc) } if (type == "percUnder") { # perc = N.1/N.0 * 100 is the minimum value allowed that correspond to percPos with perc = 50 stopifnot(perc >= N.1/N.0 * 100, perc <= 100) N.0.sub <- floor(perc/100 * N.0) } # if the weights are not given, assign equal probability to all examples. if (is.null(w)) w <- rep(1/N.0, N.0) if (N.0.sub <= N.0) i.0.sub <- sample(i.0, N.0.sub, prob = w) else stop("subset of majoirty instances bigger than orginal set of majoirty instances") i.0.rm <- setdiff(i.0, i.0.sub) Id <- c(i.0.sub, i.1) Id <- sort(Id) if (is.vector(X) != TRUE) X <- X[Id, ] else X <- X[Id] Y <- Y[Id] return(list(X = X, Y = Y, id.rm = i.0.rm)) } unbalanced/R/ubSmoteExs.R0000644000175100001440000000411212543220332015000 0ustar hornikusersubSmoteExs <- function(data,tgt,N=200,k=5) { # INPUTS: # data are the rare cases (the minority "class" cases) # tgt is the name of the target variable # N is the percentage of over-sampling to carry out; # and k is the number of nearest neighours to use for the generation # OUTPUTS: # The result of the function is a (N/100)*T set of generated # examples with rare values on the target nomatr <- c() T <- matrix(nrow=dim(data)[1],ncol=dim(data)[2]-1) for(col in seq.int(dim(T)[2])){ cl <- class(data[,col]) if (cl %in% c('Date','POSIXct','POSIXt')) stop("cannot SMOTE variables of class Date, POSIXct or POSIXt") if (cl %in% c('factor','character')) { T[,col] <- as.integer(data[,col]) nomatr <- c(nomatr,col) } else T[,col] <- data[,col] } if (N < 100) { # only a percentage of 
the T cases will be SMOTEd nT <- NROW(T) idx <- sample(1:nT,as.integer((N/100)*nT)) T <- T[idx,] N <- 100 } p <- dim(T)[2] nT <- dim(T)[1] ranges <- apply(T,2,max)-apply(T,2,min) nexs <- as.integer(N/100) # this is the number of artificial exs generated # for each member of T new <- matrix(nrow=nexs*nT,ncol=p) # the new cases for(i in 1:nT) { # the k NNs of case T[i,] xd <- scale(T,T[i,],ranges) for(a in nomatr) xd[,a] <- xd[,a]==0 dd <- drop(xd^2 %*% rep(1, ncol(xd))) kNNs <- order(dd)[2:(k+1)] for(n in 1:nexs) { # select randomly one of the k NNs neig <- sample(1:k,1) ex <- vector(length=ncol(T)) # the attribute values of the generated case difs <- T[kNNs[neig],]-T[i,] new[(i-1)*nexs+n,] <- T[i,]+runif(1)*difs for(a in nomatr) new[(i-1)*nexs+n,a] <- c(T[kNNs[neig],a],T[i,a])[1+round(runif(1),0)] } } newCases <- data.frame(new) for(a in nomatr) newCases[,a] <- factor(newCases[,a],levels=1:nlevels(data[,a]),labels=levels(data[,a])) newCases[,tgt] <- factor(rep(data[1,tgt],nrow(newCases)),levels=levels(data[,tgt])) colnames(newCases) <- colnames(data) newCases } unbalanced/R/ubOSS.R0000644000175100001440000000175012543220332013702 0ustar hornikusersubOSS <- function(X, Y, verbose=TRUE){ stopifnot(class(verbose) == "logical", all(unique(Y) %in% c(0, 1))) #only numeric features are allowed if(any(sapply(X,is.numeric)==FALSE)) stop("only numeric features are allowed to compute nearest neighbors") S.X<-X S.Y<-Y i.1<-which(Y==1) N.1<-length(i.1) i.0<-which(Y==0) N.0<-length(i.0) if(N.1==0 | N.0==0) { cat("all instances of the same class \n") return(list(X=X,Y=Y)) } #initially C contains all 1s from S and one random 0 obs id.C<-c(i.1,sample(i.0,1)) C.X<-X[id.C, ] C.Y<-Y[id.C] #use C to to build a 1-NN and classify all obs in S Y.knn<-knn(C.X, S.X, C.Y, k = 1) #move missclassified obs into C id.miss<-which(S.Y!=Y.knn) id.C<-c(id.C,id.miss) id.C <- sort(id.C) #id.C<-sample(id.C) C.X<-X[id.C, ] C.Y<-Y[id.C] #now C is consistent with S #remove from C 0s that are tomek links 
data<-ubTomek(C.X, C.Y, verbose) X<-data$X Y<-data$Y return(list(X=X,Y=Y)) } unbalanced/R/ubRacing.R0000644000175100001440000003437412543220332014451 0ustar hornikusersubRacing <- function(formula, data, algo, positive=1, ncore = 1, nFold=10, maxFold=10, maxExp=100, stat.test="friedman", metric="f1", ubConf, verbose=FALSE, ...){ stopifnot(class(formula)=="formula", NROW(data)>1, NCOL(data)>1, is.logical(verbose), ncore>0, nFold>1) metric <- match.arg(metric, c("f1","gmean", "auc")) stat.test <- match.arg(stat.test, c("friedman","t.bonferroni", "t.holm", "t.none", "no")) stopifnot(class(ubConf)=="list", names(ubConf) %in% c("type", "percOver", "percUnder", "k", "perc", "method", "w")) target <- as.character(formula[[2]]) tgt <- which(names(data) == target) if(length(tgt) == 0) stop("target variable not defined") predCandidate <- function(Xtr, Ytr, Xts, Yts, algo, balType, positive, ubConf, metric, verbose, ...){ if (balType != "unbal"){ #re-balance the dataset data <- ubBalance(Xtr, factor(Ytr), type=balType, positive=positive, ubConf$percOver, ubConf$percUnder, ubConf$k, ubConf$perc, ubConf$method, ubConf$w, verbose) TR <- data.frame(data$X, Y=as.factor(data$Y)) } else TR <- data.frame(Xtr, Y=as.factor(Ytr)) #single prediction without parameter tuning #library(mlr) lrnTask <- makeClassifTask(id=paste(algo, balType, sep="_"), data=TR, target="Y", positive=positive) lrnTask <- removeConstantFeatures(lrnTask, show.info=verbose) L <- lrnTask$task.desc$class.levels negative <- setdiff(L, positive) if(length(L) > 2) stop("only binary classification supported yet") race.lrn <- paste(lrnTask$task.desc$type, algo, sep=".") lrn <- makeLearner(race.lrn, predict.type = "prob", ...) 
mod <- train(lrn, lrnTask) newdata <- data.frame(Xts, Y=as.factor(Yts)) pred <- predict(mod, newdata=newdata) perf <- mlr::performance(pred, measures = list(gmean, f1, mlr::auc)) res.metric <- as.numeric(perf[metric]) #revert metric since racing is a minimizing algorithm res <- 1 - res.metric res } #test each methods on the same observations #return a vector of errors (1 or 0 for each method) testCandidates <- function(Xtr, Xts, Ytr, Yts, algo, positive, balanceTypes, ubConf, metric, ncore, verbose, ...){ nBalanceTypes <- length(balanceTypes) #library(doParallel) if (ncore > 1) { doParal <- `%dopar%` } else { doParal <- `%do%` } j <- NULL #We assume that a cluster is already registered! #library(foreach) #make predictions error <- doParal( foreach(j=1:nBalanceTypes, .combine=c, .packages=c('mlr'), .export=c('predCandidate', 'ubBalance', 'ubUnder', 'ubCNN', 'ubENN', 'ubNCL', 'ubOSS', 'ubOver', 'ubSMOTE', 'ubSmoteExs', 'ubTomek')), predCandidate(Xtr, Ytr, Xts, Yts, algo, balanceTypes[j], positive, ubConf, metric, verbose, ...)) # error <- NULL # for(j in 1:nBalanceTypes) { # error <- c(error, predCandidate(Xtr, Ytr, algo, balanceTypes[j], positive, ubConf, metric, verbose, ...)) # } names(error) <- balanceTypes return(error) } #from race package but edited aux2.friedman.edit<- function(y, I = 1:ncol(y), n=nrow(y), conf.level = 0.95,interactive=F) { k <- length(I) r <- t(apply(y[1:n, I, drop=FALSE], 1, rank)) A <- sum(as.vector(r)^2) R <- apply(r, 2, sum) J <- I[order(R)] alpha <- 1 - conf.level TIES <- tapply(r, row(r), table) STATISTIC <- ((12 * sum((R - n * (k + 1)/2)^2))/(n * k * (k + 1) - (sum(unlist(lapply(TIES, function(u) { u^3 - u })))/(k - 1)))) PARAMETER <- k - 1 PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE) if (!is.nan(PVAL) && (PVAL < alpha)) { if (interactive) cat("|-|") t <- qt(1 - alpha/2, (n - 1) * (k - 1)) * (2 * (n * A - sum(R^2))/((n - 1) * (k - 1)))^(1/2) o <- order(R) J <- I[o[1]] for (j in 2:k) if (abs(R[o[j]] - R[o[1]]) > t) break 
else J <- c(J, I[o[j]]) } else { if (interactive) cat("|=|") } return(J) } #from race package but edited aux.friedman.edit<- function(Results,no.subtasks.sofar,alive,conf.level=0.95,interactive=F) { which.alive <- which(alive) no.alive<-length(which.alive) if (no.alive == 2) { V1 <- Results[1:(no.subtasks.sofar), which.alive[1]] V2 <- Results[1:(no.subtasks.sofar), which.alive[2]] PVAL <- wilcox.test(V1, V2, paired = TRUE, exact = FALSE)$p.value D <- V1 - V2 w.stat <- sum(sign(D) * rank(abs(D))) if (!is.nan(PVAL) && !is.na(PVAL) && (PVAL < 1 - conf.level)) { if (interactive) cat("|-|") if (w.stat < 0) { best <- which.alive[1] alive[which.alive[2]] <- FALSE } else { best <- which.alive[2] alive[which.alive[1]] <- FALSE } } else { if (interactive) cat("|=|") if (w.stat < 0) { best <- which.alive[1] } else { best <- which.alive[2] } } } else { J <- aux2.friedman.edit(Results[1:(no.subtasks.sofar), ,drop=FALSE], I=which.alive, conf.level=conf.level, interactive=interactive) alive[-J] <- FALSE best <- J[1] } return(list(alive=alive,best=best)) } #from race package but edited aux.ttest.edit <- function(Results,no.subtasks.sofar,alive,adjust=c("none", "bonferroni", "holm"),conf.level=0.95,interactive=F) { which.alive <- which(alive) no.alive<-length(which.alive) adjust <- match.arg(adjust) mean.all <- array(0, c(ncol(Results))) for (j in 1:ncol(Results)) mean.all[j] <- sum(Results[1:no.subtasks.sofar, j]/no.subtasks.sofar) best <- match(min(mean.all[alive]), mean.all) PJ <- array(0, dim = c(2, 0)) for (j in which.alive) { Vb <- Results[1:no.subtasks.sofar, best] Vj <- Results[1:no.subtasks.sofar, j] p <- t.test(Vb, Vj, paired = TRUE)$p.value if (!is.nan(p) & !is.na(p)) PJ <- array(c(PJ, j, p), dim = dim(PJ) + c(0, 1)) } PJ[2, ] <- p.adjust(PJ[2, ], method = adjust) dropped.any <- FALSE for (j in 1:ncol(PJ)) if (PJ[2, j] < (1 - conf.level)) { alive[PJ[1, j]]<- FALSE dropped.any <- TRUE } if (interactive) { if (dropped.any) cat("|-|") else cat("|=|") } 
return(list(alive=alive,best=best)) } #from race package but edited format.precis <- function(title, value,title.width,value.width) { dots <- function(n) return(paste(rep(".", n), collapse = "")) spaces <- function(n) return(paste(rep(" ", n), collapse = "")) string <- paste(title, dots(title.width - nchar(title)), sep = "") if (nchar(value) <= value.width) { string <- paste(string, dots(value.width - nchar(value)),value, sep = "") } else { f.vec <- strwrap(value, width = value.width) first <- f.vec[1] first <- paste(dots(title.width - nchar(first)), first, sep = "") rest <- format(f.vec[-1]) rest <- paste(spaces(title.width + value.width - max(nchar(rest))), rest, sep = "", collapse = "\n") string <- paste(string, paste(first, rest, sep = "\n"), sep = "") } return(paste(string, "\n")) } #from function createFolds in caret package but edited idFolds <- function(y, k = 10){ stopifnot(k > 0) if (is.numeric(y)) { cuts <- floor(length(y)/k) if (cuts < 2) cuts <- 2 if (cuts > 5) cuts <- 5 y <- cut(y, unique(quantile(y, probs = seq(0, 1, length = cuts))), include.lowest = TRUE) } if (k < length(y)) { y <- factor(as.character(y)) numInClass <- table(y) foldVector <- vector(mode = "integer", length(y)) for (i in 1:length(numInClass)) { seqVector <- rep(1:k, numInClass[i]%/%k) if (numInClass[i]%%k > 0) seqVector <- c(seqVector, sample(1:k, numInClass[i]%%k)) foldVector[which(y == dimnames(numInClass)$y[i])] <- sample(seqVector) } } else foldVector <- seq(along = y) foldVector } foldVector <- idFolds(data[ ,tgt], nFold) input <- data[ ,-tgt] output <- data[ ,tgt] balanceTypes <- c("unbal", "ubOver", "ubUnder", "ubSMOTE", "ubOSS","ubCNN", "ubENN", "ubNCL", "ubTomek") nBalanceTypes <- length(balanceTypes) title <- paste("Racing for unbalanced methods selection in", nFold, "fold CV") title.width<-32 value.width<-title.width precis <- paste("\n", title,"\n", format.precis("Number of candidates", nBalanceTypes, title.width, value.width), format.precis("Max number of folds in the 
CV", maxFold,title.width,value.width), format.precis("Max number of experiments", ifelse(maxExp, maxExp, "unlimited"),title.width,value.width), #format.precis("Metric", metric, title.width, value.width), format.precis("Statistical test", switch(stat.test, friedman = "Friedman test", t.bonferroni = "t-test with Bonferroni's correction for multiple comparisons", t.holm = "t-test with Holm's correction for multiple comparisons", t.none = "t-test with no correction for multiple comparisons", no = "No test"),title.width,value.width) ) cat(precis) if (nBalanceTypes > maxExp & maxExp!=0) stop("Max number of experiments is smaller than number of candidates") if (maxFold > nFold) maxFold <- nFold Results <- matrix(NA, nrow=nFold, ncol=nBalanceTypes) colnames(Results) <- balanceTypes alive <- rep(TRUE,nBalanceTypes) n.Exp <- 0 n.alive <- nBalanceTypes types.alive <- balanceTypes #open a cluster if (ncore > 1) { cl <- makeCluster(ncore) cat(" Parallel execution with", ncore, "cores \n") registerDoParallel(cl) } else makeCluster <- NULL cat(" \n", " Markers: \n", " x No test is performed. \n", " - The test is performed and \n", " some candidates are discarded.\n", " = The test is performed but \n", " no candidate is discarded. 
\n", " \n", "+-+-----------+-----------+-----------+-----------+-----------+\n", "| | Fold| Alive| Best| Mean best| Exp so far|\n", "+-+-----------+-----------+-----------+-----------+-----------+\n") for(i in 1:nFold){ n.Exp <- n.Exp + n.alive if ((n.Exp>maxExp | maxFold1){ if (stat.test=="friedman") test <- aux.friedman.edit(Results, no.subtasks.sofar=i, alive, conf.level=0.95, interactive=TRUE) if (stat.test=="t.none") test<-aux.ttest.edit(Results,no.subtasks.sofar=i,alive,"none", conf.level=0.95) if (stat.test=="t.holm") test<-aux.ttest.edit(Results,no.subtasks.sofar=i,alive,"holm", conf.level=0.95) if (stat.test=="t.bonferroni") test<-aux.ttest.edit(Results,no.subtasks.sofar=i,alive,"bonferroni", conf.level=0.95) if (stat.test=="no"){ test <- list(alive=rep(TRUE, nBalanceTypes), best=as.numeric(which(err==min(err)))[1]) cat("|x|") } alive <- test$alive best <- test$best } #remove significantly worse mehtods which.alive <- which(alive) n.alive <- length(which.alive) types.alive <- balanceTypes[which.alive] if(i==1){ avg.best <- min(err) cat(" |x|") } else avg.best <- as.numeric(apply(Results, 2, mean, na.rm=TRUE)[best]) #revert metric since racing is a minimizing algorithm avg.best <- 1 - avg.best cat(paste(formatC(i, width = 11), "|", formatC(n.alive, width = 11), "|", formatC(best, width = 11), "|", formatC(avg.best, width = 11), "|", formatC(n.Exp, width = 11), "|\n"," ", sep = "")) } cat("+-+-----------+-----------+-----------+-----------+-----------+\n") colnames(Results) <- balanceTypes Results <- Results[1:max.tasks, ] #revert metric since racing is a minimizing algorithm Results <- 1 - Results #write.csv(Results,file="Race.csv") #close the cluster if (ncore > 1) { stopCluster(cl) rm(cl) } else stopCluster <- NULL if(max.tasks>1){ avg.err <- apply(Results, 2, mean, na.rm=TRUE) sd.err <- apply(Results, 2, sd, na.rm=TRUE) } else { #max.tasks==1 avg.err <- Results sd.err <- Results } avg.best <- as.numeric(avg.err[best]) sd.best <- as.numeric(sd.err[best]) 
best.desc <- balanceTypes[best] #percentage of computational gain percGain <- round((1 - n.Exp/(nFold*nBalanceTypes))*100, 0) cat("Selected candidate:",best.desc,"\t metric:", metric, "\t mean value:", round(avg.best, 4),"\n") return(list(best=best.desc, avg=avg.best, sd=sd.best, N.test=n.Exp, Gain=percGain, Race=Results)) } unbalanced/R/unbalanced-internal.R0000644000175100001440000002007712543220332016620 0ustar hornikusers.Random.seed <- c(403L, 2L, -988050617L, -1339227619L, -239298412L, 425179194L, -102253947L, -1969722529L, 390669286L, 779300464L, -211828333L, -1808554447L, 1738063072L, 46042494L, -1518067703L, -1491619781L, 101662218L, -746975764L, -1141865873L, -803058267L, -1351119524L, -1231834830L, -52867027L, 295055079L, -810023922L, 1910418920L, -1770522645L, 2135107721L, 1062284312L, 82037574L, 818502049L, 85141555L, 781608354L, 1679777940L, -210712809L, 1500967181L, -1600825308L, -1427998710L, 812871029L, 143187151L, 1820790742L, -1514238080L, -1634475069L, 713491809L, 1891941328L, -1230636242L, 965977817L, 238580971L, 2092447066L, -445896932L, 1602273823L, -1552407947L, 1322020620L, 1700348642L, -2057971779L, -535629769L, 744602334L, -1443084776L, 2117871995L, 697671961L, -426898008L, 1406792022L, 767054289L, -795668093L, -2113834990L, -2024365148L, 1599476583L, -1984223043L, 1606406196L, 1850811290L, 2073604069L, -2026341505L, 1892248326L, -754465648L, 838145331L, -961432879L, 1876834432L, 455696734L, -308947287L, -2013101733L, -1528870038L, -1682726388L, -1583996337L, -881012987L, 148146108L, -1814289774L, 1630652813L, 447508935L, 450974382L, 1002535688L, -2129549045L, -2028570647L, -111079240L, -223373466L, -1402342399L, 723911315L, -1782422206L, 116435700L, -864086281L, 1510602477L, -232455804L, -1926557782L, 805066005L, -814882897L, 1960964022L, 1955042784L, -480318173L, -694566335L, -8606416L, 1029613390L, -90591815L, 724668363L, -258227078L, 335697276L, -628609601L, 1465584277L, -909969236L, 941752962L, 190941789L, 232865111L, -463110082L, 
-433384328L, 1034616667L, 1077955257L, 603342600L, -871907914L, 1310073585L, -1107188445L, -1067417742L, 403033540L, 1237783943L, 1551849309L, 1148270932L, -1881359494L, 54540997L, 107670559L, -1587102810L, -575816400L, -871497773L, -738759183L, 1788713760L, -1534732866L, 482438601L, -1153797765L, 1267087178L, -284563540L, -511103313L, -951047067L, -963945444L, 164278898L, -1925626643L, 520215847L, 1741162830L, -1176557144L, -1406112341L, 322954953L, 1184399064L, -1657150586L, 1323988577L, 552431603L, 280756834L, 1171305684L, 1119229399L, -1502044211L, -760063132L, 1801584458L, 265954997L, -248168945L, 325086998L, -835503680L, -1163228797L, 2147266465L, -370278512L, -650124178L, 1380143641L, -1448076629L, -2140840294L, 952891996L, -1367914657L, 1347983669L, -1958561588L, 1577452066L, -696214403L, -1038301833L, 1974823582L, -473241256L, -111832901L, 1363758681L, -977461784L, 436152342L, 2136101777L, 281959491L, -1305717934L, -1547765788L, -1171143129L, 1122270845L, 286949236L, -1235332262L, -2033810011L, -505044545L, 1326500422L, 2020935888L, -1826061L, -866700271L, 595305024L, 60995102L, -12361495L, 1832739355L, -607862230L, -826653364L, -2003728625L, 646064709L, -2003067908L, 1591960146L, 1752907725L, 1260763527L, 1165110382L, -795388856L, -1115984309L, -1705229399L, 1065929464L, 2112020006L, -1830824127L, 945003731L, -1540030334L, 249478836L, -463422921L, -1502740179L, 336777796L, 283083832L, -713287654L, 1929321168L, 187913276L, 1715225748L, -878871422L, -446249024L, 1287264572L, 1617646224L, -136000478L, -211426296L, 273926284L, 101506076L, -830867822L, -762731392L, -1372999660L, 2018805032L, -1994912198L, 449381584L, -21293972L, 1535234740L, 983201810L, 1727113184L, 882495436L, 1932930528L, 323550754L, -1873475288L, 88305932L, -1732966852L, 1059016946L, -515790096L, 236958532L, 725834328L, 842179130L, 1220183792L, -1579725636L, -930763244L, 283705538L, -283549600L, -164868740L, 619953360L, 1971776546L, -1083229528L, -1384235124L, -1677608516L, 482198578L, 
-363714752L, -1192625996L, -692071608L, -768546118L, -443642096L, 960059628L, 671675476L, -198846702L, -786091104L, -1067147316L, -1954632864L, 123109570L, 55805480L, 1442452908L, -689311940L, -2052554894L, 1864553264L, -1915305436L, -920702920L, -700838694L, -1424162416L, 2005923580L, -12775532L, -651836606L, -1314555392L, -710876612L, -545188272L, -251088222L, -1303856312L, 1970797068L, 2120936668L, -1393708462L, 1375447168L, -1453425836L, 1296437736L, -1409668358L, -630253744L, -2070232020L, -603646604L, 413892242L, -1993704864L, 514478732L, 158788448L, -1615105310L, 1890597032L, -780209972L, 234676732L, 400850866L, 1240675824L, 1188785604L, 908875608L, 928676730L, 2090712496L, -1197030980L, -756039596L, -14060606L, -1111112928L, 1735784380L, 270873168L, -1672385054L, 1698099560L, 310340940L, 482590972L, 1661420274L, 1075072768L, 251002484L, 344702984L, 1914409402L, 654766032L, 1126894060L, -718698604L, 409547474L, 187571808L, -1384037044L, -367729376L, 2044054146L, -1903470104L, -430768148L, -337631684L, -986970510L, 1095707504L, 1533724964L, -120987080L, 980998554L, -1200752176L, -1531796036L, -1479788396L, 1955091586L, 1139066304L, -724710596L, 1263299728L, -1995728094L, -1736935928L, -1135311604L, -1801357924L, -757985774L, -813573248L, -2046381292L, 1171108136L, -1533950918L, 1022430160L, 370686572L, 548368948L, 717748882L, -106543776L, -593659572L, 1881311200L, 1417115042L, -946489944L, -1570597236L, -2116354628L, 31821426L, -1062890512L, 1197911620L, -1644023720L, -426657734L, 1913217136L, 1976186940L, -956882668L, -1590238526L, -395057440L, -454141828L, 1364981456L, 1860008354L, 891078696L, -450442484L, -1648424516L, -736079054L, 1888312000L, 510340148L, 124325576L, 906908858L, 2076956304L, 1841913580L, 1847192148L, -1915645294L, -2139694816L, -757743156L, 1942627040L, 1289449666L, 89173032L, 124835756L, -1352869188L, 338586610L, 1765111344L, -1043071068L, 1103343032L, 429926234L, -277114736L, -30631044L, 1072955668L, -1773029054L, -815685376L, 
169881020L, 1617813840L, -1336641246L, -55543224L, 2080886796L, 637040220L, 1367820754L, 1138439680L, -1650297516L, 629050600L, 1003053306L, 1128703696L, 633924396L, -802819852L, 1569462674L, -9314976L, -1262777332L, 861633760L, -1537031582L, -1594866520L, 1685863500L, 871618428L, 2070690098L, 1182331504L, 841052996L, 1498747864L, -1643437190L, -1559525328L, 1731241198L, 59408795L, 755005437L, -1753656598L, 1771013512L, 642368305L, -183788637L, -1005212764L, 1078634066L, 1200399783L, -1785216527L, -1466665674L, 636337620L, 1481323365L, -865928337L, 560526760L, 1749519110L, 892866099L, 1504576661L, 326114546L, 1926663488L, -916852055L, 2011757339L, -174961428L, 1795305722L, -1374695121L, -1360648903L, -1501305010L, -93360740L, -1914130675L, -1596577417L, -325177984L, 957320158L, -191396693L, 1280008109L, 1875784730L, -1604281640L, -773582015L, -2043321357L, 388279828L, 1422485410L, -1690904393L, -1096088959L, 760382086L, -1737895228L, -2059512843L, -1915671969L, 2035965112L, -200394346L, 930874275L, -1331693723L, -626946430L, 1233275888L, 173518233L, -1295260789L, 798346876L, -595262294L, -232966433L, 1553288297L, -666573186L, -1937699220L, 1176351421L, 1761980295L, 1064655728L, -1683428850L, 1656145723L, -2088084451L, -536587062L, -420966424L, 131554513L, -156354365L, 1971523524L, -2028139406L, 1417061703L, -1426794223L, -296940394L, 678666740L, 2108740485L, 1218837199L, 1565665224L, 1953631846L, 555212947L, -891674635L, 111098322L, -4726240L, -1517249527L, -1549754053L, -32332660L, 634876186L, 1242113935L, 632731865L, 593352558L, 1844170492L, -2145424275L, 1922185239L, -1753218592L, 2087742910L, -1679027317L, 1852195853L, 412935226L, -1499503496L, 2090102817L, -317531437L, -50767372L, 131367554L, 1712234263L, 407918049L, -1922929882L, 748930596L, -568753963L, 2094882943L, -1006750440L, 2068737078L, -1671744829L, 643522309L, -789196126L, -376078192L, -191803335L, 1972855019L, 1342935516L, -1782237174L, -2136062209L, -596306103L, 287206622L, -844008820L, 
2033808989L, 2111773799L, -1958923888L, -1733553746L, -236403621L, -364489667L, 252074666L, -412178616L, -1984640143L, 1354714339L, 581166052L, 162263826L, 638942951L, -1623816015L, 2125884150L, -1274417900L, 1150530085L, -1886486737L, 1671217384L, 2103386566L, 842092659L, 1155955413L, -1013495630L, 1329503360L, -597688215L, -639566629L, 744091692L, 555413562L, -1386754321L, -1817214599L, 661769742L, -732319780L, -1352996787L, -878351433L, -1966639808L, -1945947106L, 1138029291L, 138091117L, -2004956454L, -1163898216L, 1313861889L, -1458080077L, 885531092L, 1433538786L, -715649826L) unbalanced/R/ubNCL.R0000644000175100001440000000404412543220332013651 0ustar hornikusersubNCL <- function(X, Y, k = 3, verbose = TRUE) { stopifnot(k > 0, class(verbose) == "logical", all(unique(Y) %in% c(0, 1))) #only numeric features are allowed if(any(sapply(X,is.numeric)==FALSE)) stop("only numeric features are allowed to compute nearest neighbors") N.orig <- length(Y) # If an instance belongs to the majority class and the classification given by its # three nearest neighbors contradicts the original class, then it is removed data <- ubENN(X, Y, k, verbose) X <- data$X Y <- data$Y N <- length(Y) i.1 <- which(Y == 1) i.0 <- which(Y == 0) if (length(i.0) == 0) { # if there are no 0 obs then don't do anything if (verbose) cat("Warning: No remaining majority instances after ENN \n") return(list(X = X, Y = Y)) } # If an instance belongs to the minority class and its nearest neighbors # misclasify it, then the nearest neighbors that belong to the majority class are removed. 
timeRemove <- system.time({ out.hat <- FNN::knn(train = X, test = X[i.1, ], cl = Y, k = k + 1, prob = TRUE) proba.hat <- attr(out.hat, "prob") levels(out.hat) <- c(0, 1) prob.th <- k/(k+1) id.miss <- which((Y[i.1] != out.hat) & (proba.hat >= prob.th)) if (length(id.miss) == 0) { Id <- 1:N id2remove <- NULL } else { out.nn <- attr(out.hat, "nn.index")[, -1] #the fist column is the point itself if (!is.vector(out.nn)) nn.miss <- out.nn[id.miss, ] else nn.miss <- out.nn[id.miss] nn.2check <- unique(as.vector(nn.miss)) id2remove <- nn.2check[which(Y[nn.2check] == 0)] Id <- setdiff(1:N, id2remove) } }) if (verbose) cat("Number of instances removed from majority class after ENN:", length(id2remove), "\t Time needed:", round(timeRemove[3], digits = 2), "\n") Id <- sort(Id) if (is.vector(X) != TRUE) X = X[Id, ] else X = X[Id] Y = Y[Id] N.left <- length(Y) if (verbose) cat("Number of instances removed from majority class:", N.orig - N.left, "\n") return(list(X = X, Y = Y)) } unbalanced/R/ubTomek.R0000644000175100001440000000277112543220332014321 0ustar hornikusersubTomek <- function(X, Y, verbose=TRUE){ stopifnot(class(verbose) == "logical", all(unique(Y) %in% c(0, 1))) #only numeric features are allowed if(any(sapply(X,is.numeric)==FALSE)) stop("only numeric features are allowed to compute nearest neighbors") N<-nrow(X) i.1<-which(Y==1) i.0<-which(Y==0) N.1<-length(i.1) N.0<-length(i.0) if(N.1==0 | N.0==0) { if(verbose) print("observations of the same class, no majority class removed using Tomek links") return(list(X=X,Y=Y)) } else { X.1<-X[i.1,] Y.1<-Y[i.1] timeTomek<-system.time({ #retrive the nearest neighbor (nn) of minority examples #nn<-get.knnx(data=cbind(X,Y),query=cbind(X.1,Y.1),k=2)$nn.index nn<-nn2(data=X,query=X.1, k=2)$nn.idx nn.1<-nn[,2] #the 1-nn is the second column, the first is the point itself #if the nn is from the majority class than it is a tomek link indexTomekLinks<-(Y[nn.1]==0) id2remove<-unique(nn.1[which(indexTomekLinks==T)]) }) 
if(any(Y[id2remove]==1)) stop("Error: class 1 removed") id2keep<-setdiff(1:N, id2remove) Xred<-X[id2keep,] Yred<-Y[id2keep] if(verbose){ cat("Instances removed",N-length(id2keep),":", round((N-length(id2keep))/N.0*100,digits=2),"% of 0 class ;", round((N-length(id2keep))/N*100,digits=2),"% of training ; Time needed", round(as.numeric(timeTomek["elapsed"]),digits=2),"\n") } return(list(X=Xred,Y=Yred,id.rm=id2remove)) } } unbalanced/R/ubENN.R0000644000175100001440000000244312543220332013656 0ustar hornikusersubENN <- function(X,Y, k=3, verbose=TRUE){ stopifnot(k > 0, class(verbose) == "logical", all(unique(Y) %in% c(0, 1))) #only numeric features are allowed if(any(sapply(X,is.numeric)==FALSE)) stop("only numeric features are allowed to compute nearest neighbors") i.1<-which(Y==1) i.0<-which(Y==0) if(length(i.0)==0){ #if there are no 0 obs then don't do anything if(verbose) cat("Warning: No negative instances \n") return(list(X=X,Y=Y)) } #removes only example from the majority class timeRemove<-system.time({ out.hat <- FNN::knn(train=X,test=X[i.0,], cl=Y, k=k+1,prob=TRUE) #the 1-nn is the point itself therefore we need k+1 proba.hat <- attr(out.hat, "prob") levels(out.hat) <- c(0,1) prob.th <- k/(k+1) id.miss <- which((Y[i.0]!=out.hat) & (proba.hat>=prob.th)) }) if(verbose) cat("Number of instances removed from majority class with ENN:",length(id.miss), "\t Time needed:",round(as.numeric(timeRemove["elapsed"]),digits=2),"\n") if(length(id.miss)==0) id.keep.0<-i.0 else id.keep.0<-setdiff(i.0,i.0[id.miss]) Id<-c(id.keep.0,i.1) Id<-sort(Id) id.removed<-setdiff(1:nrow(X),Id) if (is.vector(X)!=TRUE) X=X[Id,] else X=X[Id] Y=Y[Id] return(list(X=X,Y=Y,id.rm=id.removed)) } unbalanced/R/ubBalance.R0000644000175100001440000000463612543220332014571 0ustar hornikusersubBalance <- function(X, Y, type = "ubSMOTE", positive = 1, percOver = 200, percUnder = 200, k = 5, perc = 50, method = "percPos", w = NULL, verbose = FALSE) { if (any(is.na(Y))) stop("Y has NAs") if (!is.factor(Y)) 
stop("Y must be a factor") lev <- levels(Y) if (length(lev) != 2) stop("Y must be a binary factor variable") # transform the output in the range {0, 1} Y <- factor(Y == positive, levels = c(FALSE, TRUE), labels = c(0, 1)) if (length(type) > 1) stop("balance type does not support multiple selection") N.0 <- length(which(Y == 0)) N.1 <- length(which(Y == 1)) if (N.0 == 0) { cat("Warning: No negative instances, skip balance \n") return(list(X = X, Y = Y)) } if (N.1 == 0) { cat("Warning: No positive instances, skip balance \n") return(list(X = X, Y = Y)) } if (N.0 == N.1) { cat("Warning: equal number of positives and negatives, skip balance \n") return(list(X = X, Y = Y)) } if (N.0 < N.1) stop(positive, " class is not the minority class") data <- NULL if (type == "ubOver") data <- ubOver(X, Y, k, verbose) if (type == "ubUnder") data <- ubUnder(X, Y, perc, method, w) if (type == "ubSMOTE") data <- ubSMOTE(X, Y, percOver, k, percUnder, verbose) if (type == "ubOSS") data <- ubOSS(X, Y, verbose) if (type == "ubCNN") data <- ubCNN(X, Y, k, verbose) if (type == "ubENN") data <- ubENN(X, Y, k, verbose) if (type == "ubNCL") data <- ubNCL(X, Y, k, verbose) if (type == "ubTomek") data <- ubTomek(X, Y, verbose) if (is.null(data)) stop("technique", type, " not supported") X <- data$X Y <- data$Y id.rm <- data$id.rm if (is.null(id.rm)) id.rm <- NA N <- length(Y) # Id <- sample(1:N) # if (!is.vector(X)) # X = X[Id, ] # else { # # is.vector # X = X[Id] # if (any(is.na(X))) # cat("WARNINGS: vector has NAs \n") # if (all(X == X[1])) # cat("WARNINGS: constant vector after", type, "\n") # } # Y = Y[Id] if (verbose) { cat("Proportion of positives after", type, ":", round(length(which(Y == 1))/N * 100, digits = 2), "% of", N, "observations \n") } #transform the outout with the original labels Y <- factor(Y == 1, levels = c(FALSE, TRUE), labels = lev) return(list(X = X, Y = Y, id.rm = id.rm)) } unbalanced/MD50000644000175100001440000000251512543234315012701 0ustar 
hornikusers08c425cfae6ad3f02c682b9db886f4a1 *DESCRIPTION 9dff6d70fd4d84254acc11ca5cc8318d *NAMESPACE 62bb3caa75bd03991c83f587447d476e *R/ubBalance.R 51642795f32e8f7324b6964abb49d3e6 *R/ubCNN.R 90c897b73ebf5270b29a59c47075cd49 *R/ubENN.R 62d32cde7567d3735cc6168986f9ee35 *R/ubNCL.R 654fa8fc0e442d5f788c51206683b8ea *R/ubOSS.R 40328b892df6a202c6d671ba0787f282 *R/ubOver.R 82bafa9968cdffca6400e1ae01231259 *R/ubRacing.R 75d973392ab91c447123aecd6f5ae691 *R/ubSMOTE.R 0f91d655a90c31da4db05549607c46ad *R/ubSmoteExs.R 2266ddbc77b0f8adc127b0f77e1e5cbe *R/ubTomek.R 265d5ccb8e472045425d8cbc3172c5cf *R/ubUnder.R 3ff3ca95d71a9a97dbcd1580c81d1367 *R/unbalanced-internal.R 304847c969d809c547ea46f49d1fad44 *data/ubIonosphere.rda 135f3fc22e56c224ffb1c637ea4d9f4b *man/ubBalance.Rd f051859337258d85b22928215152aeb7 *man/ubCNN.Rd 60840ac61968c7c18cc6ed1ee2d81327 *man/ubENN.Rd da1ea42f84ea400fd601c7da0e42ab13 *man/ubIonosphere.Rd cc9ac7903adfa9172f2ba5077090dd49 *man/ubNCL.Rd a45ecb3ae4e4dab59c54c87990265c30 *man/ubOSS.Rd 524f4a55e17b926e7c6eb9de0e7b0029 *man/ubOver.Rd 574200f3fad678de190d13714ad134af *man/ubRacing.Rd 5f91719801fb7df3f27fc4a4f2f31eac *man/ubSMOTE.Rd f70d15767dffdbd9193f479c8c43f51c *man/ubSmoteExs.Rd 5bbeeaa091cfe15ca5323a7a7c5a9acf *man/ubTomek.Rd f8bb4bf0901969bc8a87577b63d74921 *man/ubUnder.Rd 643bc9d2b2922d92e2dae6ff13a6d9da *man/unbalanced-package.Rd unbalanced/DESCRIPTION0000644000175100001440000000174212543234315014100 0ustar hornikusersPackage: unbalanced Type: Package Title: Racing for Unbalanced Methods Selection Version: 2.0 Date: 2015-06-25 Author: Andrea Dal Pozzolo, Olivier Caelen and Gianluca Bontempi Maintainer: Andrea Dal Pozzolo Description: A dataset is said to be unbalanced when the class of interest (minority class) is much rarer than normal behaviour (majority class). The cost of missing a minority class is typically much higher that missing a majority class. 
Most learning systems are not prepared to cope with unbalanced data and several techniques have been proposed. This package implements some of most well-known techniques and propose a racing algorithm to select adaptively the most appropriate strategy for a given unbalanced task. License: GPL (>= 3) URL: http://mlg.ulb.ac.be Depends: mlr, foreach, doParallel Imports: FNN, RANN Suggests: randomForest, ROCR Packaged: 2015-06-26 09:54:30 UTC; Andrea NeedsCompilation: no Repository: CRAN Date/Publication: 2015-06-26 13:34:37 unbalanced/man/0000755000175100001440000000000012543220261013134 5ustar hornikusersunbalanced/man/ubUnder.Rd0000644000175100001440000000321012542474237015040 0ustar hornikusers\name{ubUnder} \alias{ubUnder} \title{Under-sampling} \description{The function removes randomly some instances from the majority (negative) class and keeps all instances in the minority (positive) class in order to obtain a more balanced dataset. It allows two ways to perform undersampling: i) by setting the percentage of positives wanted after undersampling (percPos method), ii) by setting the sampling rate on the negatives, (percUnder method). For percPos, "perc"has to be (N.1/N * 100) <= perc <= 50, where N.1 is the number of positive and N the total number of instances. For percUnder, "perc"has to be (N.1/N.0 * 100) <= perc <= 100, where N.1 is the number of positive and N.0 the number of negative instances. } \usage{ubUnder(X, Y, perc = 50, method = "percPos", w = NULL)} \arguments{ \item{X}{the input variables of the unbalanced dataset.} \item{Y}{the response variable of the unbalanced dataset. 
It must be a binary factor where the majority class is coded as 0 and the minority as 1.} \item{perc}{percentage of sampling.} \item{method}{method to perform under sampling ("percPos", "percUnder").} \item{w}{weights used for sampling the majority class, if NULL all majority instances are sampled with equal weights} } \value{ The function returns a list: \item{X}{input variables} \item{Y}{response variable} \item{id.rm}{index of instances removed} } \seealso{ \code{\link{ubBalance}} } \examples{ library(unbalanced) data(ubIonosphere) n<-ncol(ubIonosphere) output<-ubIonosphere$Class input<-ubIonosphere[ ,-n] data<-ubUnder(X=input, Y= output, perc = 40, method = "percPos") newData<-cbind(data$X, data$Y) }unbalanced/man/ubBalance.Rd0000644000175100001440000000413112542763320015305 0ustar hornikusers\name{ubBalance} \alias{ubBalance} \title{Balance wrapper} \description{The function implements several techniques to re-balance or remove noisy instances in unbalanced datasets.} \usage{ubBalance(X, Y, type="ubSMOTE", positive=1, percOver=200, percUnder=200, k=5, perc=50, method="percPos", w=NULL, verbose=FALSE)} \arguments{ \item{X}{the input variables of the unbalanced dataset.} \item{Y}{the response variable of the unbalanced dataset.} \item{type}{the balancing technique to use (ubOver, ubUnder, ubSMOTE, ubOSS, ubCNN, ubENN, ubNCL, ubTomek).} \item{positive}{the majority class of the response variable.} \item{percOver}{parameter used in ubSMOTE} \item{percUnder}{parameter used in ubSMOTE} \item{k}{parameter used in ubOver, ubSMOTE, ubCNN, ubENN, ubNCL} \item{perc}{parameter used in ubUnder} \item{method}{parameter used in ubUnder} \item{w}{parameter used in ubUnder} \item{verbose}{print extra information (TRUE/FALSE)} } \details{The argument type can take the following values: "ubOver" (over-sampling), "ubUnder" (under-sampling), "ubSMOTE" (SMOTE), "ubOSS" (One Side Selection), "ubCNN" (Condensed Nearest Neighbor), "ubENN" (Edited Nearest Neighbor), "ubNCL" 
(Neighborhood Cleaning Rule), "ubTomek" (Tomek Link).} \value{ The function returns a list: \item{X}{input variables} \item{Y}{response variable} \item{id.rm}{index of instances removed if availble in the technique selected} } \references{Dal Pozzolo, Andrea, et al. "Racing for unbalanced methods selection." Intelligent Data Engineering and Automated Learning - IDEAL 2013. Springer Berlin Heidelberg, 2013. 24-31.} \seealso{ \code{\link{ubRacing}}, \code{\link{ubOver}}, \code{\link{ubUnder}}, \code{\link{ubSMOTE}}, \code{\link{ubOSS}}, \code{\link{ubCNN}}, \code{\link{ubENN}}, \code{\link{ubNCL}}, \code{\link{ubTomek}} } \examples{ library(unbalanced) data(ubIonosphere) n<-ncol(ubIonosphere) output<-ubIonosphere$Class input<-ubIonosphere[ ,-n] #balance the dataset data<-ubBalance(X= input, Y=output, type="ubSMOTE", percOver=300, percUnder=150, verbose=TRUE) balancedData<-cbind(data$X,data$Y) }unbalanced/man/ubTomek.Rd0000644000175100001440000000211012542474247015041 0ustar hornikusers\name{ubTomek} \alias{ubTomek} \title{Tomek Link} \description{The function finds the points in the dataset that are tomek link using 1-NN and then removes only majority class instances that are tomek links.} \usage{ubTomek(X, Y, verbose = TRUE)} \arguments{ \item{X}{the input variables of the unbalanced dataset.} \item{Y}{the response variable of the unbalanced dataset. It must be a binary factor where the majority class is coded as 0 and the minority as 1.} \item{verbose}{print extra information (TRUE/FALSE)} } \details{In order to compute nearest neighbors, only numeric features are allowed.} \value{ The function returns a list: \item{X}{input variables} \item{Y}{response variable} \item{id.rm}{index of instances removed} } \references{I. Tomek. Two modifications of cnn. IEEE Trans. Syst. 
Man Cybern., 6:769-772, 1976.} \seealso{ \code{\link{ubBalance}} } \examples{ library(unbalanced) data(ubIonosphere) n<-ncol(ubIonosphere) output<-ubIonosphere$Class input<-ubIonosphere[ ,-n] data<-ubTomek(X=input, Y= output) newData<-cbind(data$X, data$Y) }unbalanced/man/ubRacing.Rd0000644000175100001440000001013212543220261015152 0ustar hornikusers\name{ubRacing} \alias{ubRacing} \title{Racing} \description{The function implementes the Racing algorithm [2] for selecting the best technique to re-balance or remove noisy instances in unbalanced datasets [1].} \usage{ ubRacing(formula, data, algo, positive=1, ncore=1, nFold=10, maxFold=10, maxExp=100, stat.test="friedman", metric="f1", ubConf, verbose=FALSE, ...) } \arguments{ \item{formula}{formula describing the model to be fitted.} \item{data}{the unbalanced dataset} \item{algo}{the classification algorithm to use with the mlr package.} \item{positive}{label of the positive (minority) class.} \item{ncore}{the number of core to use in the Race. Race is performed with parallel exectuion when ncore > 1.} \item{nFold}{number of folds in the cross-validation that provides the subset of data to the Race} \item{maxFold}{maximum number of folds to use in the Race} \item{maxExp}{maximum number of experiments to use in the Race} \item{stat.test}{statistical test to use to remove candidates which perform significantly worse than the best.} \item{metric}{metric used to asses the classification.} \item{ubConf}{configuration of the balancing techniques used in the Race.} \item{verbose}{print extra information (TRUE/FALSE)} \item{\dots}{additional arguments pass to train function in mlr package.} } \details{The argument metric can take the following values: "gmean", "f1" (F-score or F-measure), "auc" (Area Under ROC curve). Argument stat.test defines the statistical test used to remove candidates during the race. 
It can take the following values: "friedman" (Friedman test), "t.bonferroni" (t-test with bonferroni correction), "t.holm" (t-test with holm correction), "t.none" (t-test without correction), "no" (no test, the Race continues until new subsets of data are provided by the cross validation). Argument balanceConf is a list passed to function ubBalance that is used for configuration.} \value{ The function returns a list: \item{Race}{matrix containing accuracy results for each technique in the Race.} \item{best}{best technique selected in the Race.} \item{avg}{average of the metric used in the Race for the technique selected.} \item{sd}{standard deviation of the metric used in the Race for the technique selected.} \item{N.test}{number of experiments used in the Race.} \item{Gain}{\% of computational gain with resepct to the maximum number of experiments given by the cross validation.} } \references{1. Dal Pozzolo, Andrea, et al. "Racing for unbalanced methods selection." Intelligent Data Engineering and Automated Learning - IDEAL 2013. Springer Berlin Heidelberg, 2013. 24-31.\cr 2. Birattari, Mauro, et al. "A Racing Algorithm for Configuring Metaheuristics."GECCO. Vol. 2. 
2002.} \note{The function ubRacing is a modified version of the race function availble in the race package: \url{http://cran.r-project.org/package=race}.} \seealso{ \code{\link{ubBalance}}, \code{\link{ubOver}}, \code{\link{ubUnder}}, \code{\link{ubSMOTE}}, \code{\link{ubOSS}}, \code{\link{ubCNN}}, \code{\link{ubENN}}, \code{\link{ubNCL}}, \code{\link{ubTomek}} } \examples{ #use Racing to select the best technique for an unbalanced dataset library(unbalanced) data(ubIonosphere) #configure sampling parameters ubConf <- list(type="ubUnder", percOver=200, percUnder=200, k=2, perc=50, method="percPos", w=NULL) #load the classification algorithm that you intend to use inside the Race #see 'mlr' package for supported algorithms library(randomForest) #use only 5 trees results <- ubRacing(Class ~., ubIonosphere, "randomForest", positive=1, ubConf=ubConf, ntree=5) # try with 500 trees # results <- ubRacing(Class ~., ubIonosphere, "randomForest", positive=1, ubConf=ubConf, ntree=500) # let's try with a different algorithm # library(e1071) # results <- ubRacing(Class ~., ubIonosphere, "svm", positive=1, ubConf=ubConf) # library(rpart) # results <- ubRacing(Class ~., ubIonosphere, "rpart", positive=1, ubConf=ubConf) } % Add one or more standard keywords, see file 'KEYWORDS' in the % R documentation directory. % \keyword{ ~kwd1 } % \keyword{ ~kwd2 }% __ONLY ONE__ keyword per line unbalanced/man/ubOver.Rd0000644000175100001440000000223612542474266014707 0ustar hornikusers\name{ubOver} \alias{ubOver} \title{Over-sampling} \description{ The function replicates randomly some instances from the minority class in order to obtain a final dataset with the same number of instances from the two classes.} \usage{ubOver(X, Y, k = 0, verbose=TRUE)} \arguments{ \item{X}{the input variables of the unbalanced dataset.} \item{Y}{the response variable of the unbalanced dataset. 
It must be a binary factor where the majority class is coded as 0 and the minority as 1.} \item{k}{defines the sampling method.} \item{verbose}{print extra information (TRUE/FALSE)} } \details{ If K=0: sample with replacement from the minority class until we have the same number of instances in each class. If K>0: sample with replacement from the minority class until we have k-times the orginal number of minority instances. } \value{ The function returns a list: \item{X}{input variables} \item{Y}{response variable} } \seealso{ \code{\link{ubBalance}} } \examples{ library(unbalanced) data(ubIonosphere) n<-ncol(ubIonosphere) output<-ubIonosphere$Class input<-ubIonosphere[ ,-n] data<-ubOver(X=input, Y= output) newData<-cbind(data$X, data$Y) }unbalanced/man/ubNCL.Rd0000644000175100001440000000252512542474303014401 0ustar hornikusers\name{ubNCL} \alias{ubNCL} \title{Neighborhood Cleaning Rule} \description{ Neighborhood Cleaning Rule modifies the Edited Nearest Neighbor method by increasing the role of data cleaning. Firstly, NCL removes negatives examples which are misclassified by their 3-nearest neighbors. Secondly, the neighbors of each positive examples are found and the ones belonging to the majority class are removed. } \usage{ubNCL(X, Y, k = 3, verbose = TRUE)} \arguments{ \item{X}{the input variables of the unbalanced dataset.} \item{Y}{the response variable of the unbalanced dataset. It must be a binary factor where the majority class is coded as 0 and the minority as 1.} \item{k}{the number of neighbours to use} \item{verbose}{print extra information (TRUE/FALSE)} } \details{In order to compute nearest neighbors, only numeric features are allowed.} \value{ The function returns a list: \item{X}{input variables} \item{Y}{response variable} } \references{J. Laurikkala. Improving identification of difficult small classes by balancing class distribution. 
Artificial Intelligence in Medicine, pages 63-66, 2001.} \seealso{ \code{\link{ubBalance}} } \examples{ library(unbalanced) data(ubIonosphere) n<-ncol(ubIonosphere) output<-ubIonosphere$Class input<-ubIonosphere[ ,-n] data<-ubNCL(X=input, Y= output) newData<-cbind(data$X, data$Y) }unbalanced/man/ubCNN.Rd0000644000175100001440000000214612542474325014406 0ustar hornikusers\name{ubCNN} \alias{ubCNN} \title{Condensed Nearest Neighbor} \description{Condensed Nearest Neighbor selects the subset of instances that are able to correctly classifing the original datasets using a one-nearest neighbor rule.} \usage{ubCNN(X, Y, k = 1, verbose = T)} \arguments{ \item{X}{the input variables of the unbalanced dataset.} \item{Y}{the response variable of the unbalanced dataset. It must be a binary factor where the majority class is coded as 0 and the minority as 1.} \item{k}{the number of neighbours to use} \item{verbose}{print extra information (TRUE/FALSE)} } \details{In order to compute nearest neighbors, only numeric features are allowed.} \value{ The function returns a list: \item{X}{input variables} \item{Y}{response variable} } \references{ P. E. Hart. The condensed nearest neighbor rule. IEEE Transactions on Informa- tion Theory, 1968. } \seealso{ \code{\link{ubBalance}} } \examples{ library(unbalanced) data(ubIonosphere) n<-ncol(ubIonosphere) output<-ubIonosphere$Class input<-ubIonosphere[ ,-n] data<-ubCNN(X=input, Y= output) newData<-cbind(data$X, data$Y) }unbalanced/man/unbalanced-package.Rd0000644000175100001440000000523112543220213017106 0ustar hornikusers\name{unbalanced-package} \alias{unbalanced-package} \alias{unbalanced} \docType{package} \title{Racing for Unbalanced Methods Selection} \description{A dataset is said to be unbalanced when the class of interest (minority class) is much rarer than normal behaviour (majority class). The cost of missing a minority class is typically much higher that missing a majority class. 
Most learning systems are not prepared to cope with unbalanced data and several techniques have been proposed to rebalance the classes. This package implements some of most well-known techniques and propose a racing algorithm [2] to select adaptively the most appropriate strategy for a given unbalanced task [1]. } \details{ \tabular{ll}{ Package: \tab unbalanced\cr Type: \tab Package\cr Version: \tab 2.0\cr Date: \tab 2015-06-17\cr License: \tab GPL (>= 3)\cr } } \author{ Andrea Dal Pozzolo \email{adalpozz@ulb.ac.be}, Olivier Caelen \email{olivier.caelen@worldline.com} and Gianluca Bontempi \email{gbonte@ulb.ac.be} Maintainer: Andrea Dal Pozzolo Andrea Dal Pozzolo and Gianluca Bontempi are with the \href{http://mlg.ulb.ac.be}{Machine Learning Group}, Computer Science Department, Faculty of Sciences ULB, Universite Libre de Bruxelles, Brussels, Belgium.\cr Olivier Caelen is with the Fraud Risk Management Analytics, Worldline, Belgium.\cr The work of Andrea Dal Pozzolo is supported by the Doctiris scholarship of Innoviris, Belgium. } \references{ 1. Dal Pozzolo, Andrea, et al. "Racing for unbalanced methods selection." Intelligent Data Engineering and Automated Learning - IDEAL 2013. Springer Berlin Heidelberg, 2013. 24-31.\cr 2. Birattari, Mauro, et al. "A Racing Algorithm for Configuring Metaheuristics."GECCO. Vol. 2. 2002. 
} \keyword{unbalanced datasets, imbalanced learning} \seealso{ \code{\link{ubBalance}}, \code{\link{ubRacing}} } \examples{ #use Racing to select the best technique for an unbalanced dataset library(unbalanced) data(ubIonosphere) #configure sampling parameters ubConf <- list(type="ubUnder", percOver=200, percUnder=200, k=2, perc=50, method="percPos", w=NULL) #load the classification algorithm that you intend to use inside the Race #see 'mlr' package for supported algorithms library(randomForest) #use only 5 trees results <- ubRacing(Class ~., ubIonosphere, "randomForest", positive=1, ubConf=ubConf, ntree=5) # try with 500 trees # results <- ubRacing(Class ~., ubIonosphere, "randomForest", positive=1, ubConf=ubConf, ntree=500) # let's try with a different algorithm # library(e1071) # results <- ubRacing(Class ~., ubIonosphere, "svm", positive=1, ubConf=ubConf) # library(rpart) # results <- ubRacing(Class ~., ubIonosphere, "rpart", positive=1, ubConf=ubConf) } unbalanced/man/ubSMOTE.Rd0000644000175100001440000000273612542474257014670 0ustar hornikusers\name{ubSMOTE} \alias{ubSMOTE} \title{SMOTE} \description{Function that implements SMOTE (synthetic minority over-sampling technique)} \usage{ubSMOTE(X, Y, perc.over = 200, k = 5, perc.under = 200, verbose = TRUE)} %- maybe also 'usage' for other objects documented here. \arguments{ \item{X}{the input variables of the unbalanced dataset.} \item{Y}{the response variable of the unbalanced dataset. It must be a binary factor where the majority class is coded as 0 and the minority as 1.} \item{perc.over}{per.over/100 is the number of new instances generated for each rare instance. 
If perc.over < 100 a single instance is generated.} \item{k}{the number of neighbours to consider as the pool from where the new examples are generated} \item{perc.under}{perc.under/100 is the number of "normal" (majority class) instances that are randomly selected for each smoted observation.} \item{verbose}{print extra information (TRUE/FALSE)} } \details{Y must be a factor.} \value{ The function returns a list: \item{X}{input variables} \item{Y}{response variable} } \references{Chawla, Nitesh V., et al. "SMOTE: synthetic minority over-sampling technique." arXiv preprint arXiv:1106.1813 (2011).} \note{Original code from DMwR package} \seealso{ \code{\link{ubBalance}} } \examples{ library(unbalanced) data(ubIonosphere) n<-ncol(ubIonosphere) output<-ubIonosphere$Class input<-ubIonosphere[ ,-n] data<-ubSMOTE(X=input, Y= output) newData<-cbind(data$X, data$Y) }unbalanced/man/ubSmoteExs.Rd0000644000175100001440000000115312271425733015532 0ustar hornikusers\name{ubSmoteExs} \alias{ubSmoteExs} \title{ubSmoteExs} \description{Function used in SMOTE to generate new minority examples.} \usage{ubSmoteExs(data, tgt, N = 200, k = 5)} \arguments{ \item{data}{the data.frame} \item{tgt}{the index of the target/response variables} \item{N}{N/100 is the number of new instances generated for each rare instance. 
If N < 100 a single instance is generated} \item{k}{the number of neighbours to consider as the pool from where the new examples are generated} } \details{This function does not handle vectors} \value{ newCases } \seealso{ \code{\link{ubSMOTE}} } unbalanced/man/ubOSS.Rd0000644000175100001440000000225112542474274014434 0ustar hornikusers\name{ubOSS} \alias{ubOSS} \title{One Side Selection} \description{One Side Selection is an undersampling method resulting from the application of Tomek links followed by the application of Condensed Nearest Neighbor.} \usage{ubOSS(X, Y, verbose = TRUE)} \arguments{ \item{X}{the input variables of the unbalanced dataset.} \item{Y}{the response variable of the unbalanced dataset. It must be a binary factor where the majority class is coded as 0 and the minority as 1.} \item{verbose}{print extra information (TRUE/FALSE)} } \details{In order to compute nearest neighbors, only numeric features are allowed.} \value{ The function returns a list: \item{X}{input variables} \item{Y}{response variable} } \references{M. Kubat, S. Matwin, et al. Addressing the curse of imbalanced training sets: one-sided selection. In MACHINE LEARNING-INTERNATIONAL WORKSHOP THEN CONFERENCE-, pages 179-186. MORGAN KAUFMANN PUBLISHERS, INC., 1997.} \seealso{ \code{\link{ubBalance}} } \examples{ library(unbalanced) data(ubIonosphere) n<-ncol(ubIonosphere) output<-ubIonosphere$Class input<-ubIonosphere[ ,-n] data<-ubOSS(X=input, Y= output) newData<-cbind(data$X, data$Y) } unbalanced/man/ubENN.Rd0000644000175100001440000000221112542474313014376 0ustar hornikusers\name{ubENN} \alias{ubENN} \title{Edited Nearest Neighbor} \description{Edited Nearest Neighbor removes any example whose class label differs from the class of at least two of its three nearest neighbors.} \usage{ubENN(X, Y, k = 3, verbose = TRUE)} \arguments{ \item{X}{the input variables of the unbalanced dataset.} \item{Y}{the response variable of the unbalanced dataset. 
It must be a binary factor where the majority class is coded as 0 and the minority as 1.} \item{k}{the number of neighbours to use} \item{verbose}{print extra information (TRUE/FALSE)} } \details{In order to compute nearest neighbors, only numeric features are allowed.} \value{ The function returns a list: \item{X}{input variables} \item{Y}{response variable} } \references{D. Wilson. Asymptotic properties of nearest neighbor rules using edited data. Systems, Man and Cybernetics, IEEE Transactions on, 408-421, 1972.} \seealso{ \code{\link{ubBalance}} } \examples{ library(unbalanced) data(ubIonosphere) n<-ncol(ubIonosphere) output<-ubIonosphere$Class input<-ubIonosphere[ ,-n] data<-ubENN(X=input, Y= output) newData<-cbind(data$X, data$Y) } unbalanced/man/ubIonosphere.Rd0000644000175100001440000000125712543215227016100 0ustar hornikusers\name{ubIonosphere} \alias{ubIonosphere} \docType{data} \title{Ionosphere dataset} \description{ The dataset is a modification of the Ionosphere dataset contained in "mlbench" package. It contains only numerical input variables, i.e. the first two variables are removed. The Class variable originally taking values bad and good has been transformed into a factor where 1 denotes bad and 0 good. } \usage{data(ubIonosphere)} \format{ A data frame with 351 observations on 33 independent variables (all numerical) and one last defining the class (1 or 0). } \source{\url{http://cran.r-project.org/package=mlbench}} \examples{ data(ubIonosphere) summary(ubIonosphere) } \keyword{datasets}